| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200 |
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.isDomainLabelStartChar = exports.isSchemeStartChar = exports.tldUrlHostRe = exports.schemeUrlRe = exports.invalidSchemeRe = exports.httpSchemePrefixRe = exports.httpSchemeRe = void 0;
- exports.isSchemeChar = isSchemeChar;
- exports.isDomainLabelChar = isDomainLabelChar;
- exports.isPathChar = isPathChar;
- exports.isUrlSuffixStartChar = isUrlSuffixStartChar;
- exports.isKnownTld = isKnownTld;
- exports.isValidSchemeUrl = isValidSchemeUrl;
- exports.isValidTldMatch = isValidTldMatch;
- exports.isValidIpV4Address = isValidIpV4Address;
- var char_utils_1 = require("../char-utils");
- var known_tlds_1 = require("./known-tlds");
/**
 * Regular expression to match an http:// or https:// scheme.
 *
 * The pattern is case-insensitive and unanchored, so it matches the scheme
 * anywhere in a string.
 */
exports.httpSchemeRe = /https?:\/\//i;
/**
 * Regular expression to match an http:// or https:// scheme as the prefix of
 * a string.
 *
 * Built from the source of `httpSchemeRe` with a leading '^' anchor so that
 * the two patterns always stay in sync.
 */
exports.httpSchemePrefixRe = new RegExp('^' + exports.httpSchemeRe.source, 'i');
/**
 * A regular expression used to determine the schemes we should not autolink:
 * 'javascript:' and 'vbscript:', matched case-insensitively at the start of
 * the string.
 */
exports.invalidSchemeRe = /^(javascript|vbscript):/i;
/**
 * A regular expression used to determine if the URL is a scheme match (such as
 * 'http://google.com', and as opposed to a "TLD match"). This regular
 * expression is used to parse out the host along with whether the URL has an
 * authority component (i.e. '//').
 *
 * Capturing groups:
 *   1. '//' if the URL has an authority component, `undefined` otherwise
 *      (the group is optional and does not participate when '//' is absent)
 *   2. The host: every character up to the next ':' or '/'. Always a string,
 *      but may be empty. Ex: 'google.com'
 *
 * See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
 */
exports.schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;
/**
 * A regular expression used to determine if the URL is a TLD match (such as
 * 'google.com', and as opposed to a "scheme match"). This regular expression
 * is used to help parse out the TLD (top-level domain) of the host.
 *
 * The host may optionally be prefixed with protocol-relative '//' chars, which
 * are matched but kept out of the capturing group.
 *
 * Capturing groups:
 *   1. The host: one or more characters up to the next '/', '#', '?' or ':'
 *
 * See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
 */
exports.tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/;
/**
 * Determines if the given character code represents a character that may start
 * a scheme (ex: the 'h' in 'http').
 *
 * This is an alias of `isAsciiLetterChar` from '../char-utils' (equivalent to
 * checking the RegExp `/[A-Za-z]/`), exported under a scheme-specific name for
 * clarity and maintainability.
 */
exports.isSchemeStartChar = char_utils_1.isAsciiLetterChar;
/**
 * Determines if the given character is a valid character in a scheme (such as
 * 'http' or 'ssh+git'), but only after the start char (which is handled by
 * {@link isSchemeStartChar}).
 *
 * Accepted characters are ASCII letters, digits, '+', '-' and '.'.
 *
 * @param {number} charCode The character code to check.
 * @returns {boolean} `true` if the character may appear inside a scheme.
 */
function isSchemeChar(charCode) {
    return ((0, char_utils_1.isAsciiLetterChar)(charCode) ||
        (0, char_utils_1.isDigitChar)(charCode) ||
        charCode === 43 /* Char.Plus */ || // '+'
        charCode === 45 /* Char.Dash */ || // '-'
        charCode === 46 /* Char.Dot */ // '.'
    );
}
/**
 * Determines if the character can begin a domain label, which must be an
 * alphanumeric character and not an underscore or dash.
 *
 * A domain label is a segment of a hostname such as subdomain.google.com.
 *
 * This is an alias of `isAlphaNumericOrMarkChar` from '../char-utils', exported
 * under a domain-specific name for clarity.
 * NOTE(review): the helper's name suggests it also accepts "mark" characters
 * in addition to letters and digits - confirm against '../char-utils'.
 */
exports.isDomainLabelStartChar = char_utils_1.isAlphaNumericOrMarkChar;
/**
 * Determines if the character may appear in a domain label at a position after
 * the label's first character.
 *
 * This accepts an underscore plus every character accepted by
 * {@link isDomainLabelStartChar}. No other character is special-cased here.
 *
 * A domain label is a segment of a hostname such as subdomain.google.com.
 *
 * @param {number} charCode The character code to check.
 * @returns {boolean} `true` if the character may appear inside a domain label.
 */
function isDomainLabelChar(charCode) {
    return charCode === 95 /* Char.Underscore */ || (0, exports.isDomainLabelStartChar)(charCode);
}
/**
 * Determines if the character is a path character ("pchar") as defined by
 * https://tools.ietf.org/html/rfc3986#appendix-A
 *
 *     pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 *     unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 *     pct-encoded   = "%" HEXDIG HEXDIG
 *     sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                   / "*" / "+" / "," / ";" / "="
 *
 * Note that this implementation doesn't follow the spec exactly, but rather
 * follows URL path characters found out in the wild (spec might be out of date?)
 *
 * @param {number} charCode The character code to check.
 * @returns {boolean} `true` if any of the three '../char-utils' predicates
 *   used below accepts the character.
 */
function isPathChar(charCode) {
    return ((0, char_utils_1.isAlphaNumericOrMarkChar)(charCode) ||
        (0, char_utils_1.isUrlSuffixAllowedSpecialChar)(charCode) ||
        // characters in addition to those allowed by isUrlSuffixAllowedSpecialChar()
        (0, char_utils_1.isUrlSuffixNotAllowedAsFinalChar)(charCode));
}
/**
 * Determines if the character given may begin the "URL Suffix" section of a
 * URI (i.e. the path, query, or hash section). These are the '/', '?' and '#'
 * characters.
 *
 * See https://tools.ietf.org/html/rfc3986#appendix-A
 *
 * @param {number} charCode The character code to check.
 * @returns {boolean} `true` for the char codes of '/', '?' and '#', `false`
 *   for anything else.
 */
function isUrlSuffixStartChar(charCode) {
    return (charCode === 47 /* Char.Slash */ || // '/'
        charCode === 63 /* Char.Question */ || // '?'
        charCode === 35 /* Char.NumberSign */ // '#'
    );
}
/**
 * Determines if the top-level domain (TLD) read in the host is a known TLD.
 *
 * Example: 'com' would be a known TLD (for a host of 'google.com'), but
 * 'local' would not (for a domain name of 'my-computer.local').
 *
 * @param {string} tld The TLD to check, in any letter case.
 * @returns {boolean} `true` if `tldRegex` from './known-tlds' matches the
 *   lower-cased TLD.
 */
function isKnownTld(tld) {
    // Make sure the tld is lowercase for the regex
    var lowerCasedTld = tld.toLowerCase();
    return known_tlds_1.tldRegex.test(lowerCasedTld);
}
/**
 * Determines if the given `url` is a valid scheme-prefixed URL.
 *
 * @param {string} url The candidate text, starting at its scheme.
 * @returns {boolean} `true` if the text should be treated as a scheme URL,
 *   `false` if the scheme is disallowed, the text does not start with a
 *   scheme, or (when there is no '//') the host doesn't look like a host.
 */
function isValidSchemeUrl(url) {
    // If the scheme is 'javascript:' or 'vbscript:', these link
    // types can be dangerous. Don't link them.
    if (exports.invalidSchemeRe.test(url)) {
        return false;
    }
    var schemeMatch = url.match(exports.schemeUrlRe);
    if (!schemeMatch) {
        return false;
    }
    // Group 1 is '//' when an authority component follows the scheme
    var isAuthorityMatch = !!schemeMatch[1];
    if (isAuthorityMatch) {
        // Any match that has an authority ('//' chars) after the scheme is
        // valid, such as 'http://anything'
        return true;
    }
    // If there's no authority ('//' chars), check that we have a hostname
    // that looks valid.
    //
    // The host must contain at least one '.' char and have a domain label
    // with at least one letter to be considered valid.
    //
    // Accept:
    //   - git:domain.com    (scheme followed by a host)
    // Do not accept:
    //   - git:something     ('something' doesn't look like a host)
    //   - version:1.0       ('1.0' doesn't look like a host)
    var host = schemeMatch[2];
    var hasDot = host.indexOf('.') !== -1;
    // The letter check looks for an ASCII letter anywhere in the host string
    return hasDot && /[A-Za-z]/.test(host);
}
/**
 * Determines if the given `url` is a match with a valid TLD.
 *
 * @param {string} url The candidate text, such as 'google.com/path' or the
 *   protocol-relative '//google.com'.
 * @returns {boolean} `true` if the host has at least two dot-separated labels
 *   and its last label is a known TLD.
 */
function isValidTldMatch(url) {
    // TLD URL such as 'google.com', we need to confirm that we have a valid
    // top-level domain
    var tldUrlHostMatch = url.match(exports.tldUrlHostRe);
    if (!tldUrlHostMatch) {
        // At this point, if the URL didn't match our TLD re, it must be invalid
        // (highly unlikely to happen, but just in case)
        return false;
    }
    // Use the capturing group rather than the whole match so that an optional
    // protocol-relative '//' prefix is not treated as part of the host
    var host = tldUrlHostMatch[1];
    var hostLabels = host.split('.');
    if (hostLabels.length < 2) {
        // 0 or 1 host label, there's no TLD. Ex: 'localhost'
        return false;
    }
    var tld = hostLabels[hostLabels.length - 1];
    if (!isKnownTld(tld)) {
        return false;
    }
    // TODO: Implement these conditions for TLD matcher:
    // (
    //     this.longestDomainLabelLength <= 63 &&
    //     this.domainNameLength <= 255
    // );
    return true;
}
// Regular expression to confirm a valid IPv4 address (ex: '192.168.0.1'):
// exactly four dot-separated octets, each in the range 0-255, anchored at both
// ends of the string.
// TODO: encode this into the state machine so that we don't need to run this
// regexp separately to confirm the match
var ipV4Re = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
// Regular expression used to split the IPv4 address itself from any
// port/path/query/hash that follows it
var ipV4PartRe = /[:/?#]/;
/**
 * Determines if the given URL is a valid IPv4-prefixed URL.
 *
 * Only the text before the first ':', '/', '?' or '#' is validated, so a
 * trailing port, path, query or hash does not affect the result.
 *
 * @param {string} url The candidate text, such as '192.168.0.1:8080/path'.
 * @returns {boolean} `true` if the leading part is a valid IPv4 address.
 */
function isValidIpV4Address(url) {
    // Grab just the IP address; a split limit of 1 stops after the first part
    var ipV4Part = url.split(ipV4PartRe, 1)[0];
    return ipV4Re.test(ipV4Part);
}
- //# sourceMappingURL=uri-utils.js.map
|