智慧水务管理系统 - 精河县供水工程综合管理平台

parse-matches.js 50KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200
  1. import { UrlMatch } from '../match/url-match';
  2. import { assertNever } from '../utils';
  3. import { httpSchemeRe, isDomainLabelChar, isDomainLabelStartChar, isPathChar, isSchemeChar, isSchemeStartChar, isUrlSuffixStartChar, isValidIpV4Address, isValidSchemeUrl, isValidTldMatch, } from './uri-utils';
  4. import { isEmailLocalPartChar, isEmailLocalPartStartChar, isValidEmail, mailtoSchemePrefixRe, } from './email-utils';
  5. import { EmailMatch } from '../match/email-match';
  6. import { isHashtagTextChar, isValidHashtag } from './hashtag-utils';
  7. import { HashtagMatch } from '../match/hashtag-match';
  8. import { isMentionTextChar, isValidMention } from './mention-utils';
  9. import { MentionMatch } from '../match/mention-match';
  10. import { isPhoneNumberSeparatorChar, isPhoneNumberControlChar, isValidPhoneNumber, } from './phone-number-utils';
  11. import { PhoneMatch } from '../match/phone-match';
  12. import { isAlphaNumericOrMarkChar, isCloseBraceChar, isDigitChar, isOpenBraceChar, isUrlSuffixNotAllowedAsFinalChar, } from '../char-utils';
  13. // For debugging: search for and uncomment other "For debugging" lines
  14. // import CliTable from 'cli-table';
  /**
   * Context object containing all the state needed by the state machine
   * functions during a single `parseMatches()` call.
   *
   * ## Historical note
   *
   * In v4.1.1, we used nested functions to handle the context via closures, but
   * this necessitated re-creating the functions for each call to `parseMatches()`,
   * which made them difficult for v8 to JIT optimize. In v4.1.2, we lifted all of
   * the functions to the top-level scope and passed the context object between
   * them, which allows the functions to be JIT compiled once and reused.
   */
  var ParseMatchesContext = /** @class */ (function () {
      /**
       * @param text The input string being scanned.
       * @param args Options object. Its `tagBuilder`, `stripPrefix`,
       *   `stripTrailingSlash`, `decodePercentEncoding`, `hashtagServiceName`
       *   and `mentionServiceName` properties are copied onto the context.
       */
      function ParseMatchesContext(text, args) {
          this.charIdx = 0; // Current character index being processed
          this.matches = []; // Collection of matches found
          this._stateMachines = []; // Array of active state machines
          // Number of active scheme-URL state machines. Part of an optimization
          // to skip a slow code block when unnecessary. It is not clear that
          // this can ever go above 1, but it is kept as a counter to be safe.
          this.schemeUrlMachinesCount = 0;
          this.text = text;
          this.tagBuilder = args.tagBuilder;
          this.stripPrefix = args.stripPrefix;
          this.stripTrailingSlash = args.stripTrailingSlash;
          this.decodePercentEncoding = args.decodePercentEncoding;
          this.hashtagServiceName = args.hashtagServiceName;
          this.mentionServiceName = args.mentionServiceName;
      }
      // Read-only view of the currently active state machines. Note that
      // removeMachine() replaces the underlying array, so a previously
      // obtained reference is not updated by later removals.
      Object.defineProperty(ParseMatchesContext.prototype, "stateMachines", {
          get: function () {
              return this._stateMachines;
          },
          enumerable: false,
          configurable: true
      });
      /**
       * Registers a state machine as active.
       *
       * @param stateMachine The state machine to add.
       */
      ParseMatchesContext.prototype.addMachine = function (stateMachine) {
          this._stateMachines.push(stateMachine);
          if (isSchemeUrlStateMachine(stateMachine)) {
              this.schemeUrlMachinesCount++;
          }
      };
      /**
       * Removes a state machine from the active list. Removing a machine that
       * is not in the list leaves the list contents and the scheme-URL counter
       * unchanged.
       *
       * @param stateMachine The state machine to remove.
       */
      ParseMatchesContext.prototype.removeMachine = function (stateMachine) {
          // Performance note: this was originally implemented with Array.prototype.splice()
          // and mutated the array in place. Switching to filter added ~280ops/sec
          // on the benchmark, although likely at the expense of GC time. Perhaps
          // in the future, we implement a rotating array so we never need to move
          // or clean anything up
          var previousMachines = this._stateMachines;
          var remainingMachines = previousMachines.filter(function (m) { return m !== stateMachine; });
          this._stateMachines = remainingMachines;
          // Keep the scheme-URL counter in sync with what was actually removed.
          // Decrementing unconditionally would let the counter drift (even go
          // negative) if a machine is removed twice, which would make
          // hasSchemeUrlMachine() report false while a scheme machine is active.
          var removedCount = previousMachines.length - remainingMachines.length;
          if (removedCount > 0 && isSchemeUrlStateMachine(stateMachine)) {
              this.schemeUrlMachinesCount -= removedCount;
          }
      };
      /**
       * Quick test that lets callers skip a slow section of code when a
       * scheme-URL state machine is already active.
       *
       * @return `true` if at least one scheme-URL state machine is active.
       */
      ParseMatchesContext.prototype.hasSchemeUrlMachine = function () {
          return this.schemeUrlMachinesCount > 0;
      };
      return ParseMatchesContext;
  }());
  /**
   * Parses URL, email, hashtag, mention, and phone number matches from the
   * given `text` by feeding it, one character at a time, to a set of
   * concurrently running state machines.
   *
   * @param text The string to scan for matches.
   * @param args Options object handed to the `ParseMatchesContext` constructor.
   * @return The `matches` array accumulated on the context.
   */
  export function parseMatches(text, args) {
      // Create the context object that will be passed to all state functions
      var context = new ParseMatchesContext(text, args);
      // For debugging: search for and uncomment other "For debugging" lines
      // const table = new CliTable({
      // head: ['charIdx', 'char', 'code', 'type', 'states', 'startIdx', 'reached accept state'],
      // });
      for (; context.charIdx < context.text.length; context.charIdx++) {
          var char = text.charAt(context.charIdx);
          var charCode = text.charCodeAt(context.charIdx);
          if (context.stateMachines.length === 0) {
              stateNoMatch(context, char, charCode);
          }
          else {
              // Must loop through the state machines backwards for when one
              // is removed
              for (var stateIdx = context.stateMachines.length - 1; stateIdx >= 0; stateIdx--) {
                  var stateMachine = context.stateMachines[stateIdx];
                  switch (stateMachine.state) {
                      // Protocol-relative URL states
                      case 11 /* State.ProtocolRelativeSlash1 */:
                          stateProtocolRelativeSlash1(context, stateMachine, charCode);
                          break;
                      case 12 /* State.ProtocolRelativeSlash2 */:
                          stateProtocolRelativeSlash2(context, stateMachine, charCode);
                          break;
                      case 0 /* State.SchemeChar */:
                          stateSchemeChar(context, stateMachine, charCode);
                          break;
                      case 1 /* State.SchemeHyphen */:
                          stateSchemeHyphen(context, stateMachine, charCode);
                          break;
                      case 2 /* State.SchemeColon */:
                          stateSchemeColon(context, stateMachine, charCode);
                          break;
                      case 3 /* State.SchemeSlash1 */:
                          stateSchemeSlash1(context, stateMachine, charCode);
                          break;
                      case 4 /* State.SchemeSlash2 */:
                          stateSchemeSlash2(context, stateMachine, char, charCode);
                          break;
                      case 5 /* State.DomainLabelChar */:
                          stateDomainLabelChar(context, stateMachine, charCode);
                          break;
                      case 6 /* State.DomainHyphen */:
                          stateDomainHyphen(context, stateMachine, char, charCode);
                          break;
                      case 7 /* State.DomainDot */:
                          stateDomainDot(context, stateMachine, char, charCode);
                          break;
                      case 13 /* State.IpV4Digit */:
                          stateIpV4Digit(context, stateMachine, charCode);
                          break;
                      case 14 /* State.IpV4Dot */:
                          stateIpV4Dot(context, stateMachine, charCode);
                          break;
                      case 8 /* State.PortColon */:
                          statePortColon(context, stateMachine, charCode);
                          break;
                      case 9 /* State.PortNumber */:
                          statePortNumber(context, stateMachine, charCode);
                          break;
                      case 10 /* State.Path */:
                          statePath(context, stateMachine, charCode);
                          break;
                      // Email States
                      case 15 /* State.EmailMailto_M */:
                          stateEmailMailto_M(context, stateMachine, char, charCode);
                          break;
                      case 16 /* State.EmailMailto_A */:
                          stateEmailMailto_A(context, stateMachine, char, charCode);
                          break;
                      case 17 /* State.EmailMailto_I */:
                          stateEmailMailto_I(context, stateMachine, char, charCode);
                          break;
                      case 18 /* State.EmailMailto_L */:
                          stateEmailMailto_L(context, stateMachine, char, charCode);
                          break;
                      case 19 /* State.EmailMailto_T */:
                          stateEmailMailto_T(context, stateMachine, char, charCode);
                          break;
                      case 20 /* State.EmailMailto_O */:
                          stateEmailMailto_O(context, stateMachine, charCode);
                          break;
                      case 21 /* State.EmailMailto_Colon */:
                          stateEmailMailtoColon(context, stateMachine, charCode);
                          break;
                      case 22 /* State.EmailLocalPart */:
                          stateEmailLocalPart(context, stateMachine, charCode);
                          break;
                      case 23 /* State.EmailLocalPartDot */:
                          stateEmailLocalPartDot(context, stateMachine, charCode);
                          break;
                      case 24 /* State.EmailAtSign */:
                          stateEmailAtSign(context, stateMachine, charCode);
                          break;
                      case 25 /* State.EmailDomainChar */:
                          stateEmailDomainChar(context, stateMachine, charCode);
                          break;
                      case 26 /* State.EmailDomainHyphen */:
                          stateEmailDomainHyphen(context, stateMachine, charCode);
                          break;
                      case 27 /* State.EmailDomainDot */:
                          stateEmailDomainDot(context, stateMachine, charCode);
                          break;
                      // Hashtag states
                      case 28 /* State.HashtagHashChar */:
                          stateHashtagHashChar(context, stateMachine, charCode);
                          break;
                      case 29 /* State.HashtagTextChar */:
                          stateHashtagTextChar(context, stateMachine, charCode);
                          break;
                      // Mention states
                      case 30 /* State.MentionAtChar */:
                          stateMentionAtChar(context, stateMachine, charCode);
                          break;
                      case 31 /* State.MentionTextChar */:
                          stateMentionTextChar(context, stateMachine, charCode);
                          break;
                      // Phone number states
                      case 32 /* State.PhoneNumberOpenParen */:
                          statePhoneNumberOpenParen(context, stateMachine, char, charCode);
                          break;
                      case 33 /* State.PhoneNumberAreaCodeDigit1 */:
                          statePhoneNumberAreaCodeDigit1(context, stateMachine, charCode);
                          break;
                      case 34 /* State.PhoneNumberAreaCodeDigit2 */:
                          statePhoneNumberAreaCodeDigit2(context, stateMachine, charCode);
                          break;
                      case 35 /* State.PhoneNumberAreaCodeDigit3 */:
                          statePhoneNumberAreaCodeDigit3(context, stateMachine, charCode);
                          break;
                      case 36 /* State.PhoneNumberCloseParen */:
                          statePhoneNumberCloseParen(context, stateMachine, char, charCode);
                          break;
                      case 37 /* State.PhoneNumberPlus */:
                          statePhoneNumberPlus(context, stateMachine, char, charCode);
                          break;
                      case 38 /* State.PhoneNumberDigit */:
                          statePhoneNumberDigit(context, stateMachine, char, charCode);
                          break;
                      case 39 /* State.PhoneNumberSeparator */:
                          statePhoneNumberSeparator(context, stateMachine, char, charCode);
                          break;
                      case 40 /* State.PhoneNumberControlChar */:
                          statePhoneNumberControlChar(context, stateMachine, charCode);
                          break;
                      case 41 /* State.PhoneNumberPoundChar */:
                          statePhoneNumberPoundChar(context, stateMachine, charCode);
                          break;
                      /* istanbul ignore next */
                      default:
                          assertNever(stateMachine.state);
                  }
              }
              // Special case for handling a colon (or other non-alphanumeric)
              // when preceded by another character, such as in the text:
              // Link 1:http://google.com
              // In this case, the 'h' character after the colon wouldn't start a
              // new scheme url because we'd be in a ipv4 or tld url and the colon
              // would be interpreted as a port ':' char. Also, only start a new
              // scheme url machine if there isn't currently one so we don't start
              // new ones for colons inside a url
              //
              // TODO: The addition of this snippet (to fix the bug) in 4.0.1 lost
              // us ~500 ops/sec on the benchmarks. Optimizing it with the
              // hasSchemeUrlMachine() flag and optimizing the isSchemeStartChar()
              // method for 4.1.3 got us back about ~400ops/sec. One potential way
              // to improve this even more is to add this snippet to individual
              // state handler functions where it can occur to prevent running it
              // on every loop iteration.
              if (!context.hasSchemeUrlMachine() &&
                  context.charIdx > 0 &&
                  isSchemeStartChar(charCode)) {
                  var prevCharCode = context.text.charCodeAt(context.charIdx - 1);
                  if (!isSchemeStartChar(prevCharCode)) {
                      context.addMachine(createSchemeUrlStateMachine(context.charIdx, 0 /* State.SchemeChar */));
                  }
              }
          }
          // For debugging: search for and uncomment other "For debugging" lines
          // table.push([
          // String(context.charIdx),
          // char,
          // `10: ${char.charCodeAt(0)}\n0x: ${char.charCodeAt(0).toString(16)}\nU+${char.codePointAt(0)}`,
          // context.stateMachines.map(machine => `${StateMachineType[machine.type]}${'matchType' in machine ? ` (${UrlStateMachineMatchType[machine.matchType]})` : ''}`).join('\n') || '(none)',
          // context.stateMachines.map(machine => State[machine.state]).join('\n') || '(none)',
          // context.stateMachines.map(m => m.startIdx).join('\n'),
          // context.stateMachines.map(m => m.acceptStateReached).join('\n'),
          // ]);
      }
      // Capture any valid match that is still in progress at the end of the
      // string. Walk a snapshot of the active machines exactly once, in
      // insertion order: captureMatchIfValidAndRemove() is expected to remove
      // each machine from the context as it goes, so iterating the live list
      // (or re-running the pass once per machine) would be fragile and
      // redundant.
      var pendingMachines = context.stateMachines.slice();
      for (var i = 0; i < pendingMachines.length; i++) {
          captureMatchIfValidAndRemove(context, pendingMachines[i]);
      }
      // For debugging: search for and uncomment other "For debugging" lines
      // console.log(`\nRead string:\n ${text}`);
      // console.log(table.toString());
      return context.matches;
  }
  /**
   * Handles a character when no state machines are active, i.e. when we are
   * not currently inside a potential URL/email/hashtag/mention/phone number.
   * Starts one state machine for every kind of match the character could
   * begin, and none at all if it cannot begin a match.
   *
   * @param context The shared parse context; new machines are registered via
   *   `context.addMachine()` with `context.charIdx` as their start index.
   * @param char The current character.
   * @param charCode The char code of the current character.
   */
  function stateNoMatch(context, char, charCode) {
      var startIdx = context.charIdx;
      // Characters that begin exactly one kind of match
      switch (charCode) {
          case 35 /* Char.NumberSign */: // '#' starts a Hashtag match
              context.addMachine(createHashtagStateMachine(startIdx, 28 /* State.HashtagHashChar */));
              return;
          case 64 /* Char.AtSign */: // '@' starts a Mention match
              context.addMachine(createMentionStateMachine(startIdx, 30 /* State.MentionAtChar */));
              return;
          case 47 /* Char.Slash */: // '/' could begin a protocol-relative URL
              context.addMachine(createTldUrlStateMachine(startIdx, 11 /* State.ProtocolRelativeSlash1 */));
              return;
          case 43 /* Char.Plus */: // '+' can start a Phone number
              context.addMachine(createPhoneNumberStateMachine(startIdx, 37 /* State.PhoneNumberPlus */));
              return;
          case 40 /* Char.OpenParen */: // '(' can start a Phone number
              context.addMachine(createPhoneNumberStateMachine(startIdx, 32 /* State.PhoneNumberOpenParen */));
              return;
      }
      // Any other character may begin several kinds of match at once
      if (isDigitChar(charCode)) {
          // A digit could start a phone number or an IP address
          context.addMachine(createPhoneNumberStateMachine(startIdx, 38 /* State.PhoneNumberDigit */));
          context.addMachine(createIpV4UrlStateMachine(startIdx, 13 /* State.IpV4Digit */));
      }
      if (isEmailLocalPartStartChar(charCode)) {
          // Any email local part. An 'm' character in particular could
          // start a 'mailto:' match
          var emailStartState = 22 /* State.EmailLocalPart */;
          if (char.toLowerCase() === 'm') {
              emailStartState = 15 /* State.EmailMailto_M */;
          }
          context.addMachine(createEmailStateMachine(startIdx, emailStartState));
      }
      if (isSchemeStartChar(charCode)) {
          // An uppercase or lowercase letter may start a scheme match
          context.addMachine(createSchemeUrlStateMachine(startIdx, 0 /* State.SchemeChar */));
      }
      if (isAlphaNumericOrMarkChar(charCode)) {
          // An alpha character or digit could start a domain name label
          // for a TLD match
          context.addMachine(createTldUrlStateMachine(startIdx, 5 /* State.DomainLabelChar */));
      }
      // Anything else: remain in the "non-url" state by not creating any
      // state machines
  }
  /**
   * Handles a character while reading the characters of a potential URI scheme.
   * Implements ABNF: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
   *
   * @param context The shared parse context.
   * @param stateMachine The scheme-URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateSchemeChar(context, stateMachine, charCode) {
      switch (charCode) {
          case 58 /* Char.Colon */: // ':'
              stateMachine.state = 2 /* State.SchemeColon */;
              return;
          case 45 /* Char.Dash */: // '-'
              stateMachine.state = 1 /* State.SchemeHyphen */;
              return;
      }
      // A scheme character keeps us in the SchemeChar state; any other
      // character means this is not a scheme
      if (!isSchemeChar(charCode)) {
          context.removeMachine(stateMachine);
      }
  }
  /**
   * Handles the character following a '-' inside a potential URI scheme.
   *
   * @param context The shared parse context.
   * @param stateMachine The scheme-URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateSchemeHyphen(context, stateMachine, charCode) {
      var currentIdx = context.charIdx;
      if (charCode === 45 /* Char.Dash */ /* '-' */) {
          // Another dash: stay in the SchemeHyphen state
          // TODO: Should a colon following a dash be counted as the end of the scheme?
          // } else if (char === ':') {
          // stateMachine.state = State.SchemeColon;
          return;
      }
      if (charCode === 47 /* Char.Slash */ /* '/' */) {
          // Not a valid scheme match, but may be the start of a
          // protocol-relative match (such as //google.com)
          context.removeMachine(stateMachine);
          context.addMachine(createTldUrlStateMachine(currentIdx, 11 /* State.ProtocolRelativeSlash1 */));
          return;
      }
      if (isSchemeChar(charCode)) {
          stateMachine.state = 0 /* State.SchemeChar */;
          return;
      }
      // Any other character, not a scheme
      context.removeMachine(stateMachine);
  }
  /**
   * Handles the character following the ':' that ends a potential URI scheme.
   * See https://tools.ietf.org/html/rfc3986#appendix-A
   *
   * @param context The shared parse context.
   * @param stateMachine The scheme-URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateSchemeColon(context, stateMachine, charCode) {
      var currentIdx = context.charIdx;
      if (charCode === 47 /* Char.Slash */ /* '/' */) {
          stateMachine.state = 3 /* State.SchemeSlash1 */;
          return;
      }
      // A '.' means we've read something like 'hello:.' - don't capture.
      // Likewise for anything that can't start a domain label.
      var isDot = charCode === 46 /* Char.Dot */;
      if (isDot || !isDomainLabelStartChar(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 5 /* State.DomainLabelChar */;
      // It's possible that what we read so far was only an "introduction"
      // piece of text, and the character after the colon starts the actual
      // scheme, e.g.:
      // "The link:http://google.com"
      // Hence, start a new machine to capture that match if so
      if (isSchemeStartChar(charCode)) {
          context.addMachine(createSchemeUrlStateMachine(currentIdx, 0 /* State.SchemeChar */));
      }
  }
  /**
   * Handles the character following the first '/' after a scheme's colon.
   * See https://tools.ietf.org/html/rfc3986#appendix-A
   *
   * @param context The shared parse context.
   * @param stateMachine The scheme-URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateSchemeSlash1(context, stateMachine, charCode) {
      if (charCode === 47 /* Char.Slash */ /* '/' */) {
          stateMachine.state = 4 /* State.SchemeSlash2 */;
          return;
      }
      if (!isPathChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      // A path character directly after the single slash: we're in the path
      stateMachine.state = 10 /* State.Path */;
      stateMachine.acceptStateReached = true;
  }
  /**
   * Handles the character following the second '/' after a scheme's colon.
   *
   * @param context The shared parse context.
   * @param stateMachine The scheme-URL state machine being advanced.
   * @param char The current character (not used by this handler).
   * @param charCode The char code of the current character.
   */
  function stateSchemeSlash2(context, stateMachine, char, charCode) {
      var nextState;
      if (charCode === 47 /* Char.Slash */ /* '/' */) {
          // 3rd slash, must be an absolute path (`path-absolute` in the
          // ABNF), such as in "file:///c:/windows/etc". See
          // https://tools.ietf.org/html/rfc3986#appendix-A
          nextState = 10 /* State.Path */;
      }
      else if (isDomainLabelStartChar(charCode)) {
          // start of "authority" section - see https://tools.ietf.org/html/rfc3986#appendix-A
          nextState = 5 /* State.DomainLabelChar */;
      }
      else {
          // not valid
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = nextState;
      stateMachine.acceptStateReached = true;
  }
  /**
   * Handles the character following a first '/' that may begin a
   * protocol-relative URL (such as //google.com).
   *
   * @param context The shared parse context.
   * @param stateMachine The state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateProtocolRelativeSlash1(context, stateMachine, charCode) {
      if (charCode !== 47 /* Char.Slash */ /* '/' */) {
          // Anything but a second slash cannot be the start of a
          // protocol-relative URL.
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 12 /* State.ProtocolRelativeSlash2 */;
  }
  /**
   * Handles the character following a second '/', which could start the
   * domain name of a protocol-relative URL.
   *
   * @param context The shared parse context.
   * @param stateMachine The state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateProtocolRelativeSlash2(context, stateMachine, charCode) {
      if (!isDomainLabelStartChar(charCode)) {
          // Anything else, not a URL
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 5 /* State.DomainLabelChar */;
  }
  /**
   * Handles the character following a domain label character.
   *
   * @param context The shared parse context.
   * @param stateMachine The URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateDomainLabelChar(context, stateMachine, charCode) {
      switch (charCode) {
          case 46 /* Char.Dot */: // '.'
              stateMachine.state = 7 /* State.DomainDot */;
              return;
          case 45 /* Char.Dash */: // '-'
              stateMachine.state = 6 /* State.DomainHyphen */;
              return;
          case 58 /* Char.Colon */: // ':'
              // Beginning of a port number, end the domain name
              stateMachine.state = 8 /* State.PortColon */;
              return;
      }
      if (isUrlSuffixStartChar(charCode)) {
          // '/', '?', or '#'
          stateMachine.state = 10 /* State.Path */;
          return;
      }
      // Another label character keeps us in the DomainLabelChar state;
      // anything else ends the domain name
      if (!isDomainLabelChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  /**
   * Handles the character following a '-' inside a domain label.
   *
   * @param context The shared parse context.
   * @param stateMachine The URL state machine being advanced.
   * @param char The current character (not used by this handler).
   * @param charCode The char code of the current character.
   */
  function stateDomainHyphen(context, stateMachine, char, charCode) {
      if (charCode === 45 /* Char.Dash */ /* '-' */) {
          // Remain in the DomainHyphen state
          return;
      }
      // It's not valid to have a '-.' in a domain label, so a dot ends the
      // domain name just like any character that can't start a label does
      var isDot = charCode === 46 /* Char.Dot */;
      if (!isDot && isDomainLabelStartChar(charCode)) {
          stateMachine.state = 5 /* State.DomainLabelChar */;
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  /**
   * Handles the character following a '.' in a domain name.
   *
   * @param context The shared parse context.
   * @param stateMachine The URL state machine being advanced.
   * @param char The current character (not used by this handler).
   * @param charCode The char code of the current character.
   */
  function stateDomainDot(context, stateMachine, char, charCode) {
      // Domain names cannot have multiple '.'s next to each other. It's
      // possible we've already read a valid domain name though, and that the
      // '..' sequence just forms an ellipsis at the end of a sentence - so a
      // second dot ends the domain name, as does any other character that
      // can't start a domain label.
      var isSecondDot = charCode === 46 /* Char.Dot */;
      if (isSecondDot || !isDomainLabelStartChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.state = 5 /* State.DomainLabelChar */;
      // After hitting a dot, and then another domain label, we've reached an
      // accept state
      stateMachine.acceptStateReached = true;
  }
  /**
   * Handles the character following a digit of a potential IPv4 address.
   *
   * @param context The shared parse context.
   * @param stateMachine The IPv4 URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateIpV4Digit(context, stateMachine, charCode) {
      switch (charCode) {
          case 46 /* Char.Dot */: // '.'
              stateMachine.state = 14 /* State.IpV4Dot */;
              return;
          case 58 /* Char.Colon */: // ':'
              // Beginning of a port number
              stateMachine.state = 8 /* State.PortColon */;
              return;
      }
      if (isDigitChar(charCode)) {
          // stay in the IPv4 digit state
          return;
      }
      if (isUrlSuffixStartChar(charCode)) {
          stateMachine.state = 10 /* State.Path */;
          return;
      }
      if (isAlphaNumericOrMarkChar(charCode)) {
          // If we hit an alpha character, must not be an IPv4
          // Example of this: 1.2.3.4abc
          context.removeMachine(stateMachine);
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  /**
   * Handles the character following a '.' in a potential IPv4 address.
   *
   * @param context The shared parse context.
   * @param stateMachine The IPv4 URL state machine being advanced; its
   *   `octetsEncountered` counter is incremented when a digit follows the dot.
   * @param charCode The char code of the current character.
   */
  function stateIpV4Dot(context, stateMachine, charCode) {
      if (!isDigitChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      var octetCount = ++stateMachine.octetsEncountered;
      // Once we have encountered 4 octets, it's *potentially* a valid
      // IPv4 address. Our IPv4 regex will confirm the match later
      // though to make sure each octet is in the 0-255 range, and
      // there's exactly 4 octets (not 5 or more)
      if (octetCount === 4) {
          stateMachine.acceptStateReached = true;
      }
      stateMachine.state = 13 /* State.IpV4Digit */;
  }
  /**
   * Handles the character following the ':' that may introduce a port number.
   *
   * @param context The shared parse context.
   * @param stateMachine The URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function statePortColon(context, stateMachine, charCode) {
      if (!isDigitChar(charCode)) {
          // No digit after the colon: the match (if any) ended before it
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.state = 9 /* State.PortNumber */;
  }
  /**
   * Handles the character following a digit of a port number.
   *
   * @param context The shared parse context.
   * @param stateMachine The URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function statePortNumber(context, stateMachine, charCode) {
      if (isDigitChar(charCode)) {
          // Stay in port number state
          return;
      }
      if (!isUrlSuffixStartChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      // '/', '?', or '#'
      stateMachine.state = 10 /* State.Path */;
  }
  /**
   * Handles a character while reading the path portion of a URL.
   *
   * @param context The shared parse context.
   * @param stateMachine The URL state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function statePath(context, stateMachine, charCode) {
      // A path character keeps us in the Path state; anything else ends the
      // URL
      if (!isPathChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  /**
   * Handles the character following the 'm' of a possible 'mailto:' prefix.
   * If the prefix is broken, the character is treated as part of an email
   * local part instead.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param char The current character.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailto_M(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'a';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 16 /* State.EmailMailto_A */;
  }
  /**
   * Handles the character following the 'ma' of a possible 'mailto:' prefix.
   * If the prefix is broken, the character is treated as part of an email
   * local part instead.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param char The current character.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailto_A(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'i';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 17 /* State.EmailMailto_I */;
  }
  /**
   * Handles the character following the 'mai' of a possible 'mailto:' prefix.
   * If the prefix is broken, the character is treated as part of an email
   * local part instead.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param char The current character.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailto_I(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'l';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 18 /* State.EmailMailto_L */;
  }
  /**
   * Handles the character following the 'mail' of a possible 'mailto:'
   * prefix. If the prefix is broken, the character is treated as part of an
   * email local part instead.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param char The current character.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailto_L(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 't';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 19 /* State.EmailMailto_T */;
  }
  /**
   * Handles the character following the 'mailt' of a possible 'mailto:'
   * prefix. If the prefix is broken, the character is treated as part of an
   * email local part instead.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param char The current character.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailto_T(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'o';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 20 /* State.EmailMailto_O */;
  }
  /**
   * Handles the character following the 'mailto' of a possible 'mailto:'
   * prefix. If it is not the ':' that completes the prefix, the character is
   * treated as part of an email local part instead.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailto_O(context, stateMachine, charCode) {
      if (charCode !== 58 /* Char.Colon */ /* ':' */) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 21 /* State.EmailMailto_Colon */;
  }
  /**
   * Handles the character following a complete 'mailto:' prefix.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateEmailMailtoColon(context, stateMachine, charCode) {
      if (!isEmailLocalPartChar(charCode)) {
          // Nothing that could be an email address follows the prefix
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 22 /* State.EmailLocalPart */;
  }
  /**
   * Handles a character while in the "local part" of an email address (as
   * opposed to the "domain part"). Also called by the 'mailto' states when the
   * 'mailto' prefix turns out to be broken.
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateEmailLocalPart(context, stateMachine, charCode) {
      switch (charCode) {
          case 46 /* Char.Dot */: // '.'
              stateMachine.state = 23 /* State.EmailLocalPartDot */;
              return;
          case 64 /* Char.AtSign */: // '@'
              stateMachine.state = 24 /* State.EmailAtSign */;
              return;
      }
      if (!isEmailLocalPartChar(charCode)) {
          // not an email address character
          context.removeMachine(stateMachine);
          return;
      }
      // Still in the "local part" of the email address. The state is set
      // explicitly because this function may have been called from one of
      // the 'mailto' states, in which case the machine isn't in the
      // EmailLocalPart state yet.
      stateMachine.state = 22 /* State.EmailLocalPart */;
  }
  /**
   * Handles the character following a '.' in the local part of an email
   * address (i.e. the part before the '@' character).
   *
   * @param context The shared parse context.
   * @param stateMachine The email state machine being advanced.
   * @param charCode The char code of the current character.
   */
  function stateEmailLocalPartDot(context, stateMachine, charCode) {
      // A second '.' in a row is not a valid local part, and an '@'
      // immediately after a dot is not an email address either
      var invalidAfterDot = charCode === 46 /* Char.Dot */ || charCode === 64 /* Char.AtSign */;
      if (!invalidAfterDot && isEmailLocalPartChar(charCode)) {
          stateMachine.state = 22 /* State.EmailLocalPart */;
          return;
      }
      // Anything else, not an email address
      context.removeMachine(stateMachine);
  }
  // Handles the character that follows the '@' of an email address: it must
  // be able to start a domain label, otherwise this is not an email address
  function stateEmailAtSign(context, stateMachine, charCode) {
      if (!isDomainLabelStartChar(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 25 /* State.EmailDomainChar */;
  }
  // Handles a character read while inside a label of the email's domain part
  function stateEmailDomainChar(context, stateMachine, charCode) {
      switch (charCode) {
          case 46 /* Char.Dot */ /* '.' */:
              stateMachine.state = 27 /* State.EmailDomainDot */;
              return;
          case 45 /* Char.Dash */ /* '-' */:
              stateMachine.state = 26 /* State.EmailDomainHyphen */;
              return;
      }
      // A domain label character keeps us in the DomainChar state. Anything
      // else ends the email address, which is captured if it qualifies
      if (!isDomainLabelChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  // Handles the character that follows a '-' in the email's domain part
  function stateEmailDomainHyphen(context, stateMachine, charCode) {
      // Two hyphens ("--") or hyphen+dot ("-.") are not valid, so those are
      // ruled out before asking whether the char can continue the label
      var isHyphenOrDot = charCode === 45 /* Char.Dash */ /* '-' */ ||
          charCode === 46 /* Char.Dot */ /* '.' */;
      if (!isHyphenOrDot && isDomainLabelChar(charCode)) {
          stateMachine.state = 25 /* State.EmailDomainChar */;
          return;
      }
      // The address ends here; capture whatever has qualified so far
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  // Handles the character that follows a '.' in the email's domain part
  function stateEmailDomainDot(context, stateMachine, charCode) {
      // Two dots ("..") or dot+hyphen (".-") are not valid, so those are
      // ruled out before asking whether the char can start a new label
      var isDotOrHyphen = charCode === 46 /* Char.Dot */ /* '.' */ ||
          charCode === 45 /* Char.Dash */ /* '-' */;
      if (isDotOrHyphen || !isDomainLabelStartChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.state = 25 /* State.EmailDomainChar */;
      // A '.' followed by a valid domain character means the domain part is
      // valid, so at least a partial EmailMatch exists from here on (more
      // characters of the address may still follow)
      stateMachine.acceptStateReached = true;
  }
  // Handles the character that directly follows a '#' character
  function stateHashtagHashChar(context, stateMachine, charCode) {
      if (!isHashtagTextChar(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      // '#' followed by a valid hashtag text char: a hashtag has begun and
      // can already be accepted
      stateMachine.state = 29 /* State.HashtagTextChar */;
      stateMachine.acceptStateReached = true;
  }
  // Handles a character read while inside the text of a hashtag
  function stateHashtagTextChar(context, stateMachine, charCode) {
      // Hashtag text chars simply keep the machine in this state; the first
      // character that is not one ends the hashtag
      if (!isHashtagTextChar(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  // Handles the character that directly follows an '@' character
  function stateMentionAtChar(context, stateMachine, charCode) {
      if (!isMentionTextChar(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      // '@' followed by a valid mention text char: a mention has begun and
      // can already be accepted
      stateMachine.state = 31 /* State.MentionTextChar */;
      stateMachine.acceptStateReached = true;
  }
  // Handles a character read while inside the text of a mention
  function stateMentionTextChar(context, stateMachine, charCode) {
      if (isMentionTextChar(charCode)) {
          // Keep reading characters in the MentionTextChar state
          return;
      }
      if (isAlphaNumericOrMarkChar(charCode)) {
          // An alphanumeric/mark char that did not pass the mention text
          // check above makes the whole mention invalid: drop the machine
          // without capturing anything
          context.removeMachine(stateMachine);
      }
      else {
          // Any other char simply ends the mention
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  // Handles the character that follows the '+' of a potential phone number
  function statePhoneNumberPlus(context, stateMachine, char, charCode) {
      if (isDigitChar(charCode)) {
          stateMachine.state = 38 /* State.PhoneNumberDigit */;
          return;
      }
      context.removeMachine(stateMachine);
      // The current character may itself begin a different match, so give
      // it the chance to start new machines
      stateNoMatch(context, char, charCode);
  }
  // Handles the character that follows a '(' of a potential phone number
  function statePhoneNumberOpenParen(context, stateMachine, char, charCode) {
      if (!isDigitChar(charCode)) {
          context.removeMachine(stateMachine);
      }
      else {
          stateMachine.state = 33 /* State.PhoneNumberAreaCodeDigit1 */;
      }
      // The paren may also have been just an opening brace in ordinary text,
      // so other machines are always given the chance to start here
      stateNoMatch(context, char, charCode);
  }
  // Handles the character after the first area code digit: only another
  // digit keeps the machine alive
  function statePhoneNumberAreaCodeDigit1(context, stateMachine, charCode) {
      if (!isDigitChar(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 34 /* State.PhoneNumberAreaCodeDigit2 */;
  }
  // Handles the character after the second area code digit: only another
  // digit keeps the machine alive
  function statePhoneNumberAreaCodeDigit2(context, stateMachine, charCode) {
      if (!isDigitChar(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 35 /* State.PhoneNumberAreaCodeDigit3 */;
  }
  // Handles the character after the third area code digit: the area code
  // must be closed by a ')' at this point
  function statePhoneNumberAreaCodeDigit3(context, stateMachine, charCode) {
      var closesAreaCode = charCode === 41 /* Char.CloseParen */ /* ')' */;
      if (!closesAreaCode) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 36 /* State.PhoneNumberCloseParen */;
  }
  // Handles the character that follows the ')' closing a phone number's
  // area code: a digit or a separator continues the number
  function statePhoneNumberCloseParen(context, stateMachine, char, charCode) {
      if (isDigitChar(charCode)) {
          stateMachine.state = 38 /* State.PhoneNumberDigit */;
          return;
      }
      if (isPhoneNumberSeparatorChar(charCode)) {
          stateMachine.state = 39 /* State.PhoneNumberSeparator */;
          return;
      }
      context.removeMachine(stateMachine);
  }
  // Handles the character that follows a digit of a phone number
  function statePhoneNumberDigit(context, stateMachine, char, charCode) {
      var currentIdx = context.charIdx;
      // Having read any digit at all is, for now, enough to mark the machine
      // as accepted; the phone number regex confirms the match later.
      // (Counting digits here would be an alternative to invoking that regex)
      stateMachine.acceptStateReached = true;
      if (isPhoneNumberControlChar(charCode)) {
          stateMachine.state = 40 /* State.PhoneNumberControlChar */;
          return;
      }
      if (charCode === 35 /* Char.NumberSign */ /* '#' */) {
          stateMachine.state = 41 /* State.PhoneNumberPoundChar */;
          return;
      }
      if (isDigitChar(charCode)) {
          // Another digit: remain in the phone number digit state
          return;
      }
      if (charCode === 40 /* Char.OpenParen */ /* '(' */) {
          stateMachine.state = 32 /* State.PhoneNumberOpenParen */;
          return;
      }
      if (isPhoneNumberSeparatorChar(charCode)) {
          stateMachine.state = 39 /* State.PhoneNumberSeparator */;
          return;
      }
      // Any other character ends the phone number
      captureMatchIfValidAndRemove(context, stateMachine);
      // Going from a digit straight to a letter can be the beginning of a
      // new scheme URL match, so start a machine for it at this character
      if (isSchemeStartChar(charCode)) {
          context.addMachine(createSchemeUrlStateMachine(currentIdx, 0 /* State.SchemeChar */));
      }
  }
  // Handles the character that follows a separator inside a phone number
  function statePhoneNumberSeparator(context, stateMachine, char, charCode) {
      if (isDigitChar(charCode)) {
          stateMachine.state = 38 /* State.PhoneNumberDigit */;
          return;
      }
      if (charCode === 40 /* Char.OpenParen */ /* '(' */) {
          stateMachine.state = 32 /* State.PhoneNumberOpenParen */;
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
      // The current character may itself begin a different match, so give
      // it the chance to start new machines
      stateNoMatch(context, char, charCode);
  }
  // Handles the character that follows a phone number control character.
  // The ";" character is "wait" in a phone number
  // The "," character is "pause" in a phone number
  function statePhoneNumberControlChar(context, stateMachine, charCode) {
      if (isPhoneNumberControlChar(charCode)) {
          // Consecutive control chars: remain in the "control char" state
          return;
      }
      if (charCode === 35 /* Char.NumberSign */ /* '#' */) {
          stateMachine.state = 41 /* State.PhoneNumberPoundChar */;
          return;
      }
      if (isDigitChar(charCode)) {
          stateMachine.state = 38 /* State.PhoneNumberDigit */;
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  // Handles the character that follows a "#" ("pound") character in a
  // phone number
  function statePhoneNumberPoundChar(context, stateMachine, charCode) {
      if (isPhoneNumberControlChar(charCode)) {
          stateMachine.state = 40 /* State.PhoneNumberControlChar */;
          return;
      }
      if (isDigitChar(charCode)) {
          // Some of the older tests treat a digit directly after a '#' sign
          // as making the match invalid. TODO: Revisit if this is true
          context.removeMachine(stateMachine);
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  /**
   * Removes the given state machine from the context and, if the machine has
   * reached an accept state and its text passes the validity check for its
   * type (URL, email, hashtag, mention or phone number), pushes the
   * corresponding match object onto `context.matches`.
   *
   * @param context The parse context; supplies the input text, the current
   *   character index, the match list and the match options.
   * @param stateMachine The state machine whose text should be captured.
   */
  function captureMatchIfValidAndRemove(context, stateMachine) {
      // Read everything needed from the context up front, before the
      // machine is removed from it
      var matches = context.matches;
      var text = context.text;
      var charIdx = context.charIdx;
      var tagBuilder = context.tagBuilder;
      var stripPrefix = context.stripPrefix;
      var stripTrailingSlash = context.stripTrailingSlash;
      var decodePercentEncoding = context.decodePercentEncoding;
      var hashtagServiceName = context.hashtagServiceName;
      var mentionServiceName = context.mentionServiceName;
      // The machine is removed unconditionally and first, because several
      // code paths below return early
      context.removeMachine(stateMachine);
      // A machine that never reached an "accept" state cannot be a match
      if (!stateMachine.acceptStateReached) {
          return;
      }
      var offset = stateMachine.startIdx;
      // Drop unbalanced trailing braces (parens, square brackets, curly
      // brackets) and trailing punctuation such as '?', ',', ':', '.', e.g.
      //     The link (google.com)
      //     Check out this link here (en.wikipedia.org/wiki/IANA_(disambiguation))
      var candidate = excludeUnbalancedTrailingBracesAndPunctuation(text.slice(offset, charIdx));
      var machineType = stateMachine.type;
      if (machineType === 0 /* StateMachineType.Url */) {
          // A URL directly preceded by an '@' character would really be the
          // domain of an email address, so it must not be matched as a URL
          if (text.charCodeAt(offset - 1) === 64 /* Char.AtSign */ /* '@' */) {
              return;
          }
          var urlKind = stateMachine.matchType;
          if (urlKind === 0 /* UrlStateMachineMatchType.Scheme */) {
              // Many characters are accepted in a URL's scheme (like
              // `fake://test.com`). When whitespace is missing before an
              // obvious link (`nowhitespacehttp://www.test.com`) the match
              // should begin at the http:// part, so if a common scheme is
              // found inside the text the match is shifted to start there
              var commonScheme = httpSchemeRe.exec(candidate);
              if (commonScheme) {
                  offset = offset + commonScheme.index;
                  candidate = candidate.slice(commonScheme.index);
              }
              if (!isValidSchemeUrl(candidate)) {
                  return; // not a valid match
              }
          }
          else if (urlKind === 1 /* UrlStateMachineMatchType.Tld */) {
              if (!isValidTldMatch(candidate)) {
                  return; // not a valid match
              }
          }
          else if (urlKind === 2 /* UrlStateMachineMatchType.IpV4 */) {
              if (!isValidIpV4Address(candidate)) {
                  return; // not a valid match
              }
          }
          else {
              /* istanbul ignore next */
              assertNever(stateMachine);
          }
          matches.push(new UrlMatch({
              tagBuilder: tagBuilder,
              matchedText: candidate,
              offset: offset,
              urlMatchType: toUrlMatchType(stateMachine.matchType),
              url: candidate,
              protocolRelativeMatch: candidate.slice(0, 2) === '//',
              // TODO: Do these settings need to be passed to the match,
              // or should we handle them here in UrlMatcher?
              stripPrefix: stripPrefix,
              stripTrailingSlash: stripTrailingSlash,
              decodePercentEncoding: decodePercentEncoding,
          }));
      }
      else if (machineType === 1 /* StateMachineType.Email */) {
          // Only an email address that passes validation becomes a match
          if (isValidEmail(candidate)) {
              matches.push(new EmailMatch({
                  tagBuilder: tagBuilder,
                  matchedText: candidate,
                  offset: offset,
                  email: candidate.replace(mailtoSchemePrefixRe, ''),
              }));
          }
      }
      else if (machineType === 2 /* StateMachineType.Hashtag */) {
          if (isValidHashtag(candidate)) {
              matches.push(new HashtagMatch({
                  tagBuilder: tagBuilder,
                  matchedText: candidate,
                  offset: offset,
                  serviceName: hashtagServiceName,
                  hashtag: candidate.slice(1),
              }));
          }
      }
      else if (machineType === 3 /* StateMachineType.Mention */) {
          if (isValidMention(candidate, mentionServiceName)) {
              matches.push(new MentionMatch({
                  tagBuilder: tagBuilder,
                  matchedText: candidate,
                  offset: offset,
                  serviceName: mentionServiceName,
                  mention: candidate.slice(1), // strip off the '@' character at the beginning
              }));
          }
      }
      else if (machineType === 4 /* StateMachineType.Phone */) {
          // Trailing spaces were treated as "separator" chars by the state
          // machine; they are not part of the phone number
          candidate = candidate.replace(/ +$/g, '');
          if (isValidPhoneNumber(candidate)) {
              // keep only digits plus the comma, semicolon and '#' characters
              var cleanNumber = candidate.replace(/[^0-9,;#]/g, '');
              matches.push(new PhoneMatch({
                  tagBuilder: tagBuilder,
                  matchedText: candidate,
                  offset: offset,
                  number: cleanNumber,
                  plusSign: candidate.charAt(0) === '+',
              }));
          }
      }
      else {
          /* istanbul ignore next */
          assertNever(stateMachine);
      }
  }
  /**
   * Translates a UrlStateMachineMatchType value into the equivalent
   * UrlMatchType string ('scheme', 'tld' or 'ipV4').
   */
  function toUrlMatchType(stateMachineMatchType) {
      if (stateMachineMatchType === 0 /* UrlStateMachineMatchType.Scheme */) {
          return 'scheme';
      }
      if (stateMachineMatchType === 1 /* UrlStateMachineMatchType.Tld */) {
          return 'tld';
      }
      if (stateMachineMatchType === 2 /* UrlStateMachineMatchType.IpV4 */) {
          return 'ipV4';
      }
      /* istanbul ignore next */
      assertNever(stateMachineMatchType);
  }
  /**
   * Maps each closing brace character to its matching opening brace
   * character.
   */
  var oppositeBrace = { ')': '(', '}': '{', ']': '[' };
  /**
   * Strips unmatched closing braces (parentheses, square brackets, curly
   * brackets) and trailing punctuation from the end of a match.
   *
   * Closing braces are valid URL path characters, so a match can legitimately
   * end in one, e.g. "wikipedia.com/something_(disambiguation)" should be
   * auto-linked in full. When the URL is itself wrapped in braces, however,
   * as in:
   *
   *     "(wikipedia.com/something_(disambiguation))"
   *
   * the final closing parenthesis has no opening partner inside the match and
   * is not part of the URL, so it is excluded from the returned text.
   *
   * Square brackets (for instance PHP array parameters) are treated the same
   * way:
   *
   *     "[http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]"
   *
   * where the very last closing square bracket is removed.
   *
   * @param matchedText The full matched URL/email/hashtag/etc. from the state
   *   machine parser.
   * @return The matched text with extraneous suffix characters removed.
   */
  export function excludeUnbalancedTrailingBracesAndPunctuation(matchedText) {
      // Net balance (opens minus closes) for each brace kind, keyed by the
      // opening brace character. A negative balance means the text contains
      // more closing braces of that kind than opening ones
      var balanceByOpener = {
          '(': 0,
          '{': 0,
          '[': 0,
      };
      var textLength = matchedText.length;
      for (var pos = 0; pos < textLength; pos++) {
          var code = matchedText.charCodeAt(pos);
          if (isOpenBraceChar(code)) {
              balanceByOpener[matchedText.charAt(pos)]++;
          }
          else if (isCloseBraceChar(code)) {
              balanceByOpener[oppositeBrace[matchedText.charAt(pos)]]--;
          }
      }
      // Walk backwards from the end, shrinking the number of characters kept
      // for as long as the last kept character is removable
      var keptLength = textLength;
      while (keptLength > 0) {
          var lastChar = matchedText.charAt(keptLength - 1);
          var lastCode = matchedText.charCodeAt(keptLength - 1);
          if (isCloseBraceChar(lastCode)) {
              var opener = oppositeBrace[lastChar];
              if (!(balanceByOpener[opener] < 0)) {
                  // This closing brace has a partner inside the text: keep it
                  break;
              }
              balanceByOpener[opener]++;
          }
          else if (!isUrlSuffixNotAllowedAsFinalChar(lastCode)) {
              // Neither a brace nor a punctuation char like '?', ',', ':',
              // '.', etc.: this is where the match really ends
              break;
          }
          keptLength--;
      }
      return matchedText.slice(0, keptLength);
  }
  /**
   * Creates a URL state machine of the "scheme" match type, starting at
   * `startIdx` in the given initial `state`, with no accept state reached yet.
   */
  function createSchemeUrlStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 0 /* StateMachineType.Url */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      machine.matchType = 0 /* UrlStateMachineMatchType.Scheme */;
      return machine;
  }
  /**
   * Creates a URL state machine of the "TLD" match type, starting at
   * `startIdx` in the given initial `state`, with no accept state reached yet.
   */
  function createTldUrlStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 0 /* StateMachineType.Url */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      machine.matchType = 1 /* UrlStateMachineMatchType.Tld */;
      return machine;
  }
  /**
   * Creates a URL state machine of the "IPv4" match type, starting at
   * `startIdx` in the given initial `state`, with no accept state reached yet.
   */
  function createIpV4UrlStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 0 /* StateMachineType.Url */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      machine.matchType = 2 /* UrlStateMachineMatchType.IpV4 */;
      // The count begins at 1 because this machine is created upon
      // encountering the first octet
      machine.octetsEncountered = 1;
      return machine;
  }
  /**
   * Creates an email state machine starting at `startIdx` in the given
   * initial `state`, with no accept state reached yet.
   */
  function createEmailStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 1 /* StateMachineType.Email */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      return machine;
  }
  /**
   * Creates a hashtag state machine starting at `startIdx` in the given
   * initial `state`, with no accept state reached yet.
   */
  function createHashtagStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 2 /* StateMachineType.Hashtag */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      return machine;
  }
  /**
   * Creates a mention state machine starting at `startIdx` in the given
   * initial `state`, with no accept state reached yet.
   */
  function createMentionStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 3 /* StateMachineType.Mention */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      return machine;
  }
  /**
   * Creates a phone number state machine starting at `startIdx` in the given
   * initial `state`, with no accept state reached yet.
   */
  function createPhoneNumberStateMachine(startIdx, state) {
      var machine = {};
      machine.type = 4 /* StateMachineType.Phone */;
      machine.startIdx = startIdx;
      machine.state = state;
      machine.acceptStateReached = false;
      return machine;
  }
  /**
   * Tells whether the given state machine is a URL state machine of the
   * "scheme" match type.
   */
  function isSchemeUrlStateMachine(machine) {
      if (machine.type !== 0 /* StateMachineType.Url */) {
          return false;
      }
      return machine.matchType === 0 /* UrlStateMachineMatchType.Scheme */;
  }
  1200. //# sourceMappingURL=parse-matches.js.map