智慧水务管理系统 - 精河县供水工程综合管理平台

parse-matches.js 51KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.parseMatches = parseMatches;
  4. exports.excludeUnbalancedTrailingBracesAndPunctuation = excludeUnbalancedTrailingBracesAndPunctuation;
  5. var url_match_1 = require("../match/url-match");
  6. var utils_1 = require("../utils");
  7. var uri_utils_1 = require("./uri-utils");
  8. var email_utils_1 = require("./email-utils");
  9. var email_match_1 = require("../match/email-match");
  10. var hashtag_utils_1 = require("./hashtag-utils");
  11. var hashtag_match_1 = require("../match/hashtag-match");
  12. var mention_utils_1 = require("./mention-utils");
  13. var mention_match_1 = require("../match/mention-match");
  14. var phone_number_utils_1 = require("./phone-number-utils");
  15. var phone_match_1 = require("../match/phone-match");
  16. var char_utils_1 = require("../char-utils");
  17. // For debugging: search for and uncomment other "For debugging" lines
  18. // import CliTable from 'cli-table';
  /**
   * Context object containing all the state needed by the state machine
   * functions during a single `parseMatches()` run.
   *
   * ## Historical note
   *
   * In v4.1.1, we used nested functions to handle the context via closures, but
   * this necessitated re-creating the functions for each call to `parseMatches()`,
   * which made them difficult for v8 to JIT optimize. In v4.1.2, we lifted all of
   * the functions to the top-level scope and passed the context object between
   * them, which allows the functions to be JIT compiled once and reused.
   */
  var ParseMatchesContext = /** @class */ (function () {
      /**
       * @param text The input string being scanned.
       * @param args Options object; `tagBuilder`, `stripPrefix`,
       *   `stripTrailingSlash`, `decodePercentEncoding`, `hashtagServiceName`
       *   and `mentionServiceName` are copied onto the context.
       */
      function ParseMatchesContext(text, args) {
          this.charIdx = 0; // Current character index being processed
          this.matches = []; // Collection of matches found
          this._stateMachines = []; // Array of active state machines
          // Number of active scheme-URL machines. Lets the parser skip a slow
          // code path when one already exists. Kept as a counter rather than a
          // boolean because more than one scheme machine may be active at once.
          this.schemeUrlMachinesCount = 0;
          this.text = text;
          this.tagBuilder = args.tagBuilder;
          this.stripPrefix = args.stripPrefix;
          this.stripTrailingSlash = args.stripTrailingSlash;
          this.decodePercentEncoding = args.decodePercentEncoding;
          this.hashtagServiceName = args.hashtagServiceName;
          this.mentionServiceName = args.mentionServiceName;
      }
      // Read-only view of the currently active state machines.
      Object.defineProperty(ParseMatchesContext.prototype, "stateMachines", {
          get: function () {
              return this._stateMachines;
          },
          enumerable: false,
          configurable: true
      });
      /**
       * Registers `stateMachine` as active and bumps the scheme-URL counter
       * when it is a scheme-URL machine.
       */
      ParseMatchesContext.prototype.addMachine = function (stateMachine) {
          this._stateMachines.push(stateMachine);
          if (isSchemeUrlStateMachine(stateMachine)) {
              this.schemeUrlMachinesCount++;
          }
      };
      /**
       * Removes `stateMachine` from the active list. Removing a machine that
       * is not in the list is a no-op, including for the scheme-URL counter.
       */
      ParseMatchesContext.prototype.removeMachine = function (stateMachine) {
          // Performance note: this was originally implemented with Array.prototype.splice()
          // and mutated the array in place. Switching to filter added ~280ops/sec
          // on the benchmark, although likely at the expense of GC time. Perhaps
          // in the future, we implement a rotating array so we never need to move
          // or clean anything up
          var countBefore = this._stateMachines.length;
          this._stateMachines = this._stateMachines.filter(function (m) { return m !== stateMachine; });
          var removedCount = countBefore - this._stateMachines.length;
          // Only adjust the counter for entries that were actually removed.
          // Decrementing unconditionally lets a repeated removal push the
          // counter below the true number of active scheme-URL machines.
          if (removedCount > 0 && isSchemeUrlStateMachine(stateMachine)) {
              this.schemeUrlMachinesCount -= removedCount;
          }
      };
      /**
       * @returns `true` while at least one scheme-URL machine is active.
       */
      ParseMatchesContext.prototype.hasSchemeUrlMachine = function () {
          return this.schemeUrlMachinesCount > 0;
      };
      return ParseMatchesContext;
  }());
  /**
   * Parses URL, email, hashtag, mention, and phone number matches from the
   * given `text`.
   *
   * The text is read one character at a time. Each active state machine is fed
   * the character through the handler for its current state; when no machine is
   * active, `stateNoMatch()` decides whether the character starts new ones.
   *
   * @param text The string to scan.
   * @param args Options forwarded to the `ParseMatchesContext` constructor.
   * @returns The context's `matches` array once the whole string has been read.
   */
  function parseMatches(text, args) {
      // Create the context object that will be passed to all state functions
      var context = new ParseMatchesContext(text, args);
      // For debugging: search for and uncomment other "For debugging" lines
      // const table = new CliTable({
      // head: ['charIdx', 'char', 'code', 'type', 'states', 'startIdx', 'reached accept state'],
      // });
      for (; context.charIdx < context.text.length; context.charIdx++) {
          var char = text.charAt(context.charIdx);
          var charCode = text.charCodeAt(context.charIdx);
          if (context.stateMachines.length === 0) {
              stateNoMatch(context, char, charCode);
          }
          else {
              // Must loop through the state machines backwards for when one
              // is removed
              for (var stateIdx = context.stateMachines.length - 1; stateIdx >= 0; stateIdx--) {
                  var stateMachine = context.stateMachines[stateIdx];
                  switch (stateMachine.state) {
                      // Protocol-relative URL states
                      case 11 /* State.ProtocolRelativeSlash1 */:
                          stateProtocolRelativeSlash1(context, stateMachine, charCode);
                          break;
                      case 12 /* State.ProtocolRelativeSlash2 */:
                          stateProtocolRelativeSlash2(context, stateMachine, charCode);
                          break;
                      case 0 /* State.SchemeChar */:
                          stateSchemeChar(context, stateMachine, charCode);
                          break;
                      case 1 /* State.SchemeHyphen */:
                          stateSchemeHyphen(context, stateMachine, charCode);
                          break;
                      case 2 /* State.SchemeColon */:
                          stateSchemeColon(context, stateMachine, charCode);
                          break;
                      case 3 /* State.SchemeSlash1 */:
                          stateSchemeSlash1(context, stateMachine, charCode);
                          break;
                      case 4 /* State.SchemeSlash2 */:
                          stateSchemeSlash2(context, stateMachine, char, charCode);
                          break;
                      case 5 /* State.DomainLabelChar */:
                          stateDomainLabelChar(context, stateMachine, charCode);
                          break;
                      case 6 /* State.DomainHyphen */:
                          stateDomainHyphen(context, stateMachine, char, charCode);
                          break;
                      case 7 /* State.DomainDot */:
                          stateDomainDot(context, stateMachine, char, charCode);
                          break;
                      case 13 /* State.IpV4Digit */:
                          stateIpV4Digit(context, stateMachine, charCode);
                          break;
                      case 14 /* State.IpV4Dot */:
                          stateIpV4Dot(context, stateMachine, charCode);
                          break;
                      case 8 /* State.PortColon */:
                          statePortColon(context, stateMachine, charCode);
                          break;
                      case 9 /* State.PortNumber */:
                          statePortNumber(context, stateMachine, charCode);
                          break;
                      case 10 /* State.Path */:
                          statePath(context, stateMachine, charCode);
                          break;
                      // Email States
                      case 15 /* State.EmailMailto_M */:
                          stateEmailMailto_M(context, stateMachine, char, charCode);
                          break;
                      case 16 /* State.EmailMailto_A */:
                          stateEmailMailto_A(context, stateMachine, char, charCode);
                          break;
                      case 17 /* State.EmailMailto_I */:
                          stateEmailMailto_I(context, stateMachine, char, charCode);
                          break;
                      case 18 /* State.EmailMailto_L */:
                          stateEmailMailto_L(context, stateMachine, char, charCode);
                          break;
                      case 19 /* State.EmailMailto_T */:
                          stateEmailMailto_T(context, stateMachine, char, charCode);
                          break;
                      case 20 /* State.EmailMailto_O */:
                          stateEmailMailto_O(context, stateMachine, charCode);
                          break;
                      case 21 /* State.EmailMailto_Colon */:
                          stateEmailMailtoColon(context, stateMachine, charCode);
                          break;
                      case 22 /* State.EmailLocalPart */:
                          stateEmailLocalPart(context, stateMachine, charCode);
                          break;
                      case 23 /* State.EmailLocalPartDot */:
                          stateEmailLocalPartDot(context, stateMachine, charCode);
                          break;
                      case 24 /* State.EmailAtSign */:
                          stateEmailAtSign(context, stateMachine, charCode);
                          break;
                      case 25 /* State.EmailDomainChar */:
                          stateEmailDomainChar(context, stateMachine, charCode);
                          break;
                      case 26 /* State.EmailDomainHyphen */:
                          stateEmailDomainHyphen(context, stateMachine, charCode);
                          break;
                      case 27 /* State.EmailDomainDot */:
                          stateEmailDomainDot(context, stateMachine, charCode);
                          break;
                      // Hashtag states
                      case 28 /* State.HashtagHashChar */:
                          stateHashtagHashChar(context, stateMachine, charCode);
                          break;
                      case 29 /* State.HashtagTextChar */:
                          stateHashtagTextChar(context, stateMachine, charCode);
                          break;
                      // Mention states
                      case 30 /* State.MentionAtChar */:
                          stateMentionAtChar(context, stateMachine, charCode);
                          break;
                      case 31 /* State.MentionTextChar */:
                          stateMentionTextChar(context, stateMachine, charCode);
                          break;
                      // Phone number states
                      case 32 /* State.PhoneNumberOpenParen */:
                          statePhoneNumberOpenParen(context, stateMachine, char, charCode);
                          break;
                      case 33 /* State.PhoneNumberAreaCodeDigit1 */:
                          statePhoneNumberAreaCodeDigit1(context, stateMachine, charCode);
                          break;
                      case 34 /* State.PhoneNumberAreaCodeDigit2 */:
                          statePhoneNumberAreaCodeDigit2(context, stateMachine, charCode);
                          break;
                      case 35 /* State.PhoneNumberAreaCodeDigit3 */:
                          statePhoneNumberAreaCodeDigit3(context, stateMachine, charCode);
                          break;
                      case 36 /* State.PhoneNumberCloseParen */:
                          statePhoneNumberCloseParen(context, stateMachine, char, charCode);
                          break;
                      case 37 /* State.PhoneNumberPlus */:
                          statePhoneNumberPlus(context, stateMachine, char, charCode);
                          break;
                      case 38 /* State.PhoneNumberDigit */:
                          statePhoneNumberDigit(context, stateMachine, char, charCode);
                          break;
                      case 39 /* State.PhoneNumberSeparator */:
                          statePhoneNumberSeparator(context, stateMachine, char, charCode);
                          break;
                      case 40 /* State.PhoneNumberControlChar */:
                          statePhoneNumberControlChar(context, stateMachine, charCode);
                          break;
                      case 41 /* State.PhoneNumberPoundChar */:
                          statePhoneNumberPoundChar(context, stateMachine, charCode);
                          break;
                      /* istanbul ignore next */
                      default:
                          (0, utils_1.assertNever)(stateMachine.state);
                  }
              }
              // Special case for handling a colon (or other non-alphanumeric)
              // when preceded by another character, such as in the text:
              // Link 1:http://google.com
              // In this case, the 'h' character after the colon wouldn't start a
              // new scheme url because we'd be in a ipv4 or tld url and the colon
              // would be interpreted as a port ':' char. Also, only start a new
              // scheme url machine if there isn't currently one so we don't start
              // new ones for colons inside a url
              //
              // TODO: The addition of this snippet (to fix the bug) in 4.0.1 lost
              // us ~500 ops/sec on the benchmarks. Optimizing it with the
              // hasSchemeUrlMachine() flag and optimizing the isSchemeStartChar()
              // method for 4.1.3 got us back about ~400ops/sec. One potential way
              // to improve this even more is to add this snippet to individual
              // state handler functions where it can occur to prevent running it
              // on every loop iteration.
              if (!context.hasSchemeUrlMachine() &&
                  context.charIdx > 0 &&
                  (0, uri_utils_1.isSchemeStartChar)(charCode)) {
                  var prevCharCode = context.text.charCodeAt(context.charIdx - 1);
                  if (!(0, uri_utils_1.isSchemeStartChar)(prevCharCode)) {
                      context.addMachine(createSchemeUrlStateMachine(context.charIdx, 0 /* State.SchemeChar */));
                  }
              }
          }
          // For debugging: search for and uncomment other "For debugging" lines
          // table.push([
          // String(context.charIdx),
          // char,
          // `10: ${char.charCodeAt(0)}\n0x: ${char.charCodeAt(0).toString(16)}\nU+${char.codePointAt(0)}`,
          // context.stateMachines.map(machine => `${StateMachineType[machine.type]}${'matchType' in machine ? ` (${UrlStateMachineMatchType[machine.matchType]})` : ''}`).join('\n') || '(none)',
          // context.stateMachines.map(machine => State[machine.state]).join('\n') || '(none)',
          // context.stateMachines.map(m => m.startIdx).join('\n'),
          // context.stateMachines.map(m => m.acceptStateReached).join('\n'),
          // ]);
      }
      // Capture any valid match at the end of the string. Each remaining
      // machine is offered to captureMatchIfValidAndRemove() exactly once, in
      // the order the machines were added. We walk a copy of the list so that
      // removals performed during capture cannot cause entries to be skipped.
      var remainingMachines = context.stateMachines.slice();
      for (var i = 0; i < remainingMachines.length; i++) {
          captureMatchIfValidAndRemove(context, remainingMachines[i]);
      }
      // For debugging: search for and uncomment other "For debugging" lines
      // console.log(`\nRead string:\n ${text}`);
      // console.log(table.toString());
      return context.matches;
  }
  /**
   * Handles a character read while no state machine is active, starting
   * whichever machines the character could begin.
   *
   * @param context Parse context; read for `charIdx`, and receives new machines.
   * @param char The current character as a string.
   * @param charCode The current character's char code.
   */
  function stateNoMatch(context, char, charCode) {
      var startIdx = context.charIdx;
      // Punctuation that can only begin one kind of match
      switch (charCode) {
          case 35 /* Char.NumberSign */: // '#' starts a Hashtag match
              context.addMachine(createHashtagStateMachine(startIdx, 28 /* State.HashtagHashChar */));
              return;
          case 64 /* Char.AtSign */: // '@' starts a Mention match
              context.addMachine(createMentionStateMachine(startIdx, 30 /* State.MentionAtChar */));
              return;
          case 47 /* Char.Slash */: // '/' could begin a protocol-relative URL
              context.addMachine(createTldUrlStateMachine(startIdx, 11 /* State.ProtocolRelativeSlash1 */));
              return;
          case 43 /* Char.Plus */: // '+' can start a phone number
              context.addMachine(createPhoneNumberStateMachine(startIdx, 37 /* State.PhoneNumberPlus */));
              return;
          case 40 /* Char.OpenParen */: // '(' can start a phone number
              context.addMachine(createPhoneNumberStateMachine(startIdx, 32 /* State.PhoneNumberOpenParen */));
              return;
      }
      // Every other character may start several kinds of match at once.
      // A character that satisfies none of the checks below starts nothing.
      if ((0, char_utils_1.isDigitChar)(charCode)) {
          // A digit could start either a phone number or an IP address
          context.addMachine(createPhoneNumberStateMachine(startIdx, 38 /* State.PhoneNumberDigit */));
          context.addMachine(createIpV4UrlStateMachine(startIdx, 13 /* State.IpV4Digit */));
      }
      if ((0, email_utils_1.isEmailLocalPartStartChar)(charCode)) {
          // Start of an email local part; an 'm' may instead be the first
          // letter of a 'mailto:' prefix
          var emailStartState;
          if (char.toLowerCase() === 'm') {
              emailStartState = 15 /* State.EmailMailto_M */;
          }
          else {
              emailStartState = 22 /* State.EmailLocalPart */;
          }
          context.addMachine(createEmailStateMachine(startIdx, emailStartState));
      }
      if ((0, uri_utils_1.isSchemeStartChar)(charCode)) {
          // Possible first character of a URL scheme
          context.addMachine(createSchemeUrlStateMachine(startIdx, 0 /* State.SchemeChar */));
      }
      if ((0, char_utils_1.isAlphaNumericOrMarkChar)(charCode)) {
          // Possible first character of a domain label for a TLD match
          context.addMachine(createTldUrlStateMachine(startIdx, 5 /* State.DomainLabelChar */));
      }
  }
  /**
   * Handles a character while reading a URL scheme.
   * Scheme grammar (ABNF): ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
   *
   * ':' moves to SchemeColon, '-' moves to SchemeHyphen, any other scheme
   * character keeps the current state, and anything else discards the machine.
   */
  function stateSchemeChar(context, stateMachine, charCode) {
      switch (charCode) {
          case 58 /* Char.Colon */: // ':'
              stateMachine.state = 2 /* State.SchemeColon */;
              return;
          case 45 /* Char.Dash */: // '-'
              stateMachine.state = 1 /* State.SchemeHyphen */;
              return;
      }
      if (!(0, uri_utils_1.isSchemeChar)(charCode)) {
          // Not a scheme character, so this cannot be a scheme
          context.removeMachine(stateMachine);
      }
  }
  /**
   * Handles the character following a '-' inside a URL scheme.
   *
   * Further dashes keep the state, a '/' abandons the scheme and starts a
   * protocol-relative machine at the current index, a scheme character returns
   * to SchemeChar, and anything else discards the machine.
   */
  function stateSchemeHyphen(context, stateMachine, charCode) {
      var idx = context.charIdx;
      if (charCode === 45 /* Char.Dash */) {
          // Another '-': stay in the SchemeHyphen state
          // TODO: Should a colon following a dash be counted as the end of the scheme?
          return;
      }
      if (charCode === 47 /* Char.Slash */) {
          // Not a valid scheme match, but the '/' may begin a
          // protocol-relative match (such as //google.com)
          context.removeMachine(stateMachine);
          context.addMachine(createTldUrlStateMachine(idx, 11 /* State.ProtocolRelativeSlash1 */));
          return;
      }
      if ((0, uri_utils_1.isSchemeChar)(charCode)) {
          stateMachine.state = 0 /* State.SchemeChar */;
          return;
      }
      // Any other character: not a scheme
      context.removeMachine(stateMachine);
  }
  /**
   * Handles the character following the ':' that ends a URL scheme.
   * See https://tools.ietf.org/html/rfc3986#appendix-A
   *
   * '/' moves to SchemeSlash1 and a domain-label start character moves to
   * DomainLabelChar. A '.' (as in 'hello:.') or any other character discards
   * the machine.
   */
  function stateSchemeColon(context, stateMachine, charCode) {
      var idx = context.charIdx;
      if (charCode === 47 /* Char.Slash */) {
          stateMachine.state = 3 /* State.SchemeSlash1 */;
          return;
      }
      if (charCode === 46 /* Char.Dot */ || !(0, uri_utils_1.isDomainLabelStartChar)(charCode)) {
          // Either something like 'hello:.' or a character that cannot
          // follow the colon: don't capture
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 5 /* State.DomainLabelChar */;
      // The text before the colon may have been an "introduction", with the
      // character after the colon starting the real scheme, for example:
      // "The link:http://google.com"
      // Start a second machine at this index to catch that case.
      if ((0, uri_utils_1.isSchemeStartChar)(charCode)) {
          context.addMachine(createSchemeUrlStateMachine(idx, 0 /* State.SchemeChar */));
      }
  }
  /**
   * Handles the character following the first '/' after a scheme's ':'.
   * See https://tools.ietf.org/html/rfc3986#appendix-A
   *
   * A second '/' moves to SchemeSlash2. A path character moves to Path and
   * marks the accept state as reached. Anything else ends the machine via
   * `captureMatchIfValidAndRemove()`.
   */
  function stateSchemeSlash1(context, stateMachine, charCode) {
      if (charCode === 47 /* Char.Slash */) {
          stateMachine.state = 4 /* State.SchemeSlash2 */;
          return;
      }
      if (!(0, uri_utils_1.isPathChar)(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.state = 10 /* State.Path */;
      stateMachine.acceptStateReached = true;
  }
  /**
   * Handles the character following the '//' after a scheme's ':'.
   *
   * A third '/' means an absolute path (`path-absolute` in the ABNF), such as
   * in "file:///c:/windows/etc". A domain-label start character begins the
   * "authority" section. Both mark the accept state as reached. Anything else
   * discards the machine. See https://tools.ietf.org/html/rfc3986#appendix-A
   *
   * `char` is accepted for signature compatibility and is not read.
   */
  function stateSchemeSlash2(context, stateMachine, char, charCode) {
      var nextState;
      if (charCode === 47 /* Char.Slash */) {
          nextState = 10 /* State.Path */;
      }
      else if ((0, uri_utils_1.isDomainLabelStartChar)(charCode)) {
          nextState = 5 /* State.DomainLabelChar */;
      }
      else {
          // Not valid
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = nextState;
      stateMachine.acceptStateReached = true;
  }
  /**
   * Handles the character following a single '/' read from the no-match
   * state. Only a second '/' can continue a protocol-relative URL; any other
   * character discards the machine.
   */
  function stateProtocolRelativeSlash1(context, stateMachine, charCode) {
      if (charCode !== 47 /* Char.Slash */) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 12 /* State.ProtocolRelativeSlash2 */;
  }
  /**
   * Handles the character following '//', which could start a
   * protocol-relative URL. A domain-label start character moves to
   * DomainLabelChar; anything else is not a URL and discards the machine.
   */
  function stateProtocolRelativeSlash2(context, stateMachine, charCode) {
      if (!(0, uri_utils_1.isDomainLabelStartChar)(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 5 /* State.DomainLabelChar */;
  }
  /**
   * Handles the character following a domain label character.
   *
   * '.' moves to DomainDot, '-' to DomainHyphen, ':' to PortColon, and a URL
   * suffix start character ('/', '?', or '#') to Path. Another domain label
   * character keeps the state. Anything else ends the domain name via
   * `captureMatchIfValidAndRemove()`.
   */
  function stateDomainLabelChar(context, stateMachine, charCode) {
      switch (charCode) {
          case 46 /* Char.Dot */:
              stateMachine.state = 7 /* State.DomainDot */;
              return;
          case 45 /* Char.Dash */:
              stateMachine.state = 6 /* State.DomainHyphen */;
              return;
          case 58 /* Char.Colon */:
              // Beginning of a port number, end of the domain name
              stateMachine.state = 8 /* State.PortColon */;
              return;
      }
      if ((0, uri_utils_1.isUrlSuffixStartChar)(charCode)) {
          stateMachine.state = 10 /* State.Path */;
          return;
      }
      if (!(0, uri_utils_1.isDomainLabelChar)(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  /**
   * Handles the character following a '-' inside a domain label.
   *
   * Further dashes keep the state and a domain-label start character returns
   * to DomainLabelChar. A '.' (a '-.' sequence is not valid in a domain label)
   * or any other character ends the machine via
   * `captureMatchIfValidAndRemove()`.
   *
   * `char` is accepted for signature compatibility and is not read.
   */
  function stateDomainHyphen(context, stateMachine, char, charCode) {
      if (charCode === 45 /* Char.Dash */) {
          return; // remain in the DomainHyphen state
      }
      var startsLabel = charCode !== 46 /* Char.Dot */ && (0, uri_utils_1.isDomainLabelStartChar)(charCode);
      if (startsLabel) {
          stateMachine.state = 5 /* State.DomainLabelChar */;
      }
      else {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  /**
   * Handles the character following a '.' in a domain name.
   *
   * A domain-label start character moves to DomainLabelChar and marks the
   * accept state as reached (a dot followed by another label). A second '.'
   * cannot be part of a domain name, but a valid name may already have been
   * read with the '..' forming an ellipsis, so that case - like any other
   * character - goes through `captureMatchIfValidAndRemove()`.
   *
   * `char` is accepted for signature compatibility and is not read.
   */
  function stateDomainDot(context, stateMachine, char, charCode) {
      var startsNextLabel = charCode !== 46 /* Char.Dot */ && (0, uri_utils_1.isDomainLabelStartChar)(charCode);
      if (!startsNextLabel) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.state = 5 /* State.DomainLabelChar */;
      stateMachine.acceptStateReached = true;
  }
  /**
   * Handles the character following a digit of a possible IPv4 address.
   *
   * '.' moves to IpV4Dot, ':' to PortColon, another digit keeps the state, and
   * a URL suffix start character moves to Path. An alphanumeric/mark character
   * means this is not an IPv4 address (e.g. "1.2.3.4abc"), so the machine is
   * discarded. Anything else ends the machine via
   * `captureMatchIfValidAndRemove()`.
   */
  function stateIpV4Digit(context, stateMachine, charCode) {
      if (charCode === 46 /* Char.Dot */) {
          stateMachine.state = 14 /* State.IpV4Dot */;
          return;
      }
      if (charCode === 58 /* Char.Colon */) {
          // Beginning of a port number
          stateMachine.state = 8 /* State.PortColon */;
          return;
      }
      if ((0, char_utils_1.isDigitChar)(charCode)) {
          return; // stay in the IPv4 digit state
      }
      if ((0, uri_utils_1.isUrlSuffixStartChar)(charCode)) {
          stateMachine.state = 10 /* State.Path */;
          return;
      }
      if ((0, char_utils_1.isAlphaNumericOrMarkChar)(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  /**
   * Handles the character following a '.' in a possible IPv4 address.
   *
   * A digit starts the next octet: the octet counter is incremented and the
   * machine returns to IpV4Digit. Anything else ends the machine via
   * `captureMatchIfValidAndRemove()`.
   */
  function stateIpV4Dot(context, stateMachine, charCode) {
      if (!(0, char_utils_1.isDigitChar)(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.octetsEncountered += 1;
      // Four octets make this *potentially* a valid IPv4 address. Per the
      // original note, a later IPv4 regex check confirms that each octet is
      // in the 0-255 range and that there are exactly 4 octets.
      var reachedFourthOctet = stateMachine.octetsEncountered === 4;
      if (reachedFourthOctet) {
          stateMachine.acceptStateReached = true;
      }
      stateMachine.state = 13 /* State.IpV4Digit */;
  }
  /**
   * Handles the character following the ':' that introduces a port number.
   * A digit moves to PortNumber; anything else ends the machine via
   * `captureMatchIfValidAndRemove()`.
   */
  function statePortColon(context, stateMachine, charCode) {
      if (!(0, char_utils_1.isDigitChar)(charCode)) {
          captureMatchIfValidAndRemove(context, stateMachine);
          return;
      }
      stateMachine.state = 9 /* State.PortNumber */;
  }
  /**
   * Handles the character following a port number digit.
   * Another digit keeps the state, a URL suffix start character ('/', '?', or
   * '#') moves to Path, and anything else ends the machine via
   * `captureMatchIfValidAndRemove()`.
   */
  function statePortNumber(context, stateMachine, charCode) {
      if ((0, char_utils_1.isDigitChar)(charCode)) {
          return; // stay in the port number state
      }
      if ((0, uri_utils_1.isUrlSuffixStartChar)(charCode)) {
          stateMachine.state = 10 /* State.Path */;
          return;
      }
      captureMatchIfValidAndRemove(context, stateMachine);
  }
  /**
   * Handles a character while in the Path state. Path characters keep the
   * state; the first non-path character ends the machine via
   * `captureMatchIfValidAndRemove()`.
   */
  function statePath(context, stateMachine, charCode) {
      var stillInPath = (0, uri_utils_1.isPathChar)(charCode);
      if (!stillInPath) {
          captureMatchIfValidAndRemove(context, stateMachine);
      }
  }
  /**
   * Handles the character after the 'm' of a possible 'mailto:' prefix.
   * An 'a' (either case) continues the prefix; any other character is handed
   * to `stateEmailLocalPart()` as ordinary local-part input.
   */
  function stateEmailMailto_M(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'a';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 16 /* State.EmailMailto_A */;
  }
  /**
   * Handles the character after the 'ma' of a possible 'mailto:' prefix.
   * An 'i' (either case) continues the prefix; any other character is handed
   * to `stateEmailLocalPart()` as ordinary local-part input.
   */
  function stateEmailMailto_A(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'i';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 17 /* State.EmailMailto_I */;
  }
  /**
   * Handles the character after the 'mai' of a possible 'mailto:' prefix.
   * An 'l' (either case) continues the prefix; any other character is handed
   * to `stateEmailLocalPart()` as ordinary local-part input.
   */
  function stateEmailMailto_I(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'l';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 18 /* State.EmailMailto_L */;
  }
  /**
   * Handles the character after the 'mail' of a possible 'mailto:' prefix.
   * A 't' (either case) continues the prefix; any other character is handed
   * to `stateEmailLocalPart()` as ordinary local-part input.
   */
  function stateEmailMailto_L(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 't';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 19 /* State.EmailMailto_T */;
  }
  /**
   * Handles the character after the 'mailt' of a possible 'mailto:' prefix.
   * An 'o' (either case) continues the prefix; any other character is handed
   * to `stateEmailLocalPart()` as ordinary local-part input.
   */
  function stateEmailMailto_T(context, stateMachine, char, charCode) {
      var continuesPrefix = char.toLowerCase() === 'o';
      if (!continuesPrefix) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 20 /* State.EmailMailto_O */;
  }
  /**
   * Handles the character after the 'mailto' of a possible 'mailto:' prefix.
   * A ':' completes the prefix; any other character is handed to
   * `stateEmailLocalPart()` as ordinary local-part input.
   */
  function stateEmailMailto_O(context, stateMachine, charCode) {
      if (charCode !== 58 /* Char.Colon */) {
          stateEmailLocalPart(context, stateMachine, charCode);
          return;
      }
      stateMachine.state = 21 /* State.EmailMailto_Colon */;
  }
  /**
   * Handles the character following a complete 'mailto:' prefix.
   * An email local-part character moves to EmailLocalPart; anything else
   * discards the machine.
   */
  function stateEmailMailtoColon(context, stateMachine, charCode) {
      if (!(0, email_utils_1.isEmailLocalPartChar)(charCode)) {
          context.removeMachine(stateMachine);
          return;
      }
      stateMachine.state = 22 /* State.EmailLocalPart */;
  }
  /**
   * Handles a character while in the "local part" of an email address (as
   * opposed to the "domain part").
   *
   * '.' moves to EmailLocalPartDot and '@' to EmailAtSign. Any other local-part
   * character sets the state to EmailLocalPart, and anything else discards the
   * machine.
   */
  function stateEmailLocalPart(context, stateMachine, charCode) {
      switch (charCode) {
          case 46 /* Char.Dot */:
              stateMachine.state = 23 /* State.EmailLocalPartDot */;
              return;
          case 64 /* Char.AtSign */:
              stateMachine.state = 24 /* State.EmailAtSign */;
              return;
      }
      if (!(0, email_utils_1.isEmailLocalPartChar)(charCode)) {
          // Not an email address character
          context.removeMachine(stateMachine);
          return;
      }
      // The state is assigned explicitly rather than left alone because the
      // 'mailto' states also call this function once the 'mailto' prefix
      // has been broken.
      stateMachine.state = 22 /* State.EmailLocalPart */;
  }
  652. // Handles the state where we've read a '.' character in the local part of
  653. // the email address (i.e. the part before the '@' character)
  654. function stateEmailLocalPartDot(context, stateMachine, charCode) {
  655. if (charCode === 46 /* Char.Dot */ /* '.' */) {
  656. // We read a second '.' in a row, not a valid email address
  657. // local part
  658. context.removeMachine(stateMachine);
  659. }
  660. else if (charCode === 64 /* Char.AtSign */ /* '@' */) {
  661. // We read the '@' character immediately after a dot ('.'), not
  662. // an email address
  663. context.removeMachine(stateMachine);
  664. }
  665. else if ((0, email_utils_1.isEmailLocalPartChar)(charCode)) {
  666. stateMachine.state = 22 /* State.EmailLocalPart */;
  667. }
  668. else {
  669. // Anything else, not an email address
  670. context.removeMachine(stateMachine);
  671. }
  672. }
  673. function stateEmailAtSign(context, stateMachine, charCode) {
  674. if ((0, uri_utils_1.isDomainLabelStartChar)(charCode)) {
  675. stateMachine.state = 25 /* State.EmailDomainChar */;
  676. }
  677. else {
  678. // Anything else, not an email address
  679. context.removeMachine(stateMachine);
  680. }
  681. }
  682. function stateEmailDomainChar(context, stateMachine, charCode) {
  683. if (charCode === 46 /* Char.Dot */ /* '.' */) {
  684. stateMachine.state = 27 /* State.EmailDomainDot */;
  685. }
  686. else if (charCode === 45 /* Char.Dash */ /* '-' */) {
  687. stateMachine.state = 26 /* State.EmailDomainHyphen */;
  688. }
  689. else if ((0, uri_utils_1.isDomainLabelChar)(charCode)) {
  690. // Stay in the DomainChar state
  691. }
  692. else {
  693. // Anything else, we potentially matched if the criteria has
  694. // been met
  695. captureMatchIfValidAndRemove(context, stateMachine);
  696. }
  697. }
  698. function stateEmailDomainHyphen(context, stateMachine, charCode) {
  699. if (charCode === 45 /* Char.Dash */ /* '-' */ || charCode === 46 /* Char.Dot */ /* '.' */) {
  700. // Not valid to have two hyphens ("--") or hypen+dot ("-.")
  701. captureMatchIfValidAndRemove(context, stateMachine);
  702. }
  703. else if ((0, uri_utils_1.isDomainLabelChar)(charCode)) {
  704. stateMachine.state = 25 /* State.EmailDomainChar */;
  705. }
  706. else {
  707. // Anything else
  708. captureMatchIfValidAndRemove(context, stateMachine);
  709. }
  710. }
  711. function stateEmailDomainDot(context, stateMachine, charCode) {
  712. if (charCode === 46 /* Char.Dot */ /* '.' */ || charCode === 45 /* Char.Dash */ /* '-' */) {
  713. // not valid to have two dots ("..") or dot+hypen (".-")
  714. captureMatchIfValidAndRemove(context, stateMachine);
  715. }
  716. else if ((0, uri_utils_1.isDomainLabelStartChar)(charCode)) {
  717. stateMachine.state = 25 /* State.EmailDomainChar */;
  718. // After having read a '.' and then a valid domain character,
  719. // we now know that the domain part of the email is valid, and
  720. // we have found at least a partial EmailMatch (however, the
  721. // email address may have additional characters from this point)
  722. stateMachine.acceptStateReached = true;
  723. }
  724. else {
  725. // Anything else
  726. captureMatchIfValidAndRemove(context, stateMachine);
  727. }
  728. }
  729. // Handles the state when we've just encountered a '#' character
  730. function stateHashtagHashChar(context, stateMachine, charCode) {
  731. if ((0, hashtag_utils_1.isHashtagTextChar)(charCode)) {
  732. // '#' char with valid hash text char following
  733. stateMachine.state = 29 /* State.HashtagTextChar */;
  734. stateMachine.acceptStateReached = true;
  735. }
  736. else {
  737. context.removeMachine(stateMachine);
  738. }
  739. }
  740. // Handles the state when we're currently in the hash tag's text chars
  741. function stateHashtagTextChar(context, stateMachine, charCode) {
  742. if ((0, hashtag_utils_1.isHashtagTextChar)(charCode)) {
  743. // Continue reading characters in the HashtagText state
  744. }
  745. else {
  746. captureMatchIfValidAndRemove(context, stateMachine);
  747. }
  748. }
  749. // Handles the state when we've just encountered a '@' character
  750. function stateMentionAtChar(context, stateMachine, charCode) {
  751. if ((0, mention_utils_1.isMentionTextChar)(charCode)) {
  752. // '@' char with valid mention text char following
  753. stateMachine.state = 31 /* State.MentionTextChar */;
  754. stateMachine.acceptStateReached = true;
  755. }
  756. else {
  757. context.removeMachine(stateMachine);
  758. }
  759. }
  760. // Handles the state when we're currently in the mention's text chars
  761. function stateMentionTextChar(context, stateMachine, charCode) {
  762. if ((0, mention_utils_1.isMentionTextChar)(charCode)) {
  763. // Continue reading characters in the HashtagText state
  764. }
  765. else if ((0, char_utils_1.isAlphaNumericOrMarkChar)(charCode)) {
  766. // Char is invalid for a mention text char, not a valid match.
  767. // Note that ascii alphanumeric chars are okay (which are tested
  768. // in the previous 'if' statement, but others are not)
  769. context.removeMachine(stateMachine);
  770. }
  771. else {
  772. captureMatchIfValidAndRemove(context, stateMachine);
  773. }
  774. }
  775. function statePhoneNumberPlus(context, stateMachine, char, charCode) {
  776. if ((0, char_utils_1.isDigitChar)(charCode)) {
  777. stateMachine.state = 38 /* State.PhoneNumberDigit */;
  778. }
  779. else {
  780. context.removeMachine(stateMachine);
  781. // This character may start a new match. Add states for it
  782. stateNoMatch(context, char, charCode);
  783. }
  784. }
  785. function statePhoneNumberOpenParen(context, stateMachine, char, charCode) {
  786. if ((0, char_utils_1.isDigitChar)(charCode)) {
  787. stateMachine.state = 33 /* State.PhoneNumberAreaCodeDigit1 */;
  788. }
  789. else {
  790. context.removeMachine(stateMachine);
  791. }
  792. // It's also possible that the paren was just an open brace for
  793. // a piece of text. Start other machines
  794. stateNoMatch(context, char, charCode);
  795. }
  796. function statePhoneNumberAreaCodeDigit1(context, stateMachine, charCode) {
  797. if ((0, char_utils_1.isDigitChar)(charCode)) {
  798. stateMachine.state = 34 /* State.PhoneNumberAreaCodeDigit2 */;
  799. }
  800. else {
  801. context.removeMachine(stateMachine);
  802. }
  803. }
  804. function statePhoneNumberAreaCodeDigit2(context, stateMachine, charCode) {
  805. if ((0, char_utils_1.isDigitChar)(charCode)) {
  806. stateMachine.state = 35 /* State.PhoneNumberAreaCodeDigit3 */;
  807. }
  808. else {
  809. context.removeMachine(stateMachine);
  810. }
  811. }
  812. function statePhoneNumberAreaCodeDigit3(context, stateMachine, charCode) {
  813. if (charCode === 41 /* Char.CloseParen */ /* ')' */) {
  814. stateMachine.state = 36 /* State.PhoneNumberCloseParen */;
  815. }
  816. else {
  817. context.removeMachine(stateMachine);
  818. }
  819. }
  820. function statePhoneNumberCloseParen(context, stateMachine, char, charCode) {
  821. if ((0, char_utils_1.isDigitChar)(charCode)) {
  822. stateMachine.state = 38 /* State.PhoneNumberDigit */;
  823. }
  824. else if ((0, phone_number_utils_1.isPhoneNumberSeparatorChar)(charCode)) {
  825. stateMachine.state = 39 /* State.PhoneNumberSeparator */;
  826. }
  827. else {
  828. context.removeMachine(stateMachine);
  829. }
  830. }
  831. function statePhoneNumberDigit(context, stateMachine, char, charCode) {
  832. var charIdx = context.charIdx;
  833. // For now, if we've reached any digits, we'll say that the machine
  834. // has reached its accept state. The phone regex will confirm the
  835. // match later.
  836. // Alternatively, we could count the number of digits to avoid
  837. // invoking the phone number regex
  838. stateMachine.acceptStateReached = true;
  839. if ((0, phone_number_utils_1.isPhoneNumberControlChar)(charCode)) {
  840. stateMachine.state = 40 /* State.PhoneNumberControlChar */;
  841. }
  842. else if (charCode === 35 /* Char.NumberSign */ /* '#' */) {
  843. stateMachine.state = 41 /* State.PhoneNumberPoundChar */;
  844. }
  845. else if ((0, char_utils_1.isDigitChar)(charCode)) {
  846. // Stay in the phone number digit state
  847. }
  848. else if (charCode === 40 /* Char.OpenParen */ /* '(' */) {
  849. stateMachine.state = 32 /* State.PhoneNumberOpenParen */;
  850. }
  851. else if ((0, phone_number_utils_1.isPhoneNumberSeparatorChar)(charCode)) {
  852. stateMachine.state = 39 /* State.PhoneNumberSeparator */;
  853. }
  854. else {
  855. captureMatchIfValidAndRemove(context, stateMachine);
  856. // The transition from a digit character to a letter can be the
  857. // start of a new scheme URL match
  858. if ((0, uri_utils_1.isSchemeStartChar)(charCode)) {
  859. context.addMachine(createSchemeUrlStateMachine(charIdx, 0 /* State.SchemeChar */));
  860. }
  861. }
  862. }
  863. function statePhoneNumberSeparator(context, stateMachine, char, charCode) {
  864. if ((0, char_utils_1.isDigitChar)(charCode)) {
  865. stateMachine.state = 38 /* State.PhoneNumberDigit */;
  866. }
  867. else if (charCode === 40 /* Char.OpenParen */ /* '(' */) {
  868. stateMachine.state = 32 /* State.PhoneNumberOpenParen */;
  869. }
  870. else {
  871. captureMatchIfValidAndRemove(context, stateMachine);
  872. // This character may start a new match. Add states for it
  873. stateNoMatch(context, char, charCode);
  874. }
  875. }
  876. // The ";" characters is "wait" in a phone number
  877. // The "," characters is "pause" in a phone number
  878. function statePhoneNumberControlChar(context, stateMachine, charCode) {
  879. if ((0, phone_number_utils_1.isPhoneNumberControlChar)(charCode)) {
  880. // Stay in the "control char" state
  881. }
  882. else if (charCode === 35 /* Char.NumberSign */ /* '#' */) {
  883. stateMachine.state = 41 /* State.PhoneNumberPoundChar */;
  884. }
  885. else if ((0, char_utils_1.isDigitChar)(charCode)) {
  886. stateMachine.state = 38 /* State.PhoneNumberDigit */;
  887. }
  888. else {
  889. captureMatchIfValidAndRemove(context, stateMachine);
  890. }
  891. }
  892. // The "#" characters is "pound" in a phone number
  893. function statePhoneNumberPoundChar(context, stateMachine, charCode) {
  894. if ((0, phone_number_utils_1.isPhoneNumberControlChar)(charCode)) {
  895. stateMachine.state = 40 /* State.PhoneNumberControlChar */;
  896. }
  897. else if ((0, char_utils_1.isDigitChar)(charCode)) {
  898. // According to some of the older tests, if there's a digit
  899. // after a '#' sign, the match is invalid. TODO: Revisit if this is true
  900. context.removeMachine(stateMachine);
  901. }
  902. else {
  903. captureMatchIfValidAndRemove(context, stateMachine);
  904. }
  905. }
  906. /*
  907. * Captures a match if it is valid (i.e. has a full domain name for a
  908. * TLD match). If a match is not valid, it is possible that we want to
  909. * keep reading characters in order to make a full match.
  910. */
  911. function captureMatchIfValidAndRemove(context, stateMachine) {
  912. var matches = context.matches, text = context.text, charIdx = context.charIdx, tagBuilder = context.tagBuilder, stripPrefix = context.stripPrefix, stripTrailingSlash = context.stripTrailingSlash, decodePercentEncoding = context.decodePercentEncoding, hashtagServiceName = context.hashtagServiceName, mentionServiceName = context.mentionServiceName;
  913. // Remove the state machine first. There are a number of code paths
  914. // which return out of this function early, so make sure we have
  915. // this done
  916. context.removeMachine(stateMachine);
  917. // Make sure the state machine being checked has actually reached an
  918. // "accept" state. If it hasn't reach one, it can't be a match
  919. if (!stateMachine.acceptStateReached) {
  920. return;
  921. }
  922. var startIdx = stateMachine.startIdx;
  923. var matchedText = text.slice(stateMachine.startIdx, charIdx);
  924. // Handle any unbalanced braces (parens, square brackets, or curly
  925. // brackets) inside the URL. This handles situations like:
  926. // The link (google.com)
  927. // and
  928. // Check out this link here (en.wikipedia.org/wiki/IANA_(disambiguation))
  929. //
  930. // And also remove any punctuation chars at the end such as:
  931. // '?', ',', ':', '.', etc.
  932. matchedText = excludeUnbalancedTrailingBracesAndPunctuation(matchedText);
  933. switch (stateMachine.type) {
  934. case 0 /* StateMachineType.Url */: {
  935. // We don't want to accidentally match a URL that is preceded by an
  936. // '@' character, which would be an email address
  937. var charBeforeUrlMatch = text.charCodeAt(stateMachine.startIdx - 1);
  938. if (charBeforeUrlMatch === 64 /* Char.AtSign */ /* '@' */) {
  939. return;
  940. }
  941. switch (stateMachine.matchType) {
  942. case 0 /* UrlStateMachineMatchType.Scheme */: {
  943. // Autolinker accepts many characters in a url's scheme (like `fake://test.com`).
  944. // However, in cases where a URL is missing whitespace before an obvious link,
  945. // (for example: `nowhitespacehttp://www.test.com`), we only want the match to start
  946. // at the http:// part. We will check if the match contains a common scheme and then
  947. // shift the match to start from there.
  948. var httpSchemeMatch = uri_utils_1.httpSchemeRe.exec(matchedText);
  949. if (httpSchemeMatch) {
  950. // If we found an overmatched URL, we want to find the index
  951. // of where the match should start and shift the match to
  952. // start from the beginning of the common scheme
  953. startIdx = startIdx + httpSchemeMatch.index;
  954. matchedText = matchedText.slice(httpSchemeMatch.index);
  955. }
  956. if (!(0, uri_utils_1.isValidSchemeUrl)(matchedText)) {
  957. return; // not a valid match
  958. }
  959. break;
  960. }
  961. case 1 /* UrlStateMachineMatchType.Tld */: {
  962. if (!(0, uri_utils_1.isValidTldMatch)(matchedText)) {
  963. return; // not a valid match
  964. }
  965. break;
  966. }
  967. case 2 /* UrlStateMachineMatchType.IpV4 */: {
  968. if (!(0, uri_utils_1.isValidIpV4Address)(matchedText)) {
  969. return; // not a valid match
  970. }
  971. break;
  972. }
  973. /* istanbul ignore next */
  974. default:
  975. (0, utils_1.assertNever)(stateMachine);
  976. }
  977. matches.push(new url_match_1.UrlMatch({
  978. tagBuilder: tagBuilder,
  979. matchedText: matchedText,
  980. offset: startIdx,
  981. urlMatchType: toUrlMatchType(stateMachine.matchType),
  982. url: matchedText,
  983. protocolRelativeMatch: matchedText.slice(0, 2) === '//',
  984. // TODO: Do these settings need to be passed to the match,
  985. // or should we handle them here in UrlMatcher?
  986. stripPrefix: stripPrefix,
  987. stripTrailingSlash: stripTrailingSlash,
  988. decodePercentEncoding: decodePercentEncoding,
  989. }));
  990. break;
  991. }
  992. case 1 /* StateMachineType.Email */: {
  993. // if the email address has a valid TLD, add it to the list of matches
  994. if ((0, email_utils_1.isValidEmail)(matchedText)) {
  995. matches.push(new email_match_1.EmailMatch({
  996. tagBuilder: tagBuilder,
  997. matchedText: matchedText,
  998. offset: startIdx,
  999. email: matchedText.replace(email_utils_1.mailtoSchemePrefixRe, ''),
  1000. }));
  1001. }
  1002. break;
  1003. }
  1004. case 2 /* StateMachineType.Hashtag */: {
  1005. if ((0, hashtag_utils_1.isValidHashtag)(matchedText)) {
  1006. matches.push(new hashtag_match_1.HashtagMatch({
  1007. tagBuilder: tagBuilder,
  1008. matchedText: matchedText,
  1009. offset: startIdx,
  1010. serviceName: hashtagServiceName,
  1011. hashtag: matchedText.slice(1),
  1012. }));
  1013. }
  1014. break;
  1015. }
  1016. case 3 /* StateMachineType.Mention */: {
  1017. if ((0, mention_utils_1.isValidMention)(matchedText, mentionServiceName)) {
  1018. matches.push(new mention_match_1.MentionMatch({
  1019. tagBuilder: tagBuilder,
  1020. matchedText: matchedText,
  1021. offset: startIdx,
  1022. serviceName: mentionServiceName,
  1023. mention: matchedText.slice(1), // strip off the '@' character at the beginning
  1024. }));
  1025. }
  1026. break;
  1027. }
  1028. case 4 /* StateMachineType.Phone */: {
  1029. // remove any trailing spaces that were considered as "separator"
  1030. // chars by the state machine
  1031. matchedText = matchedText.replace(/ +$/g, '');
  1032. if ((0, phone_number_utils_1.isValidPhoneNumber)(matchedText)) {
  1033. var cleanNumber = matchedText.replace(/[^0-9,;#]/g, ''); // strip out non-digit characters exclude comma semicolon and #
  1034. matches.push(new phone_match_1.PhoneMatch({
  1035. tagBuilder: tagBuilder,
  1036. matchedText: matchedText,
  1037. offset: startIdx,
  1038. number: cleanNumber,
  1039. plusSign: matchedText.charAt(0) === '+',
  1040. }));
  1041. }
  1042. break;
  1043. }
  1044. /* istanbul ignore next */
  1045. default:
  1046. (0, utils_1.assertNever)(stateMachine);
  1047. }
  1048. }
  1049. /**
  1050. * Helper function to convert a UrlStateMachineMatchType value to its
  1051. * UrlMatchType equivalent.
  1052. */
  1053. function toUrlMatchType(stateMachineMatchType) {
  1054. switch (stateMachineMatchType) {
  1055. case 0 /* UrlStateMachineMatchType.Scheme */:
  1056. return 'scheme';
  1057. case 1 /* UrlStateMachineMatchType.Tld */:
  1058. return 'tld';
  1059. case 2 /* UrlStateMachineMatchType.IpV4 */:
  1060. return 'ipV4';
  1061. /* istanbul ignore next */
  1062. default:
  1063. (0, utils_1.assertNever)(stateMachineMatchType);
  1064. }
  1065. }
  1066. var oppositeBrace = {
  1067. ')': '(',
  1068. '}': '{',
  1069. ']': '[',
  1070. };
  1071. /**
  1072. * Determines if a match found has unmatched closing parenthesis,
  1073. * square brackets or curly brackets. If so, these unbalanced symbol(s) will be
  1074. * removed from the URL match itself.
  1075. *
  1076. * A match may have an extra closing parenthesis/square brackets/curly brackets
  1077. * at the end of the match because these are valid URL path characters. For
  1078. * example, "wikipedia.com/something_(disambiguation)" should be auto-linked.
  1079. *
  1080. * However, an extra parenthesis *will* be included when the URL itself is
  1081. * wrapped in parenthesis, such as in the case of:
  1082. *
  1083. * "(wikipedia.com/something_(disambiguation))"
  1084. *
  1085. * In this case, the last closing parenthesis should *not* be part of the
  1086. * URL itself, and this method will exclude it from the returned URL.
  1087. *
  1088. * For square brackets in URLs such as in PHP arrays, the same behavior as
  1089. * parenthesis discussed above should happen:
  1090. *
  1091. * "[http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]"
  1092. *
  1093. * The very last closing square bracket should not be part of the URL itself,
  1094. * and therefore this method will remove it.
  1095. *
  1096. * @param matchedText The full matched URL/email/hashtag/etc. from the state
  1097. * machine parser.
  1098. * @return The updated matched text with extraneous suffix characters removed.
  1099. */
  1100. function excludeUnbalancedTrailingBracesAndPunctuation(matchedText) {
  1101. var braceCounts = {
  1102. '(': 0,
  1103. '{': 0,
  1104. '[': 0,
  1105. };
  1106. for (var i = 0; i < matchedText.length; i++) {
  1107. var char = matchedText.charAt(i);
  1108. var charCode = matchedText.charCodeAt(i);
  1109. if ((0, char_utils_1.isOpenBraceChar)(charCode)) {
  1110. braceCounts[char]++;
  1111. }
  1112. else if ((0, char_utils_1.isCloseBraceChar)(charCode)) {
  1113. braceCounts[oppositeBrace[char]]--;
  1114. }
  1115. }
  1116. var endIdx = matchedText.length - 1;
  1117. while (endIdx >= 0) {
  1118. var char = matchedText.charAt(endIdx);
  1119. var charCode = matchedText.charCodeAt(endIdx);
  1120. if ((0, char_utils_1.isCloseBraceChar)(charCode)) {
  1121. var oppositeBraceChar = oppositeBrace[char];
  1122. if (braceCounts[oppositeBraceChar] < 0) {
  1123. braceCounts[oppositeBraceChar]++;
  1124. endIdx--;
  1125. }
  1126. else {
  1127. break;
  1128. }
  1129. }
  1130. else if ((0, char_utils_1.isUrlSuffixNotAllowedAsFinalChar)(charCode)) {
  1131. // Walk back a punctuation char like '?', ',', ':', '.', etc.
  1132. endIdx--;
  1133. }
  1134. else {
  1135. break;
  1136. }
  1137. }
  1138. return matchedText.slice(0, endIdx + 1);
  1139. }
  1140. function createSchemeUrlStateMachine(startIdx, state) {
  1141. return {
  1142. type: 0 /* StateMachineType.Url */,
  1143. startIdx: startIdx,
  1144. state: state,
  1145. acceptStateReached: false,
  1146. matchType: 0 /* UrlStateMachineMatchType.Scheme */,
  1147. };
  1148. }
  1149. function createTldUrlStateMachine(startIdx, state) {
  1150. return {
  1151. type: 0 /* StateMachineType.Url */,
  1152. startIdx: startIdx,
  1153. state: state,
  1154. acceptStateReached: false,
  1155. matchType: 1 /* UrlStateMachineMatchType.Tld */,
  1156. };
  1157. }
  1158. function createIpV4UrlStateMachine(startIdx, state) {
  1159. return {
  1160. type: 0 /* StateMachineType.Url */,
  1161. startIdx: startIdx,
  1162. state: state,
  1163. acceptStateReached: false,
  1164. matchType: 2 /* UrlStateMachineMatchType.IpV4 */,
  1165. octetsEncountered: 1, // starts at 1 because we create this machine when encountering the first octet
  1166. };
  1167. }
  1168. function createEmailStateMachine(startIdx, state) {
  1169. return {
  1170. type: 1 /* StateMachineType.Email */,
  1171. startIdx: startIdx,
  1172. state: state,
  1173. acceptStateReached: false,
  1174. };
  1175. }
  1176. function createHashtagStateMachine(startIdx, state) {
  1177. return {
  1178. type: 2 /* StateMachineType.Hashtag */,
  1179. startIdx: startIdx,
  1180. state: state,
  1181. acceptStateReached: false,
  1182. };
  1183. }
  1184. function createMentionStateMachine(startIdx, state) {
  1185. return {
  1186. type: 3 /* StateMachineType.Mention */,
  1187. startIdx: startIdx,
  1188. state: state,
  1189. acceptStateReached: false,
  1190. };
  1191. }
  1192. function createPhoneNumberStateMachine(startIdx, state) {
  1193. return {
  1194. type: 4 /* StateMachineType.Phone */,
  1195. startIdx: startIdx,
  1196. state: state,
  1197. acceptStateReached: false,
  1198. };
  1199. }
  1200. function isSchemeUrlStateMachine(machine) {
  1201. return (machine.type === 0 /* StateMachineType.Url */ &&
  1202. machine.matchType === 0 /* UrlStateMachineMatchType.Scheme */);
  1203. }
  1204. //# sourceMappingURL=parse-matches.js.map