智慧水务管理系统 - 精河县供水工程综合管理平台

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.parseHtml = parseHtml;
  4. var tslib_1 = require("tslib");
  5. var char_utils_1 = require("../char-utils");
  6. var utils_1 = require("../utils");
  7. // For debugging: search for other "For debugging" lines
  8. // import CliTable from 'cli-table';
  /**
   * Small record describing the tag that the parser is currently reading.
   *
   * All `cfg` properties are optional:
   * - `idx`: index of the tag's '<' character in the input (-1 when absent)
   * - `type`: defaults to 'tag'; other code in this file also uses 'comment'
   *   and 'doctype'
   * - `name`: the tag name, '' until one has been captured
   * - `isOpening` / `isClosing`: coerced to booleans
   */
  var CurrentTag = /** @class */ (function () {
      function CurrentTag(cfg) {
          var options = cfg === undefined ? {} : cfg;
          var hasIdx = options.idx !== undefined;
          this.idx = hasIdx ? options.idx : -1;
          this.type = options.type || 'tag';
          this.name = options.name || '';
          this.isOpening = Boolean(options.isOpening);
          this.isClosing = Boolean(options.isClosing);
      }
      return CurrentTag;
  }());
  // Single shared instance that stands for "no tag is being read right now"
  var noCurrentTag = new CurrentTag();
  /**
   * Holds every piece of mutable state that the HTML parsing state machine
   * needs, so that the state functions can be plain top-level functions which
   * receive this object as an argument.
   *
   * ## Historical note
   *
   * v4.1.5 kept this state in closures of functions nested inside
   * `parseHtml()`. Those functions had to be re-created on every call, which
   * made them hard for v8 to JIT optimize. Since v4.1.6 the functions live at
   * the top-level scope and share state through this context object, so they
   * can be compiled once and reused.
   */
  var ParseHtmlContext = /** @class */ (function () {
      function ParseHtmlContext(html, callbacks) {
          // Index of the character currently being processed
          this.charIdx = 0;
          // The state machine begins in the Data state
          this.state = 0 /* State.Data */;
          // Index at which the current run of text (data) starts
          this.currentDataIdx = 0;
          // Description of the tag being read; starts out as "no tag"
          this.currentTag = noCurrentTag;
          // The input string and the caller's callbacks
          this.html = html;
          this.callbacks = callbacks;
      }
      return ParseHtmlContext;
  }());
  /**
   * Walks an HTML string once, reporting tags and text through callbacks.
   *
   * ## History
   *
   * An earlier version of this file located html tags with a regular
   * expression. Certain inputs triggered catastrophic backtracking, making
   * Autolinker hang or take a very long time.
   *
   * The current code is intended to be an O(n), single-pass state machine that
   * is as cheap as possible. It does not implement the full HTML spec; it only
   * needs to:
   *
   * 1. Determine where HTML tags are
   * 2. Determine the tag name (Autolinker specifically only cares about <a>,
   *    <script>, and <style> tags, so as not to link any text within them)
   *
   * It does not need to:
   *
   * 1. Create a parse tree
   * 2. Auto-close tags with invalid markup
   * 3. etc.
   *
   * A further goal was to avoid external dependencies that would grow the
   * Autolinker bundle. For instance, adding htmlparser2 adds 125kb to the
   * minified output file, increasing its final size from 47kb to 172kb (at
   * the time of writing). It also treats the string "<3 blah blah blah" as an
   * HTML tag, which is not what we want.
   *
   * Reference for HTML spec:
   *
   * https://www.w3.org/TR/html51/syntax.html#sec-tokenization
   *
   * @param {String} html The HTML to parse
   * @param {Object} callbacks
   * @param {Function} callbacks.onOpenTag Called when an open tag is parsed,
   *   with the tag name and the index of the tag in `html`.
   * @param {Function} callbacks.onCloseTag Called when a close tag is parsed,
   *   with the tag name and the index of the tag in `html`. For a self-closing
   *   tag, `onCloseTag` is called immediately after `onOpenTag`.
   * @param {Function} callbacks.onText Called when text (i.e. not an HTML tag)
   *   is parsed, with the text (string) as its first argument and its offset
   *   (number) into `html` as its second.
   * @param {Function} callbacks.onComment Called when an HTML comment is
   *   parsed, with the index of the comment in `html`.
   * @param {Function} callbacks.onDoctype Called when a doctype declaration is
   *   parsed, with the index of the declaration in `html`.
   */
  function parseHtml(html, callbacks) {
      var context = new ParseHtmlContext(html, callbacks);
      var len = html.length;
      // For debugging: search for other "For debugging" lines. A trace row of
      // charIdx / char / state / currentDataIdx / currentTag.idx / currentTag.type
      // can be recorded before and after the switch below (e.g. with cli-table).
      //
      // The index lives on the context (not in a local) because some state
      // functions move it themselves, to skip ahead or to re-consume a character.
      for (; context.charIdx < len; context.charIdx++) {
          var pos = context.charIdx;
          var char = html.charAt(pos);
          var charCode = html.charCodeAt(pos);
          switch (context.state) {
              case 0 /* State.Data */:
                  stateData(context, char);
                  break;
              case 1 /* State.TagOpen */:
                  stateTagOpen(context, char, charCode);
                  break;
              case 2 /* State.EndTagOpen */:
                  stateEndTagOpen(context, char, charCode);
                  break;
              case 3 /* State.TagName */:
                  stateTagName(context, char, charCode);
                  break;
              case 4 /* State.BeforeAttributeName */:
                  stateBeforeAttributeName(context, char, charCode);
                  break;
              case 5 /* State.AttributeName */:
                  stateAttributeName(context, char, charCode);
                  break;
              case 6 /* State.AfterAttributeName */:
                  stateAfterAttributeName(context, char, charCode);
                  break;
              case 7 /* State.BeforeAttributeValue */:
                  stateBeforeAttributeValue(context, char, charCode);
                  break;
              case 8 /* State.AttributeValueDoubleQuoted */:
                  stateAttributeValueDoubleQuoted(context, char);
                  break;
              case 9 /* State.AttributeValueSingleQuoted */:
                  stateAttributeValueSingleQuoted(context, char);
                  break;
              case 10 /* State.AttributeValueUnquoted */:
                  stateAttributeValueUnquoted(context, char, charCode);
                  break;
              case 11 /* State.AfterAttributeValueQuoted */:
                  stateAfterAttributeValueQuoted(context, char, charCode);
                  break;
              case 12 /* State.SelfClosingStartTag */:
                  stateSelfClosingStartTag(context, char);
                  break;
              case 13 /* State.MarkupDeclarationOpenState */:
                  stateMarkupDeclarationOpen(context);
                  break;
              case 14 /* State.CommentStart */:
                  stateCommentStart(context, char);
                  break;
              case 15 /* State.CommentStartDash */:
                  stateCommentStartDash(context, char);
                  break;
              case 16 /* State.Comment */:
                  stateComment(context, char);
                  break;
              case 17 /* State.CommentEndDash */:
                  stateCommentEndDash(context, char);
                  break;
              case 18 /* State.CommentEnd */:
                  stateCommentEnd(context, char);
                  break;
              case 19 /* State.CommentEndBang */:
                  stateCommentEndBang(context, char);
                  break;
              case 20 /* State.Doctype */:
                  stateDoctype(context, char);
                  break;
              /* istanbul ignore next */
              default:
                  (0, utils_1.assertNever)(context.state);
          }
      }
      // Anything left after the last emitted tag (including an unfinished tag)
      // is reported as text
      if (context.charIdx > context.currentDataIdx) {
          emitText(context);
      }
  }
  // Handles a character while plain text is being read (i.e. the text around
  // HTML tags). Only '<' matters here, since it may be the start of a tag.
  // https://www.w3.org/TR/html51/syntax.html#data-state
  function stateData(context, char) {
      if (char !== '<') {
          return; // ordinary text character: nothing to do
      }
      startNewTag(context);
  }
  // Handles the character that follows a '<' read from the Data state:
  //   '!'     possible comment or doctype
  //   '/'     possible end tag; the current tag is flagged as closing
  //   '<'     the previous '<' is abandoned and a new tag starts here
  //   letter  first character of a tag name; the tag is flagged as opening
  //   other   not a tag after all (e.g. "<3"), so go back to reading text
  // https://www.w3.org/TR/html51/syntax.html#tag-open-state
  function stateTagOpen(context, char, charCode) {
      var tagCfg;
      if (char === '!') {
          context.state = 13 /* State.MarkupDeclarationOpenState */;
      }
      else if (char === '/') {
          context.state = 2 /* State.EndTagOpen */;
          tagCfg = tslib_1.__assign({}, context.currentTag);
          tagCfg.isClosing = true;
          context.currentTag = new CurrentTag(tagCfg);
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else if ((0, char_utils_1.isAsciiLetterChar)(charCode)) {
          // tag name start (and no '/' read)
          context.state = 3 /* State.TagName */;
          tagCfg = tslib_1.__assign({}, context.currentTag);
          tagCfg.isOpening = true;
          context.currentTag = new CurrentTag(tagCfg);
      }
      else {
          // Any other character: use the shared helper, like the other state
          // functions, instead of repeating its assignments inline
          resetToDataState(context);
      }
  }
  // Continues reading a tag name after a '<x' or '</x' sequence (where 'x' is
  // a letter character) has been read.
  //
  // Whitespace, '/' and '>' all end the name, so the name is captured once for
  // those three cases. A '<' abandons the tag and starts a new one. Letters,
  // digits and ':' (accepted for XML namespaced tags) keep the name going;
  // any other character means this does not form an html tag.
  // https://www.w3.org/TR/html51/syntax.html#tag-name-state
  function stateTagName(context, char, charCode) {
      var isSpace = (0, char_utils_1.isWhitespaceChar)(charCode);
      if (isSpace || char === '/' || char === '>') {
          var namedCfg = tslib_1.__assign({}, context.currentTag);
          namedCfg.name = captureTagName(context);
          context.currentTag = new CurrentTag(namedCfg);
          if (isSpace) {
              context.state = 4 /* State.BeforeAttributeName */;
          }
          else if (char === '/') {
              context.state = 12 /* State.SelfClosingStartTag */;
          }
          else {
              emitTagAndPreviousTextNode(context); // resets to Data state as well
          }
          return;
      }
      if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
          return;
      }
      var isNameChar = (0, char_utils_1.isAsciiLetterChar)(charCode) ||
          (0, char_utils_1.isDigitChar)(charCode) ||
          char === ':';
      if (!isNameChar) {
          resetToDataState(context);
      }
      // otherwise: continue reading the tag name
  }
  // Handles the character that follows the '/' of a '</' sequence. Only a
  // letter continues the end tag. A '>' (the "</>" parse error) or any other
  // non-tag-like character means this is not treated as a tag.
  // https://www.w3.org/TR/html51/syntax.html#end-tag-open-state
  function stateEndTagOpen(context, char, charCode) {
      var startsTagName = char !== '>' && (0, char_utils_1.isAsciiLetterChar)(charCode);
      if (startsTagName) {
          context.state = 3 /* State.TagName */;
      }
      else {
          resetToDataState(context);
      }
  }
  // Handles a character read inside a tag while waiting for an attribute name
  // to begin.
  // https://www.w3.org/TR/html51/syntax.html#before-attribute-name-state
  function stateBeforeAttributeName(context, char, charCode) {
      if ((0, char_utils_1.isWhitespaceChar)(charCode)) {
          return; // stay in BeforeAttributeName state - continue reading chars
      }
      switch (char) {
          case '/':
              context.state = 12 /* State.SelfClosingStartTag */;
              return;
          case '>':
              emitTagAndPreviousTextNode(context); // resets to Data state as well
              return;
          case '<':
              // start of another tag (ignore the previous, incomplete one)
              startNewTag(context);
              return;
      }
      // "Parse error" characters: the spec says to append them to the attribute
      // name, but here they are taken as a sign that this is not a real HTML tag
      var isParseErrorChar = char === "=" ||
          (0, char_utils_1.isQuoteChar)(charCode) ||
          (0, char_utils_1.isControlChar)(charCode);
      if (isParseErrorChar) {
          resetToDataState(context);
      }
      else {
          // Any other char, start of a new attribute name
          context.state = 5 /* State.AttributeName */;
      }
  }
  // Handles a character read while an attribute name is being read. Any
  // character not listed below is simply taken as part of the attribute name.
  // https://www.w3.org/TR/html51/syntax.html#attribute-name-state
  function stateAttributeName(context, char, charCode) {
      if ((0, char_utils_1.isWhitespaceChar)(charCode)) {
          context.state = 6 /* State.AfterAttributeName */;
          return;
      }
      switch (char) {
          case '/':
              context.state = 12 /* State.SelfClosingStartTag */;
              return;
          case '=':
              context.state = 7 /* State.BeforeAttributeValue */;
              return;
          case '>':
              emitTagAndPreviousTextNode(context); // resets to Data state as well
              return;
          case '<':
              // start of another tag (ignore the previous, incomplete one)
              startNewTag(context);
              return;
      }
      if ((0, char_utils_1.isQuoteChar)(charCode)) {
          // "Parse error" characters: the spec says to append them to the
          // attribute name, but here they mean this is not a real HTML tag
          resetToDataState(context);
      }
  }
  // Handles a character read after an attribute name and the whitespace that
  // followed it. Further whitespace is ignored.
  // https://www.w3.org/TR/html51/syntax.html#after-attribute-name-state
  function stateAfterAttributeName(context, char, charCode) {
      if ((0, char_utils_1.isWhitespaceChar)(charCode)) {
          return; // ignore the character - continue reading
      }
      switch (char) {
          case '/':
              context.state = 12 /* State.SelfClosingStartTag */;
              return;
          case '=':
              context.state = 7 /* State.BeforeAttributeValue */;
              return;
          case '>':
              emitTagAndPreviousTextNode(context);
              return;
          case '<':
              // start of another tag (ignore the previous, incomplete one)
              startNewTag(context);
              return;
      }
      if ((0, char_utils_1.isQuoteChar)(charCode)) {
          // "Parse error" characters: the spec says to append them to the
          // attribute name, but here they mean this is not a real HTML tag
          resetToDataState(context);
      }
      else {
          // Any other character, start a new attribute in the current tag
          context.state = 5 /* State.AttributeName */;
      }
  }
  // Handles a character read after the '=' of an attribute, before its value
  // has started. `char` is the single character at the current index.
  //
  // The characters '>', '=' and '`' are compared directly rather than through
  // a regular expression, so no RegExp is evaluated for each character read in
  // this state.
  // https://www.w3.org/TR/html51/syntax.html#before-attribute-value-state
  function stateBeforeAttributeValue(context, char, charCode) {
      if ((0, char_utils_1.isWhitespaceChar)(charCode)) {
          return; // ignore the character - continue reading
      }
      switch (char) {
          case "\"":
              context.state = 8 /* State.AttributeValueDoubleQuoted */;
              break;
          case "'":
              context.state = 9 /* State.AttributeValueSingleQuoted */;
              break;
          case '>':
          case '=':
          case '`':
              // Invalid chars after an '=' for an attribute value, don't count
              // the current tag as an HTML tag
              resetToDataState(context);
              break;
          case '<':
              // start of another tag (ignore the previous, incomplete one)
              startNewTag(context);
              break;
          default:
              // Any other character, consider it an unquoted attribute value
              context.state = 10 /* State.AttributeValueUnquoted */;
      }
  }
  // Handles a character inside a double-quoted attribute value. Everything is
  // consumed as part of the value until the closing '"' is read.
  // https://www.w3.org/TR/html51/syntax.html#attribute-value-double-quoted-state
  function stateAttributeValueDoubleQuoted(context, char) {
      if (char !== "\"") {
          return; // still inside the double-quoted value
      }
      // end the current double-quoted attribute
      context.state = 11 /* State.AfterAttributeValueQuoted */;
  }
  // Handles a character inside a single-quoted attribute value. Everything is
  // consumed as part of the value until the closing "'" is read.
  // https://www.w3.org/TR/html51/syntax.html#attribute-value-single-quoted-state
  function stateAttributeValueSingleQuoted(context, char) {
      if (char !== "'") {
          return; // still inside the single-quoted value
      }
      // end the current single-quoted attribute
      context.state = 11 /* State.AfterAttributeValueQuoted */;
  }
  // Handles a character inside an unquoted attribute value. Whitespace ends
  // the value, '>' ends the tag, '<' starts a new tag; anything else is
  // treated as part of the attribute value.
  // https://www.w3.org/TR/html51/syntax.html#attribute-value-unquoted-state
  function stateAttributeValueUnquoted(context, char, charCode) {
      if ((0, char_utils_1.isWhitespaceChar)(charCode)) {
          context.state = 4 /* State.BeforeAttributeName */;
          return;
      }
      switch (char) {
          case '>':
              emitTagAndPreviousTextNode(context);
              break;
          case '<':
              // start of another tag (ignore the previous, incomplete one)
              startNewTag(context);
              break;
      }
  }
  // Handles the character that follows the closing quote of a double-quoted
  // or single-quoted attribute value.
  // https://www.w3.org/TR/html51/syntax.html#after-attribute-value-quoted-state
  function stateAfterAttributeValueQuoted(context, char, charCode) {
      if ((0, char_utils_1.isWhitespaceChar)(charCode)) {
          context.state = 4 /* State.BeforeAttributeName */;
          return;
      }
      switch (char) {
          case '/':
              context.state = 12 /* State.SelfClosingStartTag */;
              break;
          case '>':
              emitTagAndPreviousTextNode(context);
              break;
          case '<':
              // start of another tag (ignore the previous, incomplete one)
              startNewTag(context);
              break;
          default:
              // Any other character is a "parse error". The spec says to switch
              // to the BeforeAttributeName state and re-consume the character,
              // as it may be the start of a new attribute name
              context.state = 4 /* State.BeforeAttributeName */;
              reconsumeCurrentChar(context);
      }
  }
  // Handles the character that follows a '/' read inside the current tag
  // (presumably for '/>').
  //
  // Note: for any character other than '>', the spec calls for going back to
  // the BeforeAttributeName state in order to read more attributes. For the
  // purposes of Autolinker such input is most likely not a valid HTML tag
  // (for example: "<something / other>"), so it is treated as regular text.
  // https://www.w3.org/TR/html51/syntax.html#self-closing-start-tag-state
  function stateSelfClosingStartTag(context, char) {
      if (char !== '>') {
          resetToDataState(context);
          return;
      }
      var closingCfg = tslib_1.__assign({}, context.currentTag);
      closingCfg.isClosing = true;
      context.currentTag = new CurrentTag(closingCfg);
      emitTagAndPreviousTextNode(context); // resets to Data state as well
  }
  // Handles the character(s) that follow a '<!' sequence: either the start of
  // an HTML comment ('--') or a doctype declaration ('DOCTYPE', any case).
  // https://www.w3.org/TR/html51/syntax.html#markup-declaration-open-state
  function stateMarkupDeclarationOpen(context) {
      var html = context.html;
      var start = context.charIdx;
      var tagCfg;
      if (html.charAt(start) === '-' && html.charAt(start + 1) === '-') {
          // html comment
          tagCfg = tslib_1.__assign({}, context.currentTag);
          tagCfg.type = 'comment';
          context.currentTag = new CurrentTag(tagCfg);
          context.state = 14 /* State.CommentStart */;
          // "consume" the second '-' character. The next loop iteration will
          // consume the character after the '<!--' sequence
          context.charIdx = start + 1;
          return;
      }
      var keyword = html.slice(start, start + 7).toUpperCase();
      if (keyword === 'DOCTYPE') {
          tagCfg = tslib_1.__assign({}, context.currentTag);
          tagCfg.type = 'doctype';
          context.currentTag = new CurrentTag(tagCfg);
          context.state = 20 /* State.Doctype */;
          // "consume" the characters "OCTYPE" (the current loop iteration
          // consumed the 'D'). The next loop iteration will consume the
          // character after the '<!DOCTYPE' sequence
          context.charIdx = start + 6;
          return;
      }
      // At this point, the spec specifies that the state machine should enter
      // the "bogus comment" state, in which the character(s) after the '<!'
      // become an HTML comment up until the first '>' that is read (or EOF).
      // Instead, assume that a user just typed '<!' as part of some piece of
      // non-html text
      resetToDataState(context);
  }
  // Handles the character that follows the sequence '<!--'.
  // https://www.w3.org/TR/html51/syntax.html#comment-start-state
  function stateCommentStart(context, char) {
      switch (char) {
          case '-':
              // We've read the sequence '<!---' at this point (3 dashes)
              context.state = 15 /* State.CommentStartDash */;
              break;
          case '>':
              // The sequence read is '<!-->'. Assume it wasn't a real comment
              // and let it be emitted as data
              resetToDataState(context);
              break;
          default:
              // Any other char, take it as part of the comment
              context.state = 16 /* State.Comment */;
      }
  }
  // Handles the character that follows the sequence '<!---' (3 dashes).
  // https://www.w3.org/TR/html51/syntax.html#comment-start-dash-state
  function stateCommentStartDash(context, char) {
      switch (char) {
          case '-':
              // We've read '<!----' (4 dashes) at this point
              context.state = 18 /* State.CommentEnd */;
              break;
          case '>':
              // The sequence read is '<!--->'. Assume it wasn't a real comment
              // and let it be emitted as data
              resetToDataState(context);
              break;
          default:
              // Anything else, take it as a valid comment
              context.state = 16 /* State.Comment */;
      }
  }
  // Handles a character while the comment's text (data) is being read. A '-'
  // may begin the end of the comment; any other character stays in the
  // Comment state.
  // https://www.w3.org/TR/html51/syntax.html#comment-state
  function stateComment(context, char) {
      if (char !== '-') {
          return;
      }
      context.state = 17 /* State.CommentEndDash */;
  }
  // Handles the character after a first dash read inside a comment. A second
  // dash may signal the end of the comment; anything else must still be part
  // of the comment.
  // https://www.w3.org/TR/html51/syntax.html#comment-end-dash-state
  function stateCommentEndDash(context, char) {
      context.state = char === '-'
          ? 18 /* State.CommentEnd */
          : 16 /* State.Comment */;
  }
  // Handles the character after two dashes read inside a comment. A '>' then
  // ends the comment.
  // https://www.w3.org/TR/html51/syntax.html#comment-end-state
  function stateCommentEnd(context, char) {
      switch (char) {
          case '>':
              emitTagAndPreviousTextNode(context);
              break;
          case '!':
              context.state = 19 /* State.CommentEndBang */;
              break;
          case '-':
              // A 3rd '-' has been read: stay in the CommentEnd state
              break;
          default:
              // Anything else, switch back to the comment state since the full
              // "end comment" sequence (i.e. '-->') was not read
              context.state = 16 /* State.Comment */;
      }
  }
  // Handles the character that follows the sequence '--!' inside of a comment.
  // https://www.w3.org/TR/html51/syntax.html#comment-end-bang-state
  function stateCommentEndBang(context, char) {
      switch (char) {
          case '-':
              // The sequence '--!-' was read inside of a comment. The last dash
              // could signify that the comment is going to close
              context.state = 17 /* State.CommentEndDash */;
              break;
          case '>':
              // End of comment with the sequence '--!>'
              emitTagAndPreviousTextNode(context);
              break;
          default:
              // The '--!' was not followed by a '>', continue reading the
              // comment's text
              context.state = 16 /* State.Comment */;
      }
  }
  /**
   * Handles a character inside a doctype declaration. The attributes of a
   * DOCTYPE are of no interest here: characters are skipped until a '>' is
   * read, at which point the tag is emitted. A '<' starts a new tag instead.
   *
   * Example doctype tag:
   *    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
   *
   * Actual spec: https://www.w3.org/TR/html51/syntax.html#doctype-state
   */
  function stateDoctype(context, char) {
      switch (char) {
          case '>':
              emitTagAndPreviousTextNode(context);
              break;
          case '<':
              startNewTag(context);
              break;
          // any other character: stay in the Doctype state
      }
  }
  /**
   * Drops the tag currently being read and puts the state machine back into
   * the Data state.
   *
   * This is generally run whenever a "parse error" is encountered, i.e. the
   * tag that was being read no longer looks like a real HTML tag.
   */
  function resetToDataState(context) {
      context.currentTag = noCurrentTag;
      context.state = 0 /* State.Data */;
  }
  /**
   * Begins a new HTML tag at the current character index, discarding any tag
   * that was previously being read.
   *
   * This is generally run whenever a '<' character is read, including a '<'
   * found inside of an HTML tag that was being read.
   */
  function startNewTag(context) {
      var tagStartIdx = context.charIdx;
      context.currentTag = new CurrentTag({ idx: tagStartIdx });
      context.state = 1 /* State.TagOpen */;
  }
  /**
   * Emits the current tag through the matching callback, preceded by the text
   * (if any) between the previous data start index and the tag. Afterwards
   * the state machine is back in the Data state and the next run of text
   * starts right after the current character.
   */
  function emitTagAndPreviousTextNode(context) {
      var tag = context.currentTag;
      var callbacks = context.callbacks;
      var precedingText = context.html.slice(context.currentDataIdx, tag.idx);
      // The text is empty when the tag is the first thing in the html string
      // or directly follows another tag; no text node is emitted in that case
      if (precedingText) {
          callbacks.onText(precedingText, context.currentDataIdx);
      }
      switch (tag.type) {
          case 'comment':
              callbacks.onComment(tag.idx);
              break;
          case 'doctype':
              callbacks.onDoctype(tag.idx);
              break;
          default:
              if (tag.isOpening) {
                  callbacks.onOpenTag(tag.name, tag.idx);
              }
              if (tag.isClosing) {
                  // note: self-closing tags will emit both opening and closing
                  callbacks.onCloseTag(tag.name, tag.idx);
              }
      }
      // Since a tag was just emitted, reset to the data state for the next char
      resetToDataState(context);
      context.currentDataIdx = context.charIdx + 1;
  }
  /**
   * Emits the text between the current data start index and the current
   * character index through `onText`, then moves the data start index past
   * the current character.
   */
  function emitText(context) {
      var dataStart = context.currentDataIdx;
      var dataEnd = context.charIdx;
      context.callbacks.onText(context.html.slice(dataStart, dataEnd), dataStart);
      context.currentDataIdx = context.charIdx + 1;
  }
  /**
   * Returns the lower-cased tag name, read from just after the tag's leading
   * '<' (or '</' for a closing tag) up to, but not including, the current
   * character index.
   */
  function captureTagName(context) {
      var tag = context.currentTag;
      var prefixLength = tag.isClosing ? 2 : 1;
      var nameStart = tag.idx + prefixLength;
      var rawName = context.html.slice(nameStart, context.charIdx);
      return rawName.toLowerCase();
  }
  /**
   * Steps the character index back by one so that the main loop, which
   * advances the index after every state function call, processes the same
   * character again. Used after a "parse error" that changed state and needs
   * the character handled in that new state.
   */
  function reconsumeCurrentChar(context) {
      context.charIdx -= 1;
  }
  681. //# sourceMappingURL=parse-html.js.map