智慧水务管理系统 - 精河县供水工程综合管理平台

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. import { __assign } from "tslib";
  2. import { isDigitChar, isAsciiLetterChar, isQuoteChar, isWhitespaceChar, isControlChar, } from '../char-utils';
  3. import { assertNever } from '../utils';
  4. // For debugging: search for other "For debugging" lines
  5. // import CliTable from 'cli-table';
  /**
   * Describes the tag that the state machine is currently reading.
   *
   * The code in this file never mutates an instance in place: whenever a field
   * has to change, a new `CurrentTag` is built from a copy of the previous one.
   *
   * @param {Object} [cfg] Optional initial field values.
   * @param {Number} [cfg.idx=-1] Index of the tag's '<' character in the input
   *   string. -1 means "no tag".
   * @param {String} [cfg.type='tag'] One of 'tag', 'comment' or 'doctype'.
   * @param {String} [cfg.name=''] The tag name, once it has been captured.
   * @param {Boolean} [cfg.isOpening=false] True for a start tag.
   * @param {Boolean} [cfg.isClosing=false] True for an end tag (both flags are
   *   set for a self-closing tag).
   */
  var CurrentTag = /** @class */ (function () {
      function CurrentTag(cfg) {
          if (cfg === void 0) { cfg = {}; }
          // `idx` may legitimately be 0, so it cannot use the `||` fallback
          this.idx = cfg.idx !== undefined ? cfg.idx : -1;
          this.type = cfg.type || 'tag';
          this.name = cfg.name || '';
          this.isOpening = !!cfg.isOpening;
          this.isClosing = !!cfg.isClosing;
      }
      return CurrentTag;
  }());
  // Shared sentinel used whenever no tag is currently being read
  var noCurrentTag = new CurrentTag();
  /**
   * Context object containing all the state needed by the HTML parsing state
   * machine functions.
   *
   * ## Historical note
   *
   * In v4.1.5, nested functions handled the context via closures, but that
   * required re-creating the functions on every call to `parseHtml()`, which
   * made them difficult for v8 to JIT optimize. In v4.1.6, all of the functions
   * were lifted to the top-level scope and the context object is passed between
   * them, so the functions can be JIT compiled once and reused.
   *
   * @param {String} html The HTML string being parsed.
   * @param {Object} callbacks The callbacks object handed to `parseHtml()`.
   */
  var ParseHtmlContext = /** @class */ (function () {
      function ParseHtmlContext(html, callbacks) {
          this.charIdx = 0; // index of the character currently being processed
          this.state = 0 /* State.Data */; // the machine begins in the Data state
          this.currentDataIdx = 0; // index where the current run of text (data) starts
          this.currentTag = noCurrentTag; // the tag currently being read, if any
          this.html = html;
          this.callbacks = callbacks;
      }
      return ParseHtmlContext;
  }());
  /**
   * Parses an HTML string, calling the callbacks to notify of tags and text.
   *
   * ## History
   *
   * This file previously used a regular expression to find html tags in the
   * input text. Unfortunately, that ran into catastrophic backtracking with
   * certain input text, causing Autolinker to either hang or take a really long
   * time to parse the string.
   *
   * The current code is intended to be an O(n) algorithm that walks through the
   * string in one pass, and tries to be as cheap as possible. It does not need
   * to implement the full HTML spec, but rather simply determine where the
   * string looks like an HTML tag, and where it looks like text (so that the
   * text can be autolinked).
   *
   * This state machine parser is intended to be a simple but performant parser
   * of HTML for the subset of requirements we have. We simply need to:
   *
   * 1. Determine where HTML tags are
   * 2. Determine the tag name (Autolinker specifically only cares about <a>,
   *    <script>, and <style> tags, so as not to link any text within them)
   *
   * We don't need to:
   *
   * 1. Create a parse tree
   * 2. Auto-close tags with invalid markup
   * 3. etc.
   *
   * The other intention behind this is that we didn't want to add external
   * dependencies on the Autolinker utility which would increase its size. For
   * instance, adding htmlparser2 adds 125kb to the minified output file,
   * increasing its final size from 47kb to 172kb (at the time of writing). It
   * also doesn't work exactly correctly, treating the string "<3 blah blah blah"
   * as an HTML tag.
   *
   * Reference for HTML spec:
   *
   *     https://www.w3.org/TR/html51/syntax.html#sec-tokenization
   *
   * @param {String} html The HTML to parse
   * @param {Object} callbacks
   * @param {Function} callbacks.onOpenTag Called when an open tag is parsed,
   *   with the lower-cased tag name and the index of the tag's '<' character.
   * @param {Function} callbacks.onCloseTag Called when a close tag is parsed,
   *   with the lower-cased tag name and the index of the tag's '<' character.
   *   If a self-closing tag is found, `onCloseTag` is called immediately after
   *   `onOpenTag`.
   * @param {Function} callbacks.onText Called when text (i.e. not an HTML tag)
   *   is parsed, with the text (string) as its first argument and its offset
   *   (number) into the string as its second.
   * @param {Function} callbacks.onComment Called when an HTML comment is
   *   parsed, with the index of the comment's '<' character.
   * @param {Function} callbacks.onDoctype Called when a doctype declaration is
   *   parsed, with the index of its '<' character.
   */
  export function parseHtml(html, callbacks) {
      var context = new ParseHtmlContext(html, callbacks);
      // For debugging: search for other "For debugging" lines
      // const table = new CliTable( {
      //     head: [ 'charIdx', 'char', 'state', 'currentDataIdx', 'currentOpenTagIdx', 'tag.type' ]
      // } );
      var len = html.length;
      while (context.charIdx < len) {
          var char = html.charAt(context.charIdx);
          var charCode = html.charCodeAt(context.charIdx);
          // For debugging: search for other "For debugging" lines
          // ALSO: Temporarily remove the 'const' keyword on the State enum
          // table.push([
          //     String(context.charIdx),
          //     char,
          //     State[context.state],
          //     String(context.currentDataIdx),
          //     String(context.currentTag.idx),
          //     context.currentTag.idx === -1 ? '' : context.currentTag.type
          // ]);
          switch (context.state) {
              case 0 /* State.Data */:
                  stateData(context, char);
                  break;
              case 1 /* State.TagOpen */:
                  stateTagOpen(context, char, charCode);
                  break;
              case 2 /* State.EndTagOpen */:
                  stateEndTagOpen(context, char, charCode);
                  break;
              case 3 /* State.TagName */:
                  stateTagName(context, char, charCode);
                  break;
              case 4 /* State.BeforeAttributeName */:
                  stateBeforeAttributeName(context, char, charCode);
                  break;
              case 5 /* State.AttributeName */:
                  stateAttributeName(context, char, charCode);
                  break;
              case 6 /* State.AfterAttributeName */:
                  stateAfterAttributeName(context, char, charCode);
                  break;
              case 7 /* State.BeforeAttributeValue */:
                  stateBeforeAttributeValue(context, char, charCode);
                  break;
              case 8 /* State.AttributeValueDoubleQuoted */:
                  stateAttributeValueDoubleQuoted(context, char);
                  break;
              case 9 /* State.AttributeValueSingleQuoted */:
                  stateAttributeValueSingleQuoted(context, char);
                  break;
              case 10 /* State.AttributeValueUnquoted */:
                  stateAttributeValueUnquoted(context, char, charCode);
                  break;
              case 11 /* State.AfterAttributeValueQuoted */:
                  stateAfterAttributeValueQuoted(context, char, charCode);
                  break;
              case 12 /* State.SelfClosingStartTag */:
                  stateSelfClosingStartTag(context, char);
                  break;
              case 13 /* State.MarkupDeclarationOpenState */:
                  stateMarkupDeclarationOpen(context);
                  break;
              case 14 /* State.CommentStart */:
                  stateCommentStart(context, char);
                  break;
              case 15 /* State.CommentStartDash */:
                  stateCommentStartDash(context, char);
                  break;
              case 16 /* State.Comment */:
                  stateComment(context, char);
                  break;
              case 17 /* State.CommentEndDash */:
                  stateCommentEndDash(context, char);
                  break;
              case 18 /* State.CommentEnd */:
                  stateCommentEnd(context, char);
                  break;
              case 19 /* State.CommentEndBang */:
                  stateCommentEndBang(context, char);
                  break;
              case 20 /* State.Doctype */:
                  stateDoctype(context, char);
                  break;
              /* istanbul ignore next */
              default:
                  assertNever(context.state);
          }
          // For debugging: search for other "For debugging" lines
          // ALSO: Temporarily remove the 'const' keyword on the State enum
          // table.push([
          //     String(context.charIdx),
          //     char,
          //     State[context.state],
          //     String(context.currentDataIdx),
          //     String(context.currentTag.idx),
          //     context.currentTag.idx === -1 ? '' : context.currentTag.type
          // ]);
          // A state function may have moved charIdx (forwards to skip characters,
          // or backwards to re-consume one); always advance by one from there
          context.charIdx++;
      }
      // Flush whatever text remains after the last emitted tag
      if (context.currentDataIdx < context.charIdx) {
          emitText(context);
      }
      // For debugging: search for other "For debugging" lines
      // console.log( '\n' + table.toString() );
  }
  /**
   * Data state: called while non-tag characters are being read (i.e. the text
   * around HTML tags). Only a '<' is significant here; it begins a new tag.
   *
   * https://www.w3.org/TR/html51/syntax.html#data-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateData(context, char) {
      if (char === '<') {
          startNewTag(context);
      }
  }
  /**
   * Tag Open state: called for the character that follows a '<' read in the
   * Data state.
   *
   * https://www.w3.org/TR/html51/syntax.html#tag-open-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateTagOpen(context, char, charCode) {
      if (char === '!') {
          context.state = 13 /* State.MarkupDeclarationOpenState */;
      }
      else if (char === '/') {
          context.state = 2 /* State.EndTagOpen */;
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { isClosing: true }));
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else if (isAsciiLetterChar(charCode)) {
          // tag name start (and no '/' read)
          context.state = 3 /* State.TagName */;
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { isOpening: true }));
      }
      else {
          // Any other character: the '<' was just text, not the start of a tag
          resetToDataState(context);
      }
  }
  /**
   * Tag Name state: entered after a '<x' or '</x' sequence has been read
   * (where 'x' is a letter character); continues reading the tag name.
   *
   * https://www.w3.org/TR/html51/syntax.html#tag-name-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateTagName(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          // Name is complete; attributes may follow
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { name: captureTagName(context) }));
          context.state = 4 /* State.BeforeAttributeName */;
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else if (char === '/') {
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { name: captureTagName(context) }));
          context.state = 12 /* State.SelfClosingStartTag */;
      }
      else if (char === '>') {
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { name: captureTagName(context) }));
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (!isAsciiLetterChar(charCode) && !isDigitChar(charCode) && char !== ':') {
          // Anything else that does not form an html tag. Note: the colon
          // character is accepted for XML namespaced tags
          resetToDataState(context);
      }
      // Otherwise (letter, digit or ':'): continue reading the tag name
  }
  /**
   * End Tag Open state: called for the character after the '/' of a '</'
   * sequence.
   *
   * https://www.w3.org/TR/html51/syntax.html#end-tag-open-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateEndTagOpen(context, char, charCode) {
      if (char === '>') {
          // parse error. Encountered "</>". Skip it without treating as a tag
          resetToDataState(context);
      }
      else if (isAsciiLetterChar(charCode)) {
          context.state = 3 /* State.TagName */;
      }
      else {
          // some other non-tag-like character, don't treat this as a tag
          resetToDataState(context);
      }
  }
  /**
   * Before Attribute Name state: inside a tag, after the tag name or after a
   * previous attribute, waiting for the next attribute name or the tag's end.
   *
   * https://www.w3.org/TR/html51/syntax.html#before-attribute-name-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateBeforeAttributeName(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          // stay in BeforeAttributeName state - continue reading chars
      }
      else if (char === '/') {
          context.state = 12 /* State.SelfClosingStartTag */;
      }
      else if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else if (char === "=" || isQuoteChar(charCode) || isControlChar(charCode)) {
          // "Parse error" characters that, according to the spec, should be
          // appended to the attribute name, but we'll treat these characters
          // as not forming a real HTML tag
          resetToDataState(context);
      }
      else {
          // Any other char, start of a new attribute name
          context.state = 5 /* State.AttributeName */;
      }
  }
  /**
   * Attribute Name state: currently reading the characters of an attribute
   * name.
   *
   * https://www.w3.org/TR/html51/syntax.html#attribute-name-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateAttributeName(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          context.state = 6 /* State.AfterAttributeName */;
      }
      else if (char === '/') {
          context.state = 12 /* State.SelfClosingStartTag */;
      }
      else if (char === '=') {
          context.state = 7 /* State.BeforeAttributeValue */;
      }
      else if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else if (isQuoteChar(charCode)) {
          // "Parse error" characters that, according to the spec, should be
          // appended to the attribute name, but we'll treat these characters
          // as not forming a real HTML tag
          resetToDataState(context);
      }
      // Anything else: continue reading the attribute name
  }
  /**
   * After Attribute Name state: an attribute name has been read and was
   * followed by whitespace; decides whether a value, another attribute, or the
   * end of the tag comes next.
   *
   * https://www.w3.org/TR/html51/syntax.html#after-attribute-name-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateAfterAttributeName(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          // ignore the character - continue reading
      }
      else if (char === '/') {
          context.state = 12 /* State.SelfClosingStartTag */;
      }
      else if (char === '=') {
          context.state = 7 /* State.BeforeAttributeValue */;
      }
      else if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else if (isQuoteChar(charCode)) {
          // "Parse error" characters that, according to the spec, should be
          // appended to the attribute name, but we'll treat these characters
          // as not forming a real HTML tag
          resetToDataState(context);
      }
      else {
          // Any other character, start a new attribute in the current tag
          context.state = 5 /* State.AttributeName */;
      }
  }
  /**
   * Before Attribute Value state: an '=' has been read after an attribute
   * name; decides how the attribute value is delimited.
   *
   * https://www.w3.org/TR/html51/syntax.html#before-attribute-value-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateBeforeAttributeValue(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          // ignore the character - continue reading
      }
      else if (char === "\"") {
          context.state = 8 /* State.AttributeValueDoubleQuoted */;
      }
      else if (char === "'") {
          context.state = 9 /* State.AttributeValueSingleQuoted */;
      }
      else if (char === '>' || char === '=' || char === '`') {
          // Invalid chars after an '=' for an attribute value, don't count
          // the current tag as an HTML tag. (Plain comparisons instead of a
          // regex: `char` is always a single character.)
          resetToDataState(context);
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else {
          // Any other character, consider it an unquoted attribute value
          context.state = 10 /* State.AttributeValueUnquoted */;
      }
  }
  /**
   * Attribute Value (Double-Quoted) state: reading a value that was opened
   * with a '"' character.
   *
   * https://www.w3.org/TR/html51/syntax.html#attribute-value-double-quoted-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateAttributeValueDoubleQuoted(context, char) {
      if (char === "\"") {
          // end the current double-quoted attribute
          context.state = 11 /* State.AfterAttributeValueQuoted */;
      }
      // Otherwise: consume the character as part of the double-quoted value
  }
  /**
   * Attribute Value (Single-Quoted) state: reading a value that was opened
   * with a "'" character.
   *
   * https://www.w3.org/TR/html51/syntax.html#attribute-value-single-quoted-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateAttributeValueSingleQuoted(context, char) {
      if (char === "'") {
          // end the current single-quoted attribute
          context.state = 11 /* State.AfterAttributeValueQuoted */;
      }
      // Otherwise: consume the character as part of the single-quoted value
  }
  /**
   * Attribute Value (Unquoted) state: reading an attribute value that is not
   * wrapped in quotes.
   *
   * https://www.w3.org/TR/html51/syntax.html#attribute-value-unquoted-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateAttributeValueUnquoted(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          // Whitespace terminates an unquoted value
          context.state = 4 /* State.BeforeAttributeName */;
      }
      else if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      // Any other character: treat it as part of the attribute value
  }
  /**
   * After Attribute Value (Quoted) state: called after a double-quoted or
   * single-quoted attribute value has been read (i.e. for the character after
   * the closing quote).
   *
   * https://www.w3.org/TR/html51/syntax.html#after-attribute-value-quoted-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   * @param {Number} charCode The char code of the current character.
   */
  function stateAfterAttributeValueQuoted(context, char, charCode) {
      if (isWhitespaceChar(charCode)) {
          context.state = 4 /* State.BeforeAttributeName */;
      }
      else if (char === '/') {
          context.state = 12 /* State.SelfClosingStartTag */;
      }
      else if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '<') {
          // start of another tag (ignore the previous, incomplete one)
          startNewTag(context);
      }
      else {
          // Any other character, "parse error". Spec says to switch to the
          // BeforeAttributeName state and re-consume the character, as it may
          // be the start of a new attribute name
          context.state = 4 /* State.BeforeAttributeName */;
          reconsumeCurrentChar(context);
      }
  }
  /**
   * Self-Closing Start Tag state: a '/' has just been read in the current tag
   * (presumably for '/>'), and this handles the next character.
   *
   * https://www.w3.org/TR/html51/syntax.html#self-closing-start-tag-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateSelfClosingStartTag(context, char) {
      if (char === '>') {
          // Mark the tag as closing too, so both open and close are emitted
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { isClosing: true }));
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else {
          // Note: the spec calls for a character after a '/' within a start
          // tag to go back into the BeforeAttributeName state (in order to
          // read more attributes), but for the purposes of Autolinker, this is
          // most likely not a valid HTML tag. For example: "<something / other>"
          // state = State.BeforeAttributeName;
          // Instead, just treat as regular text
          resetToDataState(context);
      }
  }
  /**
   * Markup Declaration Open state: a '<!' has been read, which may begin an
   * HTML comment ('<!--') or a doctype ('<!DOCTYPE', matched
   * case-insensitively). `context.charIdx` points at the first character after
   * the '!'.
   *
   * https://www.w3.org/TR/html51/syntax.html#markup-declaration-open-state
   *
   * @param {ParseHtmlContext} context The parser state.
   */
  function stateMarkupDeclarationOpen(context) {
      var html = context.html, charIdx = context.charIdx;
      if (html.slice(charIdx, charIdx + 2) === '--') {
          // html comment
          context.charIdx++; // "consume" the second '-' character. Next loop iteration will consume the character after the '<!--' sequence
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { type: 'comment' }));
          context.state = 14 /* State.CommentStart */;
      }
      else if (html.slice(charIdx, charIdx + 7).toUpperCase() === 'DOCTYPE') {
          context.charIdx += 6; // "consume" the characters "OCTYPE" (the current loop iteration consumed the 'D'). Next loop iteration will consume the character after the '<!DOCTYPE' sequence
          context.currentTag = new CurrentTag(__assign(__assign({}, context.currentTag), { type: 'doctype' }));
          context.state = 20 /* State.Doctype */;
      }
      else {
          // At this point, the spec specifies that the state machine should
          // enter the "bogus comment" state, in which case any character(s)
          // after the '<!' that were read should become an HTML comment up
          // until the first '>' that is read (or EOF). Instead, we'll assume
          // that a user just typed '<!' as part of some piece of non-html
          // text
          resetToDataState(context);
      }
  }
  /**
   * Comment Start state: handles the character after the sequence '<!--' has
   * been read.
   *
   * https://www.w3.org/TR/html51/syntax.html#comment-start-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateCommentStart(context, char) {
      if (char === '-') {
          // We've read the sequence '<!---' at this point (3 dashes)
          context.state = 15 /* State.CommentStartDash */;
      }
      else if (char === '>') {
          // At this point, we'll assume the comment wasn't a real comment
          // so we'll just emit it as data. We basically read the sequence
          // '<!-->'
          resetToDataState(context);
      }
      else {
          // Any other char, take it as part of the comment
          context.state = 16 /* State.Comment */;
      }
  }
  /**
   * Comment Start Dash state: the sequence '<!---' (3 dashes) has been read.
   *
   * https://www.w3.org/TR/html51/syntax.html#comment-start-dash-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateCommentStartDash(context, char) {
      if (char === '-') {
          // We've read '<!----' (4 dashes) at this point
          context.state = 18 /* State.CommentEnd */;
      }
      else if (char === '>') {
          // At this point, we'll assume the comment wasn't a real comment
          // so we'll just emit it as data. We basically read the sequence
          // '<!--->'
          resetToDataState(context);
      }
      else {
          // Anything else, take it as a valid comment
          context.state = 16 /* State.Comment */;
      }
  }
  /**
   * Comment state: currently reading the comment's text (data).
   *
   * https://www.w3.org/TR/html51/syntax.html#comment-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateComment(context, char) {
      if (char === '-') {
          // A dash may be the beginning of the closing '-->' sequence
          context.state = 17 /* State.CommentEndDash */;
      }
      // Any other character: stay in the Comment state
  }
  /**
   * Comment End Dash state: a first dash has been read inside a comment. It
   * may signal the end of the comment if another dash follows.
   *
   * https://www.w3.org/TR/html51/syntax.html#comment-end-dash-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateCommentEndDash(context, char) {
      if (char === '-') {
          context.state = 18 /* State.CommentEnd */;
      }
      else {
          // Wasn't a dash, must still be part of the comment
          context.state = 16 /* State.Comment */;
      }
  }
  /**
   * Comment End state: two dashes have been read inside a comment. They signal
   * the end of the comment if a '>' character follows.
   *
   * https://www.w3.org/TR/html51/syntax.html#comment-end-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateCommentEnd(context, char) {
      if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '!') {
          context.state = 19 /* State.CommentEndBang */;
      }
      else if (char === '-') {
          // A 3rd '-' has been read: stay in the CommentEnd state
      }
      else {
          // Anything else, switch back to the comment state since we didn't
          // read the full "end comment" sequence (i.e. '-->')
          context.state = 16 /* State.Comment */;
      }
  }
  /**
   * Comment End Bang state: the sequence '--!' has been read inside of a
   * comment.
   *
   * https://www.w3.org/TR/html51/syntax.html#comment-end-bang-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateCommentEndBang(context, char) {
      if (char === '-') {
          // We read the sequence '--!-' inside of a comment. The last dash
          // could signify that the comment is going to close
          context.state = 17 /* State.CommentEndDash */;
      }
      else if (char === '>') {
          // End of comment with the sequence '--!>'
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else {
          // The '--!' was not followed by a '>', continue reading the
          // comment's text
          context.state = 16 /* State.Comment */;
      }
  }
  /**
   * Doctype state. For DOCTYPES in particular, we don't care about the
   * attributes. Just advance to the '>' character and emit the tag, unless we
   * find a '<' character, in which case we'll start a new tag.
   *
   * Example doctype tag:
   *    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
   *
   * Actual spec: https://www.w3.org/TR/html51/syntax.html#doctype-state
   *
   * @param {ParseHtmlContext} context The parser state.
   * @param {String} char The current character.
   */
  function stateDoctype(context, char) {
      if (char === '>') {
          emitTagAndPreviousTextNode(context); // resets to Data state as well
      }
      else if (char === '<') {
          // start of another tag (ignore the incomplete doctype)
          startNewTag(context);
      }
      // Any other character: stay in the Doctype state
  }
  /**
   * Resets the state back to the Data state, and removes the current tag.
   *
   * We'll generally run this function whenever a "parse error" is encountered,
   * where the current tag that is being read no longer looks like a real HTML
   * tag. `currentDataIdx` is left untouched, so the characters read so far
   * remain part of the pending text.
   *
   * @param {ParseHtmlContext} context The parser state.
   */
  function resetToDataState(context) {
      context.state = 0 /* State.Data */;
      context.currentTag = noCurrentTag;
  }
  /**
   * Starts a new HTML tag at the current index, ignoring any previous HTML
   * tag that was being read.
   *
   * We'll generally run this function whenever we read a new '<' character,
   * including when we read a '<' character inside of an HTML tag that we were
   * previously reading.
   *
   * @param {ParseHtmlContext} context The parser state; `context.charIdx` must
   *   point at the '<' character.
   */
  function startNewTag(context) {
      context.state = 1 /* State.TagOpen */;
      context.currentTag = new CurrentTag({ idx: context.charIdx });
  }
  /**
   * Emits the tag that has just been completed (open tag, close tag, comment
   * or doctype), preceded by the text node in front of it, if there is one.
   *
   * Afterwards the machine is back in the Data state and the next run of text
   * begins at the character after the current one.
   *
   * @param {ParseHtmlContext} context The parser state; `context.charIdx` must
   *   point at the last character of the tag.
   */
  function emitTagAndPreviousTextNode(context) {
      var textBeforeTag = context.html.slice(context.currentDataIdx, context.currentTag.idx);
      // The text is empty when the tag is the first thing in the html string,
      // or when two tags sit next to each other; in those cases no text node
      // is emitted
      if (textBeforeTag) {
          context.callbacks.onText(textBeforeTag, context.currentDataIdx);
      }
      var currentTag = context.currentTag;
      if (currentTag.type === 'comment') {
          context.callbacks.onComment(currentTag.idx);
      }
      else if (currentTag.type === 'doctype') {
          context.callbacks.onDoctype(currentTag.idx);
      }
      else {
          if (currentTag.isOpening) {
              context.callbacks.onOpenTag(currentTag.name, currentTag.idx);
          }
          if (currentTag.isClosing) {
              // note: self-closing tags will emit both opening and closing
              context.callbacks.onCloseTag(currentTag.name, currentTag.idx);
          }
      }
      // Since we just emitted a tag, reset to the data state for the next char
      resetToDataState(context);
      context.currentDataIdx = context.charIdx + 1;
  }
  /**
   * Emits the pending text, i.e. the characters from `currentDataIdx` up to
   * (but not including) `charIdx`, through the `onText` callback, then moves
   * `currentDataIdx` to one past `charIdx`.
   *
   * @param {ParseHtmlContext} context The parser state.
   */
  function emitText(context) {
      var text = context.html.slice(context.currentDataIdx, context.charIdx);
      context.callbacks.onText(text, context.currentDataIdx);
      context.currentDataIdx = context.charIdx + 1;
  }
  /**
   * Captures the tag name from the start of the tag to the current character
   * index, and converts it to lower case.
   *
   * @param {ParseHtmlContext} context The parser state; `context.charIdx` must
   *   point at the first character after the tag name.
   * @return {String} The lower-cased tag name.
   */
  function captureTagName(context) {
      // Skip the '<' of an open tag, or the '</' of a close tag
      var startIdx = context.currentTag.idx + (context.currentTag.isClosing ? 2 : 1);
      return context.html.slice(startIdx, context.charIdx).toLowerCase();
  }
  /**
   * Causes the main loop to re-consume the current character, such as after
   * encountering a "parse error" that changed state and needs to reconsume
   * the same character in that new state.
   *
   * This steps `charIdx` back by one; the main loop's increment at the end of
   * the iteration then lands on the same character again.
   *
   * @param {ParseHtmlContext} context The parser state.
   */
  function reconsumeCurrentChar(context) {
      context.charIdx--;
  }
  678. //# sourceMappingURL=parse-html.js.map