智慧水务管理系统 - 精河县供水工程综合管理平台

parse-html.d.ts 3.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  /**
   * The callback functions that can be provided to {@link parseHtml}.
   *
   * Every callback is required. Return values are ignored (`void`).
   */
  export interface ParseHtmlCallbacks {
      /**
       * Called when an open tag is parsed.
       *
       * @param tagName The name of the tag that was opened.
       * @param offset Numeric offset associated with the tag (presumably its
       *   position in the input string; confirm against the implementation).
       */
      onOpenTag: (tagName: string, offset: number) => void;

      /**
       * Called when a close tag is parsed. For a self-closing tag this is
       * called immediately after `onOpenTag`.
       *
       * @param tagName The name of the tag that was closed.
       * @param offset Numeric offset associated with the tag (presumably its
       *   position in the input string; confirm against the implementation).
       */
      onCloseTag: (tagName: string, offset: number) => void;

      /**
       * Called when text (i.e. not an HTML tag) is parsed.
       *
       * @param text The text that was found.
       * @param offset The offset of the text into the input string.
       */
      onText: (text: string, offset: number) => void;

      /**
       * Presumably called when an HTML comment is encountered (inferred from
       * the name; confirm against the implementation).
       *
       * @param offset Numeric offset associated with the comment.
       */
      onComment: (offset: number) => void;

      /**
       * Presumably called when a doctype declaration is encountered (inferred
       * from the name; confirm against the implementation).
       *
       * @param offset Numeric offset associated with the doctype.
       */
      onDoctype: (offset: number) => void;
  }
  /**
   * Parses an HTML string, calling the callbacks to notify of tags and text.
   *
   * ## History
   *
   * This file previously used a regular expression to find HTML tags in the
   * input text. Unfortunately, we ran into a bunch of catastrophic backtracking
   * issues with certain input text, causing Autolinker to either hang or just
   * take a really long time to parse the string.
   *
   * The current code is intended to be an O(n) algorithm that walks through
   * the string in one pass, and tries to be as cheap as possible. We don't need
   * to implement the full HTML spec, but rather simply determine where the
   * string looks like an HTML tag, and where it looks like text (so that we can
   * autolink that).
   *
   * This state machine parser is intended just to be a simple but performant
   * parser of HTML for the subset of requirements we have. We simply need to:
   *
   * 1. Determine where HTML tags are
   * 2. Determine the tag name (Autolinker specifically only cares about `<a>`,
   *    `<script>`, and `<style>` tags, so as not to link any text within them)
   *
   * We don't need to:
   *
   * 1. Create a parse tree
   * 2. Auto-close tags with invalid markup
   * 3. etc.
   *
   * The other intention behind this is that we didn't want to add external
   * dependencies on the Autolinker utility which would increase its size. For
   * instance, adding htmlparser2 adds 125kb to the minified output file,
   * increasing its final size from 47kb to 172kb (at the time of writing). It
   * also doesn't work exactly correctly, treating the string
   * "<3 blah blah blah" as an HTML tag.
   *
   * Reference for HTML spec:
   *
   * https://www.w3.org/TR/html51/syntax.html#sec-tokenization
   *
   * @param html The HTML to parse.
   * @param callbacks The functions to notify as the input is walked:
   *   - `onOpenTag(tagName, offset)`: called when an open tag is parsed.
   *   - `onCloseTag(tagName, offset)`: called when a close tag is parsed. If a
   *     self-closing tag is found, `onCloseTag` is called immediately after
   *     `onOpenTag`.
   *   - `onText(text, offset)`: called when text (i.e. not an HTML tag) is
   *     parsed, with the text as its first argument and the offset into the
   *     string as its second.
   *   - `onComment(offset)` / `onDoctype(offset)`: required by the callbacks
   *     type; presumably called for HTML comments and doctype declarations
   *     respectively (confirm against the implementation).
   */
  export declare function parseHtml(html: string, callbacks: ParseHtmlCallbacks): void;
  /**
   * The subset of the parser states defined in
   * https://www.w3.org/TR/html51/syntax.html which are useful for Autolinker.
   *
   * Member names follow the like-named tokenizer states of that spec. The
   * numeric values are sequential, from 0 through 20.
   */
  export declare const enum State {
      // Content and tag-name states
      Data = 0,
      TagOpen = 1,
      EndTagOpen = 2,
      TagName = 3,

      // Attribute states
      BeforeAttributeName = 4,
      AttributeName = 5,
      AfterAttributeName = 6,
      BeforeAttributeValue = 7,
      AttributeValueDoubleQuoted = 8,
      AttributeValueSingleQuoted = 9,
      AttributeValueUnquoted = 10,
      AfterAttributeValueQuoted = 11,

      SelfClosingStartTag = 12,

      // When the sequence '<!' is read for an HTML comment or doctype
      MarkupDeclarationOpenState = 13,

      // Comment states
      CommentStart = 14,
      CommentStartDash = 15,
      Comment = 16,
      CommentEndDash = 17,
      CommentEnd = 18,
      CommentEndBang = 19,

      Doctype = 20
  }