| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- /**
- * The callback functions that can be provided to {@link parseHtml}.
- *
- * Every callback receives a numeric `offset` as its last argument. The
- * `parseHtml` documentation describes the `onText` offset as the offset into
- * the string being parsed.
- *
- * NOTE(review): the offsets passed to the other callbacks presumably follow
- * the same convention (an index into the parsed string) - confirm against the
- * implementation, which is not part of this declaration.
- */
- export interface ParseHtmlCallbacks {
- onOpenTag: (tagName: string, offset: number) => void; // Called when an open tag is parsed
- onCloseTag: (tagName: string, offset: number) => void; // Called when a close tag is parsed; per the parseHtml docs, also called immediately after onOpenTag for a self-closing tag
- onText: (text: string, offset: number) => void; // Called when text (i.e. not an HTML tag) is parsed
- onComment: (offset: number) => void; // NOTE(review): presumably called when an HTML comment is found (inferred from the name) - confirm
- onDoctype: (offset: number) => void; // NOTE(review): presumably called when a doctype declaration is found (inferred from the name) - confirm
- }
- /**
- * Parses an HTML string, calling the callbacks to notify of tags and text.
- *
- * ## History
- *
- * This file previously used a regular expression to find HTML tags in the
- * input text. Unfortunately, we ran into a bunch of catastrophic backtracking
- * issues with certain input text, causing Autolinker to either hang or just
- * take a really long time to parse the string.
- *
- * The current code is intended to be an O(n) algorithm that walks through
- * the string in one pass, and tries to be as cheap as possible. We don't need
- * to implement the full HTML spec, but rather simply determine where the string
- * looks like an HTML tag, and where it looks like text (so that we can autolink
- * that).
- *
- * This state machine parser is intended just to be a simple but performant
- * parser of HTML for the subset of requirements we have. We simply need to:
- *
- * 1. Determine where HTML tags are
- * 2. Determine the tag name (Autolinker specifically only cares about <a>,
- * <script>, and <style> tags, so as not to link any text within them)
- *
- * We don't need to:
- *
- * 1. Create a parse tree
- * 2. Auto-close tags with invalid markup
- * 3. etc.
- *
- * The other intention behind this is that we didn't want to add external
- * dependencies on the Autolinker utility which would increase its size. For
- * instance, adding htmlparser2 adds 125kb to the minified output file,
- * increasing its final size from 47kb to 172kb (at the time of writing). It
- * also doesn't work exactly correctly, treating the string "<3 blah blah blah"
- * as an HTML tag.
- *
- * Reference for HTML spec:
- *
- * https://www.w3.org/TR/html51/syntax.html#sec-tokenization
- *
- * @param html The HTML to parse.
- * @param callbacks The functions to call as tags and text are found.
- * @param callbacks.onOpenTag Callback function to call when an open tag is
- * parsed. Called with the tagName (string) as its first argument, and an
- * offset (number) as its second.
- * @param callbacks.onCloseTag Callback function to call when a close tag is
- * parsed. Called with the tagName (string) as its first argument, and an
- * offset (number) as its second. If a self-closing tag is found,
- * `onCloseTag` is called immediately after `onOpenTag`.
- * @param callbacks.onText Callback function to call when text (i.e. not an
- * HTML tag) is parsed. Called with the text (string) as its first argument,
- * and offset (number) into the string as its second.
- * @param callbacks.onComment Callback function called with an offset (number)
- * as its only argument. NOTE(review): presumably called when an HTML comment
- * is parsed - confirm against the implementation.
- * @param callbacks.onDoctype Callback function called with an offset (number)
- * as its only argument. NOTE(review): presumably called when a doctype
- * declaration is parsed - confirm against the implementation.
- * @returns Nothing; everything found is reported through the callbacks.
- *
- * NOTE(review): the offsets passed to the tag, comment and doctype callbacks
- * are presumably indexes into `html`, as documented for `onText` - confirm.
- */
- export declare function parseHtml(html: string, callbacks: ParseHtmlCallbacks): void;
- /**
- * The subset of the parser states defined in https://www.w3.org/TR/html51/syntax.html
- * which are useful for Autolinker.
- *
- * The member names mirror the tokenizer state names used in that spec (for
- * example "Data state", "Tag open state", "Comment end bang state"); see the
- * spec's tokenization section for what each state means.
- *
- * This is declared as a `const enum`, so the TypeScript compiler normally
- * inlines the numeric values at use sites instead of referencing a runtime
- * object. The numbers listed here therefore have to match the values the
- * implementation was compiled with.
- *
- * NOTE(review): ambient const enums cannot be accessed by consumers that
- * compile with `isolatedModules` - confirm this enum is meant to be part of
- * the public typings.
- */
- export declare const enum State {
- Data = 0,
- TagOpen = 1,
- EndTagOpen = 2,
- TagName = 3,
- BeforeAttributeName = 4,
- AttributeName = 5,
- AfterAttributeName = 6,
- BeforeAttributeValue = 7,
- AttributeValueDoubleQuoted = 8,
- AttributeValueSingleQuoted = 9,
- AttributeValueUnquoted = 10,
- AfterAttributeValueQuoted = 11,
- SelfClosingStartTag = 12,
- MarkupDeclarationOpenState = 13, // When the sequence '<!' is read for an HTML comment or doctype
- CommentStart = 14,
- CommentStartDash = 15,
- Comment = 16,
- CommentEndDash = 17,
- CommentEnd = 18,
- CommentEndBang = 19,
- Doctype = 20
- }
|