Last active
September 28, 2021 12:56
-
-
Save blackcater/3184fa55b149406323a54df2702a79ec to your computer and use it in GitHub Desktop.
Vue@2 源码中解析模板为 AST 的核心参考库。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * unicode letters used for parsing html tags, component names and property paths.
 * using https://www.w3.org/TR/html53/semantics-scripting.html#potentialcustomelementname
 * skipping \u10000-\uEFFFF due to it freezing up PhantomJS
 *
 * NOTE: the pattern is a bare list of character ranges with no surrounding
 * brackets. It is meant to be spliced, via `.source`, inside a character
 * class of another expression; used directly as a RegExp it would only match
 * the literal text "a-zA-Z…".
 */
export const unicodeRegExp = /a-zA-Z\u00B7\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Not type-checking this file because it's mostly vendor code.
 */
/*!
 * HTML Parser By John Resig (ejohn.org)
 * Modified by Juriy "kangax" Zaytsev
 * Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 */
import { makeMap, no } from 'shared/util' | |
import { isNonPhrasingTag } from 'web/compiler/util' | |
import { unicodeRegExp } from 'core/util/lang' | |
// Regular Expressions for parsing tags and attributes

// Matches one attribute at the start of the input, including any leading
// whitespace. Capture groups:
//   1 - attribute name
//   2 - the "=" sign (undefined for a valueless attribute)
//   3 - value when wrapped in double quotes
//   4 - value when wrapped in single quotes
//   5 - value when unquoted
const attribute =
  /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/

// Same capture layout as `attribute`, but the name must start with a
// directive prefix (`v-xxx:`, `@`, `:` or `#`) followed by a bracketed
// argument, e.g. `:[key]` or `v-bind:[name].sync`.
const dynamicArgAttribute =
  /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
// A tag-name segment: a letter or underscore followed by any number of
// hyphens, dots, digits, underscores, ASCII letters, or characters from the
// ranges listed in `unicodeRegExp` (spliced into the character class).
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
// A tag name with an optional `prefix:` part; the whole name is captured.
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
// `<tagName` at the start of the input; group 1 is the tag name.
const startTagOpen = new RegExp(`^<${qnameCapture}`)
// The end of a start tag: `>` or `/>`; group 1 is the optional slash.
const startTagClose = /^\s*(\/?)>/
// `</tagName ...>` at the start of the input; group 1 is the tag name.
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
// `<!DOCTYPE ...>` declaration at the start of the input (case-insensitive).
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being passed as HTML comment when inlined in page
const comment = /^<!\--/
// Opening of a conditional comment such as `<![if !IE]>`.
const conditionalComment = /^<!\[/
// Special Elements (can contain anything)
// Predicate used by parseHTML to decide whether the content of the current
// tag must be consumed as raw text up to its closing tag.
// NOTE(review): the second argument to makeMap presumably requests
// case-insensitive lookup — confirm against shared/util.
export const isPlainTextElement = makeMap('script,style,textarea', true)
// Cache of the per-tag "everything up to </tag>" RegExp objects that
// parseHTML builds, keyed by lower-cased tag name.
const reCache = {}
// Maps every HTML entity recognised inside attribute values to the character
// it stands for. The keys must be the *encoded* entity text, because they are
// looked up with the exact substrings matched by the two regular expressions
// below.
const decodingMap = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&amp;': '&',
  '&#10;': '\n',
  '&#9;': '\t',
  '&#39;': "'"
}
// Entities that are always decoded.
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
// Same set plus the encoded newline (&#10;) and tab (&#9;).
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g
// #5992
// Tags for which a newline immediately after the start tag is dropped.
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
// Truthy when `tag` is one of the tags above and `html` begins with "\n".
const shouldIgnoreFirstNewline = (tag, html) =>
  tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
/**
 * Replaces the HTML entities found in an attribute value with the characters
 * they encode, using `decodingMap` for the lookup.
 *
 * @param {string} value - raw attribute value as captured from the template
 * @param {boolean} shouldDecodeNewlines - when truthy, the encoded newline
 *   and tab entities are decoded in addition to the basic set
 * @returns {string} the decoded value
 */
function decodeAttr(value, shouldDecodeNewlines) {
  const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  return value.replace(re, match => decodingMap[match])
}
/**
 * Scans an HTML/template string from left to right and reports each token
 * it recognises through the callbacks supplied on `options`. Nothing is
 * returned; the callbacks are the only output.
 *
 * @param {string} html - the template source to scan
 * @param {Object} options
 * @param {boolean} [options.expectHTML] - enable the implicit closing of
 *   `<p>` before non-phrasing tags and of repeated "can be left open" tags
 * @param {Function} [options.isUnaryTag] - predicate (tagName) for tags that
 *   never have an end tag; defaults to `no`
 * @param {Function} [options.canBeLeftOpenTag] - predicate (tagName) for
 *   tags that may be left unclosed; defaults to `no`
 * @param {boolean} [options.shouldKeepComment] - report comments through
 *   `options.comment`
 * @param {boolean} [options.shouldDecodeNewlines] - forwarded to decodeAttr
 *   for every attribute except `href` on `<a>`
 * @param {boolean} [options.shouldDecodeNewlinesForHref] - forwarded to
 *   decodeAttr for `href` on `<a>`
 * @param {boolean} [options.outputSourceRange] - outside production, add
 *   `start`/`end` offsets to each reported attribute
 * @param {Function} [options.start] - (tagName, attrs, unary, start, end)
 * @param {Function} [options.end] - (tagName, start, end)
 * @param {Function} [options.chars] - (text, start, end); the offsets are
 *   omitted for the content of plain-text elements and for trailing text
 *   that could not be parsed
 * @param {Function} [options.comment] - (text, start, end); called
 *   unguarded whenever `shouldKeepComment` is truthy
 * @param {Function} [options.warn] - (message, range); only used outside
 *   production
 */
export function parseHTML(html, options) {
  // Currently open (non-unary) elements, innermost last.
  const stack = []
  const expectHTML = options.expectHTML
  const isUnaryTag = options.isUnaryTag || no
  const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  // Offset of the first character of `html` within the original input.
  let index = 0
  let last, lastTag
  while (html) {
    last = html
    // Make sure we're not in a plaintext content element like script/style
    if (!lastTag || !isPlainTextElement(lastTag)) {
      let textEnd = html.indexOf('<')
      if (textEnd === 0) {
        // Comment:
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')
          if (commentEnd >= 0) {
            if (options.shouldKeepComment) {
              options.comment(
                html.substring(4, commentEnd),
                index,
                index + commentEnd + 3
              )
            }
            advance(commentEnd + 3)
            continue
          }
        }
        // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')
          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }
        // Doctype:
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }
        // End tag:
        const endTagMatch = html.match(endTag)
        if (endTagMatch) {
          const curIndex = index
          advance(endTagMatch[0].length)
          parseEndTag(endTagMatch[1], curIndex, index)
          continue
        }
        // Start tag:
        const startTagMatch = parseStartTag()
        if (startTagMatch) {
          handleStartTag(startTagMatch)
          if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
            advance(1)
          }
          continue
        }
      }
      let text, rest, next
      if (textEnd >= 0) {
        rest = html.slice(textEnd)
        while (
          !endTag.test(rest) &&
          !startTagOpen.test(rest) &&
          !comment.test(rest) &&
          !conditionalComment.test(rest)
        ) {
          // < in plain text, be forgiving and treat it as text
          next = rest.indexOf('<', 1)
          if (next < 0) break
          textEnd += next
          rest = html.slice(textEnd)
        }
        text = html.substring(0, textEnd)
      }
      if (textEnd < 0) {
        // No "<" left at all: the remainder is text.
        text = html
      }
      if (text) {
        advance(text.length)
      }
      if (options.chars && text) {
        options.chars(text, index - text.length, index)
      }
    } else {
      // Inside script/style/textarea: take everything up to the matching
      // end tag as raw text in one step.
      let endTagLength = 0
      const stackedTag = lastTag.toLowerCase()
      const reStackedTag =
        reCache[stackedTag] ||
        (reCache[stackedTag] = new RegExp(
          '([\\s\\S]*?)(</' + stackedTag + '[^>]*>)',
          'i'
        ))
      const rest = html.replace(reStackedTag, function (all, text, endTag) {
        endTagLength = endTag.length
        if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
          text = text
            .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
            .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
        }
        if (shouldIgnoreFirstNewline(stackedTag, text)) {
          text = text.slice(1)
        }
        if (options.chars) {
          options.chars(text)
        }
        return ''
      })
      index += html.length - rest.length
      html = rest
      parseEndTag(stackedTag, index - endTagLength, index)
    }
    // Nothing was consumed in this pass: report the remainder as text and
    // stop, otherwise the loop would never terminate.
    if (html === last) {
      options.chars && options.chars(html)
      if (
        process.env.NODE_ENV !== 'production' &&
        !stack.length &&
        options.warn
      ) {
        options.warn(`Mal-formatted tag at end of template: "${html}"`, {
          start: index + html.length
        })
      }
      break
    }
  }
  // Clean up any remaining tags
  parseEndTag()

  // Drops the first `n` characters of the remaining input and moves the
  // absolute offset forward accordingly.
  function advance(n) {
    index += n
    html = html.substring(n)
  }

  // Tries to consume a complete start tag (name, attributes, closing `>` or
  // `/>`). Returns the match descriptor, or undefined when the input does
  // not begin with a start tag or the tag is never closed. Note that input
  // consumed while scanning is not restored in the unclosed case.
  function parseStartTag() {
    const start = html.match(startTagOpen)
    if (start) {
      const match = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      while (
        !(end = html.match(startTagClose)) &&
        (attr = html.match(dynamicArgAttribute) || html.match(attribute))
      ) {
        attr.start = index
        advance(attr[0].length)
        attr.end = index
        match.attrs.push(attr)
      }
      if (end) {
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }

  // Turns a raw start-tag match into the `options.start` notification and,
  // for non-unary tags, records the element on the open-element stack.
  function handleStartTag(match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash
    if (expectHTML) {
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }
    const unary = isUnaryTag(tagName) || !!unarySlash
    const l = match.attrs.length
    const attrs = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      // Groups 3/4/5 hold the double-quoted, single-quoted and unquoted
      // value respectively; at most one of them is set.
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines =
        tagName === 'a' && args[1] === 'href'
          ? options.shouldDecodeNewlinesForHref
          : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
      if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
        // Skip the leading whitespace captured by the attribute regex so
        // that `start` points at the first character of the name. `[0]` is
        // required: `.length` on the match array itself is always 1.
        attrs[i].start = args.start + args[0].match(/^\s*/)[0].length
        attrs[i].end = args.end
      }
    }
    if (!unary) {
      stack.push({
        tag: tagName,
        lowerCasedTag: tagName.toLowerCase(),
        attrs: attrs,
        start: match.start,
        end: match.end
      })
      lastTag = tagName
    }
    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  // Closes the nearest open element named `tagName` together with every
  // element opened after it. Called without a tag name it closes everything
  // that is still open. `start`/`end` default to the current offset.
  function parseEndTag(tagName, start, end) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index
    // Find the closest opened tag of the same type
    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }
    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        if (
          process.env.NODE_ENV !== 'production' &&
          (i > pos || !tagName) &&
          options.warn
        ) {
          options.warn(`tag <${stack[i].tag}> has no matching end tag.`, {
            start: stack[i].start,
            end: stack[i].end
          })
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }
      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      // A stray </br> is reported as a unary <br> start tag.
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      // A stray </p> is reported as an empty <p></p> pair.
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// HTML5 tags https://html.spec.whatwg.org/multipage/indices.html#elements-3
// Phrasing Content https://html.spec.whatwg.org/multipage/dom.html#phrasing-content
// Predicate over tag names listing the elements that are not phrasing
// content; the HTML parser uses it to close an open <p> implicitly when one
// of these tags starts.
export const isNonPhrasingTag = makeMap(
  'address,article,aside,base,blockquote,body,caption,col,colgroup,dd,' +
    'details,dialog,div,dl,dt,fieldset,figcaption,figure,footer,form,' +
    'h1,h2,h3,h4,h5,h6,head,header,hgroup,hr,html,legend,li,menuitem,meta,' +
    'optgroup,option,param,rp,rt,source,style,summary,tbody,td,tfoot,th,thead,' +
    'title,tr,track'
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment