Skip to content

Instantly share code, notes, and snippets.

@blackcater
Last active September 28, 2021 12:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blackcater/3184fa55b149406323a54df2702a79ec to your computer and use it in GitHub Desktop.
Save blackcater/3184fa55b149406323a54df2702a79ec to your computer and use it in GitHub Desktop.
Vue@2 源码中解析模板为 AST 的核心参考库。
/**
* unicode letters used for parsing html tags, component names and property paths.
* using https://www.w3.org/TR/html53/semantics-scripting.html#potentialcustomelementname
* skipping \u10000-\uEFFFF due to it freezing up PhantomJS
*/
export const unicodeRegExp = /a-zA-Z\u00B7\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD/
/**
* Always return false.
*/
export const no = (a?: any, b?: any, c?: any) => false
/**
* Make a map and return a function for checking if a key
* is in that map.
*/
export function makeMap(
str: string,
expectsLowerCase?: boolean
): (key: string) => true | void {
const map = Object.create(null)
const list: Array<string> = str.split(',')
for (let i = 0; i < list.length; i++) {
map[list[i]] = true
}
return expectsLowerCase
? val => map[val.toLowerCase()]
: val => map[val]
}
/**
* Not type-checking this file because it's mostly vendor code.
*/
/*!
* HTML Parser By John Resig (ejohn.org)
* Modified by Juriy "kangax" Zaytsev
* Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
*/
import { makeMap, no } from 'shared/util'
import { isNonPhrasingTag } from 'web/compiler/util'
import { unicodeRegExp } from 'core/util/lang'
// Regular Expressions for parsing tags and attributes
const attribute =
/^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
const dynamicArgAttribute =
/^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
const startTagOpen = new RegExp(`^<${qnameCapture}`)
const startTagClose = /^\s*(\/?)>/
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being passed as HTML comment when inlined in page
const comment = /^<!\--/
const conditionalComment = /^<!\[/
// Special Elements (can contain anything)
export const isPlainTextElement = makeMap('script,style,textarea', true)
const reCache = {}
const decodingMap = {
'&lt;': '<',
'&gt;': '>',
'&quot;': '"',
'&amp;': '&',
'&#10;': '\n',
'&#9;': '\t',
'&#39;': "'"
}
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g
// #5992
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
const shouldIgnoreFirstNewline = (tag, html) =>
tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
function decodeAttr(value, shouldDecodeNewlines) {
const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
return value.replace(re, match => decodingMap[match])
}
export function parseHTML(html, options) {
const stack = []
const expectHTML = options.expectHTML
const isUnaryTag = options.isUnaryTag || no
const canBeLeftOpenTag = options.canBeLeftOpenTag || no
let index = 0
let last, lastTag
while (html) {
last = html
// Make sure we're not in a plaintext content element like script/style
if (!lastTag || !isPlainTextElement(lastTag)) {
let textEnd = html.indexOf('<')
if (textEnd === 0) {
// Comment:
if (comment.test(html)) {
const commentEnd = html.indexOf('-->')
if (commentEnd >= 0) {
if (options.shouldKeepComment) {
options.comment(
html.substring(4, commentEnd),
index,
index + commentEnd + 3
)
}
advance(commentEnd + 3)
continue
}
}
// http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
if (conditionalComment.test(html)) {
const conditionalEnd = html.indexOf(']>')
if (conditionalEnd >= 0) {
advance(conditionalEnd + 2)
continue
}
}
// Doctype:
const doctypeMatch = html.match(doctype)
if (doctypeMatch) {
advance(doctypeMatch[0].length)
continue
}
// End tag:
const endTagMatch = html.match(endTag)
if (endTagMatch) {
const curIndex = index
advance(endTagMatch[0].length)
parseEndTag(endTagMatch[1], curIndex, index)
continue
}
// Start tag:
const startTagMatch = parseStartTag()
if (startTagMatch) {
handleStartTag(startTagMatch)
if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
advance(1)
}
continue
}
}
let text, rest, next
if (textEnd >= 0) {
rest = html.slice(textEnd)
while (
!endTag.test(rest) &&
!startTagOpen.test(rest) &&
!comment.test(rest) &&
!conditionalComment.test(rest)
) {
// < in plain text, be forgiving and treat it as text
next = rest.indexOf('<', 1)
if (next < 0) break
textEnd += next
rest = html.slice(textEnd)
}
text = html.substring(0, textEnd)
}
if (textEnd < 0) {
text = html
}
if (text) {
advance(text.length)
}
if (options.chars && text) {
options.chars(text, index - text.length, index)
}
} else {
let endTagLength = 0
const stackedTag = lastTag.toLowerCase()
const reStackedTag =
reCache[stackedTag] ||
(reCache[stackedTag] = new RegExp(
'([\\s\\S]*?)(</' + stackedTag + '[^>]*>)',
'i'
))
const rest = html.replace(reStackedTag, function (all, text, endTag) {
endTagLength = endTag.length
if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
text = text
.replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
.replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
}
if (shouldIgnoreFirstNewline(stackedTag, text)) {
text = text.slice(1)
}
if (options.chars) {
options.chars(text)
}
return ''
})
index += html.length - rest.length
html = rest
parseEndTag(stackedTag, index - endTagLength, index)
}
if (html === last) {
options.chars && options.chars(html)
if (
process.env.NODE_ENV !== 'production' &&
!stack.length &&
options.warn
) {
options.warn(`Mal-formatted tag at end of template: "${html}"`, {
start: index + html.length
})
}
break
}
}
// Clean up any remaining tags
parseEndTag()
function advance(n) {
index += n
html = html.substring(n)
}
function parseStartTag() {
const start = html.match(startTagOpen)
if (start) {
const match = {
tagName: start[1],
attrs: [],
start: index
}
advance(start[0].length)
let end, attr
while (
!(end = html.match(startTagClose)) &&
(attr = html.match(dynamicArgAttribute) || html.match(attribute))
) {
attr.start = index
advance(attr[0].length)
attr.end = index
match.attrs.push(attr)
}
if (end) {
match.unarySlash = end[1]
advance(end[0].length)
match.end = index
return match
}
}
}
function handleStartTag(match) {
const tagName = match.tagName
const unarySlash = match.unarySlash
if (expectHTML) {
if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
parseEndTag(lastTag)
}
if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
parseEndTag(tagName)
}
}
const unary = isUnaryTag(tagName) || !!unarySlash
const l = match.attrs.length
const attrs = new Array(l)
for (let i = 0; i < l; i++) {
const args = match.attrs[i]
const value = args[3] || args[4] || args[5] || ''
const shouldDecodeNewlines =
tagName === 'a' && args[1] === 'href'
? options.shouldDecodeNewlinesForHref
: options.shouldDecodeNewlines
attrs[i] = {
name: args[1],
value: decodeAttr(value, shouldDecodeNewlines)
}
if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
attrs[i].start = args.start + args[0].match(/^\s*/).length
attrs[i].end = args.end
}
}
if (!unary) {
stack.push({
tag: tagName,
lowerCasedTag: tagName.toLowerCase(),
attrs: attrs,
start: match.start,
end: match.end
})
lastTag = tagName
}
if (options.start) {
options.start(tagName, attrs, unary, match.start, match.end)
}
}
function parseEndTag(tagName, start, end) {
let pos, lowerCasedTagName
if (start == null) start = index
if (end == null) end = index
// Find the closest opened tag of the same type
if (tagName) {
lowerCasedTagName = tagName.toLowerCase()
for (pos = stack.length - 1; pos >= 0; pos--) {
if (stack[pos].lowerCasedTag === lowerCasedTagName) {
break
}
}
} else {
// If no tag name is provided, clean shop
pos = 0
}
if (pos >= 0) {
// Close all the open elements, up the stack
for (let i = stack.length - 1; i >= pos; i--) {
if (
process.env.NODE_ENV !== 'production' &&
(i > pos || !tagName) &&
options.warn
) {
options.warn(`tag <${stack[i].tag}> has no matching end tag.`, {
start: stack[i].start,
end: stack[i].end
})
}
if (options.end) {
options.end(stack[i].tag, start, end)
}
}
// Remove the open elements from the stack
stack.length = pos
lastTag = pos && stack[pos - 1].tag
} else if (lowerCasedTagName === 'br') {
if (options.start) {
options.start(tagName, [], true, start, end)
}
} else if (lowerCasedTagName === 'p') {
if (options.start) {
options.start(tagName, [], false, start, end)
}
if (options.end) {
options.end(tagName, start, end)
}
}
}
}
// HTML5 tags https://html.spec.whatwg.org/multipage/indices.html#elements-3
// Phrasing Content https://html.spec.whatwg.org/multipage/dom.html#phrasing-content
export const isNonPhrasingTag = makeMap(
'address,article,aside,base,blockquote,body,caption,col,colgroup,dd,' +
'details,dialog,div,dl,dt,fieldset,figcaption,figure,footer,form,' +
'h1,h2,h3,h4,h5,h6,head,header,hgroup,hr,html,legend,li,menuitem,meta,' +
'optgroup,option,param,rp,rt,source,style,summary,tbody,td,tfoot,th,thead,' +
'title,tr,track'
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment