Created
January 15, 2021 04:02
-
-
Save vegertar/0e7eed19b3336962bd090b75da9c4161 to your computer and use it in GitHub Desktop.
pruned from microlight (http://github.com/asvd/microlight)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @fileoverview microlight - syntax highlighting library | |
* @version 0.0.7 | |
* | |
* @license MIT, see http://github.com/asvd/microlight | |
* @copyright 2016 asvd <heliosframework@gmail.com> | |
* | |
* Code structure aims at minimizing the compressed library size | |
*/ | |
import assign from "lodash.assign"; | |
// Character classes used by the tokenizer in microlight().
// NOTE: despite the "WS" in its name this matches any NON-whitespace character
// (`\S`); a whitespace token (type 0) ends at the first character it matches.
const WS_RE = /\S/; | |
// Matches a character that may be part of a (key)word: `$`, a letter, a digit or `_`.
const WORD_RE = /[$\w]/; | |
// Matches one operator / punctuation character, including the braces
// `{`, `}`, `[` and `(`; such a character forms a single-character token (type 1).
const OP_RE = /[/{}[(\-+*=<>:;|\\.,?!&@~]/; | |
// Matches the closing braces `]` and `)` (type 2); a `/` following one of
// these is treated as division rather than the start of a regex.
const CLOSING_BRACE_RE = /[\])]/; | |
/**
 * Settings understood by `microlight`.
 */
interface Options {
  /** Tag name of the container element that receives all token elements. */
  codeTag: string;
  /** Tag name of the element created for every individual token. */
  tokenTag: string;
  /** Words that are rendered with the "keyword" class. */
  keywords: Set<string>;
}
/**
 * Words highlighted as keywords when the caller does not supply its own set.
 * The list is kept in alphabetical order.
 */
const DEFAULT_KEYWORDS: readonly string[] = [
  "abstract", "arguments", "await", "boolean", "break", "byte",
  "case", "catch", "char", "class", "const", "continue",
  "debugger", "default", "delete", "do", "double",
  "else", "enum", "eval", "export", "extends",
  "false", "final", "finally", "float", "for", "function",
  "goto",
  "if", "implements", "import", "in", "instanceof", "int", "interface",
  "let", "long",
  "native", "new", "null",
  "package", "private", "protected", "public",
  "return",
  "short", "static", "super", "switch", "synchronized",
  "this", "throw", "throws", "transient", "true", "try", "typeof",
  "var", "void", "volatile",
  "while", "with",
  "yield",
];

/**
 * Option values used for every setting the caller leaves out:
 * a `code` container, one `span` per token and the keyword list above.
 */
const defaults: Options = {
  codeTag: "code",
  tokenTag: "span",
  keywords: new Set(DEFAULT_KEYWORDS),
};
// Token types used by the scanner below:
//   0: anything else (whitespace / newlines)
//   1: operator or brace
//   2: closing brace (after which '/' is division, not a regex)
//   3: (key)word
//   4: regex
//   5: string starting with "
//   6: string starting with '
//   7: xml comment <!-- -->
//   8: multiline comment /* */
//   9: single-line comment starting with two slashes //
//  10: single-line comment starting with hash #

/**
 * Maps a finished token to the CSS class it is rendered with.
 *
 * @param tokenType - Token type (0-10) the token was scanned as.
 * @param token - Token text; only consulted for words (type 3).
 * @param keywords - Words that receive the "keyword" class.
 * @returns The class name, or "" when the token stays unstyled.
 */
function tokenClassName(
  tokenType: number,
  token: string,
  keywords: Set<string>
): string {
  if (tokenType === 0) return ""; // whitespace: not formatted
  if (tokenType < 3) return "punctuation"; // operators and braces
  if (tokenType === 3) return keywords.has(token) ? "keyword" : "";
  if (tokenType < 7) return "string"; // regex and strings
  return "comments"; // all comment flavours
}

/**
 * Tells whether the token being collected is complete, i.e. whether `chr`
 * must start a new token.
 *
 * @param tokenType - Type of the token being collected.
 * @param chr - Character about to be consumed.
 * @param prev1 - Previous character ("1" placeholder if it was escaped).
 * @param prev2 - The character before `prev1`, if any.
 * @param prev3 - The character three positions before `chr` ("" if none).
 * @param multichar - True when the token already holds more than one character.
 */
function endsToken(
  tokenType: number,
  chr: string,
  prev1: string,
  prev2: string | undefined,
  prev3: string,
  multichar: boolean
): boolean {
  switch (tokenType) {
    case 0: // whitespace is merged until the first non-whitespace character
      return WS_RE.test(chr);
    case 1: // operators and braces are always a single character
    case 2:
      return true;
    case 3:
      return !WORD_RE.test(chr);
    case 4: // a regex ends after its closing slash (or at a line break)
      return (prev1 === "/" || prev1 === "\n") && multichar;
    case 5:
      return prev1 === '"' && multichar;
    case 6:
      return prev1 === "'" && multichar;
    case 7:
      return prev3 + prev2 + prev1 === "-->";
    case 8:
      return prev2 + prev1 === "*/";
    default: // 9-10: single-line comments end with a newline
      return chr === "\n";
  }
}

/**
 * Determines the type of the token that starts at `code[index]`, trying the
 * most specific start conditions (comments) first and falling back to
 * whitespace.
 *
 * @param code - Full source text.
 * @param index - Position of the first character of the new token.
 * @param prev1 - Character preceding `index` ("1" placeholder if escaped).
 * @param lastTokenType - Type of the last token that was neither whitespace
 *   nor a comment (0 if there was none yet).
 */
function startingTokenType(
  code: string,
  index: number,
  prev1: string,
  lastTokenType: number
): number {
  const chr = code.charAt(index);
  if (chr === "#") return 10;
  if (code.startsWith("//", index)) return 9;
  if (code.startsWith("/*", index)) return 8;
  if (code.startsWith("<!--", index)) return 7;
  if (chr === "'") return 6;
  if (chr === '"') return 5;
  // '/' opens a regex only after an operator / opening brace or at the very
  // start of the input (otherwise it is division); the '<' test is a
  // workaround for xml closing tags
  if (chr === "/" && lastTokenType < 2 && prev1 !== "<") return 4;
  if (WORD_RE.test(chr)) return 3;
  if (CLOSING_BRACE_RE.test(chr)) return 2;
  if (OP_RE.test(chr)) return 1;
  return 0;
}

/**
 * Splits `code` into tokens and renders them into a freshly created element.
 *
 * Every token becomes one `tokenTag` element holding the token text; it gets
 * the class "keyword", "punctuation", "string" (strings and regexes) or
 * "comments", or no class at all for whitespace and ordinary words.
 *
 * @param code - Source text to highlight.
 * @param options - Optional overrides for the defaults. Settings that are
 *   missing or explicitly `undefined` fall back to the default value.
 * @returns The `codeTag` element containing one child element per token.
 */
export default function microlight(
  code: string,
  options?: Partial<Options>
): HTMLElement {
  // `assign` copies explicit `undefined` values over the defaults, so every
  // setting is re-defaulted afterwards.
  const merged: Partial<Options> = assign({}, defaults, options);
  const codeTag = merged.codeTag ?? defaults.codeTag;
  const tokenTag = merged.tokenTag ?? defaults.tokenTag;
  const keywords = merged.keywords ?? defaults.keywords;

  const el = document.createElement(codeTag);

  // content and type of the token currently being collected
  let token = "";
  let tokenType = 0;
  // type of the last token that was neither whitespace nor a comment;
  // starts at 0 so that a '/' opening the input is recognised as a regex
  let lastTokenType = 0;
  // character consumed by the previous iteration ("1" is a start sentinel)
  let chr = "1";
  // previous character and the one before it, as seen by the scanner
  let prev1: string | undefined;
  let prev2: string | undefined;

  // `pos` is always the index of the character following the current one
  for (let pos = 1; ; pos++) {
    prev2 = prev1;
    // Outside of comments a backslash escapes the following character: the
    // escaped character is remembered as a placeholder so that it cannot be
    // mistaken for a token terminator.
    prev1 = tokenType < 7 && prev1 === "\\" ? "1" : chr;

    const cur: string | undefined = code[pos - 1];
    const multichar = token.length > 1;

    if (
      cur === undefined || // end of content
      endsToken(tokenType, cur, prev1, prev2, code.charAt(pos - 4), multichar)
    ) {
      // appending the finished token to the result
      if (token) {
        const node = document.createElement(tokenTag);
        node.appendChild(document.createTextNode(token));
        const className = tokenClassName(tokenType, token, keywords);
        if (className) {
          node.classList.add(className);
        }
        el.appendChild(node);
      }

      if (cur === undefined) {
        break;
      }

      // remembering the finished token type (skipping whitespace and comments)
      if (tokenType > 0 && tokenType < 7) {
        lastTokenType = tokenType;
      }

      token = "";
      tokenType = startingTokenType(code, pos - 1, prev1, lastTokenType);
    }

    token += cur;
    chr = cur;
  }

  return el;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment