Last active
August 26, 2015 01:23
-
-
Save cdata/d4ea70b6fee4228a2cdd to your computer and use it in GitHub Desktop.
WIP stateless top-down CSS parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "type": "comment", | |
| "value": "/* An comment. */" | |
| }, | |
| { | |
| "type": "at-rule", | |
| "identifier": "@import", | |
| "value": " url('foo-external.css')" | |
| }, | |
| { | |
| "type": "comment", | |
| "value": "/* Another comment. */" | |
| }, | |
| { | |
| "type": "at-rule", | |
| "identifier": "@import", | |
| "value": " url('/baz/vim/other external.css')" | |
| }, | |
| { | |
| "type": "comment", | |
| "value": "/**\n * Fun comments.\n */" | |
| }, | |
| { | |
| "type": "selector", | |
| "combinator": ":root ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "property", | |
| "name": "--some-var", | |
| "value": { | |
| "type": "value", | |
| "value": "red" | |
| } | |
| }, | |
| { | |
| "type": "property", | |
| "name": "--some-mixin", | |
| "value": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "property", | |
| "name": "color", | |
| "value": { | |
| "type": "value", | |
| "value": "red" | |
| } | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "property", | |
| "name": "--another-mixin", | |
| "value": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "comment", | |
| "value": "/* Blue! */" | |
| }, | |
| { | |
| "type": "property", | |
| "name": "color", | |
| "value": { | |
| "type": "value", | |
| "value": "blue" | |
| } | |
| } | |
| ] | |
| } | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "selector", | |
| "combinator": "p ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "property", | |
| "name": "color", | |
| "value": { | |
| "type": "value", | |
| "value": "red" | |
| } | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "selector", | |
| "combinator": "p.mixed-in ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "at-rule", | |
| "identifier": "@apply", | |
| "value": "(--some-mixin)" | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "comment", | |
| "value": "/* Random semicolon. */" | |
| }, | |
| { | |
| "type": "selector", | |
| "combinator": ".some-class ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "property", | |
| "name": "text-decoration", | |
| "value": { | |
| "type": "value", | |
| "value": "underline" | |
| } | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "comment", | |
| "value": "/*\n * #pathological.comment:foo */" | |
| }, | |
| { | |
| "type": "selector", | |
| "combinator": "div.interesting > .combinator:not(:nth-of-type(0)) ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "property", | |
| "name": "position", | |
| "value": { | |
| "type": "value", | |
| "value": "relative" | |
| } | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "selector", | |
| "combinator": "#AnID ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "comment", | |
| "value": "/* Nice comment! */" | |
| }, | |
| { | |
| "type": "property", | |
| "name": "display", | |
| "value": { | |
| "type": "value", | |
| "value": "block" | |
| } | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "type": "at-rule", | |
| "identifier": "@media", | |
| "value": " screen (min-width > 1024px) ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "selector", | |
| "combinator": "p ", | |
| "block": { | |
| "type": "block", | |
| "statements": [ | |
| { | |
| "type": "property", | |
| "name": "color", | |
| "value": { | |
| "type": "value", | |
| "value": "blue" | |
| } | |
| } | |
| ] | |
| } | |
| } | |
| ] | |
| } | |
| } | |
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* An comment. */ | |
| @import url('foo-external.css'); | |
| /* Another comment. */ | |
| @import url('/baz/vim/other external.css'); | |
| /** | |
| * Fun comments. | |
| */ | |
| :root { | |
| --some-var: red; | |
| --some-mixin: { | |
| color: red; | |
| }; | |
| --another-mixin: { | |
| /* Blue! */ | |
| color: blue; | |
| }; | |
| } | |
| p { | |
| color: red; | |
| } | |
| p.mixed-in { | |
| @apply(--some-mixin); | |
| } | |
| ; /* Random semicolon. */ | |
| .some-class { | |
| text-decoration: underline; | |
| } | |
| /* | |
| * #pathological.comment:foo */ | |
| div.interesting > .combinator:not(:nth-of-type(0)) { | |
| position: relative; | |
| } | |
| #AnID { | |
| /* Nice comment! */ | |
| display: block; | |
| } | |
| @media screen (min-width > 1024px) { | |
| p { | |
| color: blue; | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (function(global) { | |
| // NOTE(cdata): Generally, tokens are structured as a three-valued tuple, | |
| // where the first value is the token value, the second value is the token | |
| // type, and the third value is the cursor index after consuming the raw | |
| // source. | |
| /** Tokenizer **/ | |
/**
 * Regular expressions shared by the tokenizer and the parser.
 *
 * The table is frozen because every function in this file reads it; an
 * accidental reassignment of one entry would silently change how all input
 * is tokenized. None of the patterns carries the `g` flag, so calling
 * `test` on them keeps no state between calls.
 */
var matcher = Object.freeze({
  // Matches when the tested text contains a whitespace character.
  whitespace: /\s/,
  // Matches text that starts with the comment opener "/*".
  commentOpen: /^\/\*/,
  // Matches text that ends with the comment closer "*/".
  commentClose: /\*\/$/,
  // Characters that terminate a word. The tokenizer tests one character at
  // a time against this pattern.
  boundary: /[\(\)\{\}\[\]'"@;:\s\.+>~\^]|\/\*/,
  // Characters that end a selector, a value or an at-rule prelude.
  blockBoundary: /[;\{\}]/,
  // Characters that end a property name.
  propertyBoundary: /^[;:]/,
  // Single or double quotation mark.
  quotation: /['"]/
});
/**
 * Numeric tags stored in the second slot of every token tuple.
 *
 * This object is part of the public interface (it is exported alongside
 * `parse` and `tokenize`), so it is frozen: a consumer that mutated it would
 * otherwise change the values the tokenizer and parser compare against.
 */
var tokenType = Object.freeze({
  // No token could be read (end of input or an unterminated string).
  none: 0,
  // A run of whitespace characters.
  whitespace: 1,
  // A single boundary character such as `{`, `;` or `:`.
  boundary: 2,
  // A quoted string, including its quotation marks.
  string: 3,
  // A run of non-boundary characters.
  word: 4
});
/**
 * Reads the run of whitespace characters that begins at `offset`.
 *
 * @param {string} source Raw text being tokenized.
 * @param {number} offset Index at which to start reading.
 * @returns {Array} Token tuple `[text, tokenType.whitespace, cursor]`.
 *     `text` is the empty string, and `cursor` equals `offset`, when the
 *     character at `offset` is not whitespace.
 */
function Whitespace(source, offset) {
  var cursor = offset;

  while (matcher.whitespace.test(source[cursor])) {
    cursor += 1;
  }

  var text = source.slice(offset, cursor);

  return [text, tokenType.whitespace, cursor];
}
/**
 * Reads a single boundary character at `offset`, if there is one.
 *
 * @param {string} source Raw text being tokenized.
 * @param {number} offset Index of the character to inspect.
 * @returns {Array} `[character, tokenType.boundary, offset + 1]` when the
 *     character is a boundary, otherwise `[null, tokenType.none, offset]`.
 */
function Boundary(source, offset) {
  var character = source[offset];

  // Past the end of the input `character` is undefined. Check for that
  // explicitly rather than letting RegExp#test coerce it to the string
  // "undefined" and relying on that word containing no boundary character.
  if (character !== undefined && matcher.boundary.test(character)) {
    return [character, tokenType.boundary, offset + 1];
  }

  return [null, tokenType.none, offset];
}
/**
 * Reads a quoted string whose opening quotation mark sits at `offset`.
 *
 * A backslash makes the character after it part of the string without any
 * further inspection, so an escaped quotation mark does not end the string.
 *
 * @param {string} source Raw text being tokenized.
 * @param {number} offset Index of the opening quotation mark.
 * @returns {Array} `[text, tokenType.string, cursor]`, where `text` includes
 *     both quotation marks, or `[null, tokenType.none, cursor]` when the
 *     input ends before the closing quotation mark is found.
 * @throws {Error} When the character at `offset` is not a quotation mark.
 */
function StringValue(source, offset) {
  var quote = source[offset];

  if (!matcher.quotation.test(quote)) {
    throw new Error('Expected quotation mark, but got ' + quote + ' at ' + offset);
  }

  var text = quote;
  var skipNext = false;
  var cursor = offset + 1;
  var character = source[cursor];

  while (character) {
    text += character;

    if (skipNext) {
      // The previous character was a backslash: take this one literally.
      skipNext = false;
    } else if (character === quote) {
      return [text, tokenType.string, cursor + 1];
    } else if (character === '\\') {
      skipNext = true;
    }

    cursor += 1;
    character = source[cursor];
  }

  // Ran out of input before the string was closed.
  return [null, tokenType.none, cursor];
}
/**
 * Reads the run of non-boundary characters that begins at `offset`.
 *
 * @param {string} source Raw text being tokenized.
 * @param {number} offset Index at which to start reading.
 * @returns {Array} `[word, tokenType.word, cursor]`, or
 *     `[null, tokenType.none, offset]` when no character could be read.
 */
function Word(source, offset) {
  var cursor = offset;

  while (source[cursor] && !matcher.boundary.test(source[cursor])) {
    cursor += 1;
  }

  if (cursor === offset) {
    return [null, tokenType.none, cursor];
  }

  return [source.slice(offset, cursor), tokenType.word, cursor];
}
/**
 * Reads the next token of any kind starting at `offset`.
 *
 * Whitespace is tried first, then a boundary character (a quotation mark
 * hands over to StringValue), and finally a word.
 *
 * @param {string} source Raw text being tokenized.
 * @param {number} offset Index at which to start reading.
 * @returns {Array} The token tuple produced by Whitespace, Boundary,
 *     StringValue or Word.
 */
function Token(source, offset) {
  var whitespace = Whitespace(source, offset);

  if (whitespace[0]) {
    return whitespace;
  }

  var boundary = Boundary(source, whitespace[2]);
  var symbol = boundary[0];

  if (symbol === null) {
    return Word(source, boundary[2]);
  }

  if (symbol === '"' || symbol === '\'') {
    // Boundary already stepped past the quotation mark; StringValue expects
    // to start on it.
    return StringValue(source, boundary[2] - 1);
  }

  return boundary;
}
| /** Parser **/ | |
/**
 * Parses a selector and, when one follows, its block.
 *
 * Every token up to the first `;`, `{` or `}` is concatenated verbatim
 * (whitespace included) to form the combinator text.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the first token of the selector.
 * @returns {Array} `[node, nextIndex]`, where `node` has type `'selector'`
 *     and `block` is null when the selector is not followed by `{`.
 */
function Selector(tokens, index) {
  var text = '';
  var body = null;
  var current;

  for (current = tokens[index];
       !matcher.blockBoundary.test(current[0]);
       current = tokens[++index]) {
    text += current[0];
  }

  if (current[0] === '{') {
    var parsedBlock = Block(tokens, index);
    body = parsedBlock[0];
    index = parsedBlock[1];
  }

  index = Delimiter(tokens, index)[1];

  return [{
    type: 'selector',
    combinator: text,
    block: body
  }, index];
}
/**
 * Parses the value of a property, starting just after the `:`.
 *
 * Leading whitespace tokens are skipped. If the first remaining token is
 * `{`, the value is a nested block and the result of Block is returned
 * as-is. Otherwise every token up to the next `;`, `{` or `}` is
 * concatenated verbatim; whitespace between and after the value's tokens is
 * kept.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the first token after the `:`.
 * @returns {Array} `[node, nextIndex]`, where `node` has type `'value'` or
 *     is the block node returned by Block.
 */
function Value(tokens, index) {
  var token = tokens[index];
  var value = '';

  // Compare the token type rather than testing the token text against /\s/:
  // a quoted string such as 'Helvetica Neue' contains a space and would
  // otherwise be skipped as if it were whitespace. The `token &&` guards
  // keep a truncated input from throwing a TypeError.
  while (token && token[1] === tokenType.whitespace) {
    token = tokens[++index];
  }

  if (token && token[0] === '{') {
    return Block(tokens, index);
  }

  while (token && !matcher.blockBoundary.test(token[0])) {
    value += token[0];
    token = tokens[++index];
  }

  index = Delimiter(tokens, index)[1];

  return [{
    type: 'value',
    value: value
  }, index];
}
/**
 * Parses a property: a name, optionally followed by `:` and a value.
 *
 * Every token up to the first `:` or `;` is concatenated verbatim to form
 * the name.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the first token of the property name.
 * @returns {Array} `[node, nextIndex]`, where `node` has type `'property'`
 *     and `value` is null when the name is not followed by `:`.
 */
function Property(tokens, index) {
  var label = '';
  var current = tokens[index];

  while (!matcher.propertyBoundary.test(current[0])) {
    label += current[0];
    index += 1;
    current = tokens[index];
  }

  var parsedValue = null;

  if (current[0] === ':') {
    var result = Value(tokens, index + 1);
    parsedValue = result[0];
    index = result[1];
  }

  return [{
    type: 'property',
    name: label,
    value: parsedValue
  }, index];
}
/**
 * Decides whether the statement starting at `index` is a property or a
 * selector, and delegates to Property or Selector accordingly.
 *
 * Tokens are scanned forward from `index`. A `:` or `;` that is not directly
 * followed by a word makes the statement a property; a `:` or `;` directly
 * followed by a word is skipped over (this is what lets `a:hover` through as
 * part of a selector). A `;`, `{` or `}` makes the statement a selector.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the first token of the statement.
 * @returns {Array} `[node, nextIndex]` from Property or Selector, or a
 *     parse-error node when no decision can be made. When the tokens run out
 *     during the scan, `nextIndex` is `tokens.length`.
 */
function Rule(tokens, index) {
  var start = index;
  var token = tokens[index];
  var next;
  var failure;

  while (token && !matcher.blockBoundary.test(token[0])) {
    token = tokens[++index];

    if (!token) {
      // Ran out of tokens before any boundary was seen.
      break;
    }

    if (matcher.propertyBoundary.test(token[0])) {
      next = tokens[index + 1];

      if (!next) {
        // The boundary is the very last token, so there is nothing left to
        // read a value from. Report it rather than reading past the end.
        token = undefined;
        break;
      }

      if (next[1] === tokenType.word) {
        continue;
      }

      return Property(tokens, start);
    }

    if (matcher.blockBoundary.test(token[0])) {
      return Selector(tokens, start);
    }
  }

  if (token) {
    return ParseError(tokens, index, 'Failed to parse Rule starting with ' + tokens[start][0]);
  }

  // End of input: anchor the error on the statement's first token (an index
  // that is known to exist) and resume at the end so callers stop cleanly.
  failure = ParseError(tokens, start, 'Failed to parse Rule starting with ' + tokens[start][0]);
  return [failure[0], tokens.length];
}
/**
 * Skips whitespace tokens and at most one `;` starting at `index`.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index at which to start skipping.
 * @returns {Array} `[node, nextIndex]`, where `node` has type `'delimiter'`
 *     and `value` is the text of the token at `nextIndex`, or `'EOF'` when
 *     there is none.
 */
function Delimiter(tokens, index) {
  var token = tokens[index];

  // Compare the token type rather than testing the token text against /\s/:
  // a quoted string that contains a space is not whitespace and must not be
  // swallowed here.
  while (token && token[1] === tokenType.whitespace) {
    token = tokens[++index];
  }

  if (token && token[0] === ';') {
    token = tokens[++index];
  }

  return [{
    type: 'delimiter',
    value: token ? token[0] : 'EOF'
  }, index];
}
/**
 * Parses a `{ ... }` block into a list of statements.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the opening `{`.
 * @returns {Array} `[node, nextIndex]`, where `node` has type `'block'` and
 *     `statements` holds the nodes returned by Statement, in order.
 */
function Block(tokens, index) {
  var token = tokens[++index];
  var statements = [];
  var statement;

  // Skip whitespace directly after `{`. Without this an empty body such as
  // `{ }` hands the closing brace to Statement, which reports it as a
  // parse-error. Non-empty bodies are unaffected because Statement skips
  // leading whitespace itself.
  while (token && token[1] === tokenType.whitespace) {
    token = tokens[++index];
  }

  while (token && token[0] !== '}') {
    statement = Statement(tokens, index);

    if (statement[0] == null) {
      break;
    }

    statements.push(statement[0]);

    // Step over whitespace and an optional `;` after the statement so the
    // loop condition sees the next significant token.
    token = Delimiter(tokens, statement[1]);
    index = token[1];
    token = tokens[index];
  }

  // `index + 1` steps past the token the loop stopped on (the `}` in the
  // normal case).
  token = Delimiter(tokens, index + 1);
  index = token[1];

  return [{
    type: 'block',
    statements: statements
  }, index];
}
/**
 * Parses an at-rule such as `@import ...;` or `@media ... { ... }`.
 *
 * The identifier is the `@` plus the word that directly follows it. Every
 * token after the identifier, up to the next `;`, `{` or `}`, is
 * concatenated verbatim to form the value.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the `@` token.
 * @returns {Array} `[node, nextIndex]`. `node` has type `'at-rule'`, with
 *     `block` left undefined when no `{` follows, or type `'parse-error'`
 *     when the `@` is not directly followed by a word.
 */
function AtRule(tokens, index) {
  var token = tokens[index];
  var identifier = token[0];
  var value = '';
  var block;
  var failure;

  token = tokens[++index];

  if (!token) {
    // `@` is the last token. Anchor the error on the `@` itself, which is
    // known to exist, and resume at the end of the token list.
    failure = ParseError(tokens, index - 1, 'At Rule missing identifier, got "EOF" instead');
    return [failure[0], index];
  }

  if (token[1] !== tokenType.word) {
    return ParseError(tokens, index, 'At Rule missing identifier, got "' + token[0] + '" instead');
  }

  identifier += token[0];

  while ((token = tokens[++index]) && !matcher.blockBoundary.test(token[0])) {
    value += token[0];
  }

  // `token` is undefined here when the input ended without `;` or `{`.
  if (token && token[0] === '{') {
    token = Block(tokens, index);
    block = token[0];
    index = token[1];
  }

  token = Delimiter(tokens, index);
  index = token[1];

  return [{
    type: 'at-rule',
    identifier: identifier,
    value: value,
    block: block
  }, index];
}
/**
 * Collects the tokens of a comment into a single node.
 *
 * Token text is concatenated verbatim, starting with the token at `index`
 * and stopping after the first token whose text ends with the comment
 * closer, or when the tokens run out.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the token that opens the comment.
 * @returns {Array} `[node, nextIndex]`, where `node` has type `'comment'`.
 */
function Comment(tokens, index) {
  var current = tokens[index];
  var text = current[0];

  while (!matcher.commentClose.test(current[0])) {
    index += 1;
    current = tokens[index];

    if (!current) {
      // Unterminated comment: keep what was collected so far.
      break;
    }

    text += current[0];
  }

  return [{
    type: 'comment',
    value: text
  }, index + 1];
}
/**
 * Builds a parse-error node for the token at `index`.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index of the token at which parsing failed.
 * @param {string=} error Human-readable description; defaults to `''`.
 * @returns {Array} `[node, index]`. `node` has type `'parse-error'` and
 *     `offset` is the third slot of the token at `index`. When `index` is
 *     past the end of `tokens`, `offset` is taken from the last token
 *     instead, or is 0 when there are no tokens at all.
 */
function ParseError(tokens, index, error) {
  // Running off the end of the input is itself a common parse error, so an
  // out-of-range index must not throw. Fall back to the last token.
  var token = tokens[index] || tokens[tokens.length - 1];

  return [{
    type: 'parse-error',
    offset: token ? token[2] : 0,
    error: error || ''
  }, index];
}
/**
 * Parses one statement, dispatching on its first significant token.
 *
 * Leading whitespace tokens are skipped. A token that starts with the
 * comment opener goes to Comment, `@` goes to AtRule, and any other word or
 * boundary token goes to Rule.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @param {number} index Index at which the statement may start.
 * @returns {Array} `[node, nextIndex]` from the chosen parser, or
 *     `[null, tokens.length]` when nothing parseable is left.
 */
function Statement(tokens, index) {
  var token = tokens[index];

  while (token && token[1] === tokenType.whitespace) {
    token = tokens[++index];
  }

  if (!token) {
    // Only whitespace remained (for example a trailing newline after the
    // last comment), so there is no statement to parse.
    return [null, tokens.length];
  }

  if (matcher.commentOpen.test(token[0])) {
    return Comment(tokens, index);
  }

  if (token[0] === '@') {
    return AtRule(tokens, index);
  }

  if (token[1] === tokenType.word || token[1] === tokenType.boundary) {
    return Rule(tokens, index);
  }

  return [null, tokens.length];
}
/**
 * Parses a whole token list into a list of top-level nodes.
 *
 * Statement is called repeatedly, each time resuming at the index it
 * returned. If a call does not advance the index, an error is logged and
 * parsing stops, so malformed input cannot loop forever.
 *
 * @param {Array} tokens Token tuples produced by `tokenize`.
 * @returns {Array} The non-null nodes returned by Statement, in order.
 */
function Stylesheet(tokens) {
  var nodes = [];
  var cursor = 0;
  var result;
  var resumeAt;

  while (cursor < tokens.length) {
    result = Statement(tokens, cursor);
    resumeAt = result[1];

    if (result[0] != null) {
      nodes.push(result[0]);
    }

    if (resumeAt === cursor) {
      console.error('Infinite loop detected, breaking.');
      break;
    }

    cursor = resumeAt;
  }

  return nodes;
}
| /** Public Interface **/ | |
/**
 * Tokenizes `source` and parses the tokens into a list of nodes.
 *
 * @param {string} source Raw stylesheet text.
 * @returns {Array} The nodes returned by Stylesheet.
 */
function parse(source) {
  var tokens = tokenize(source);

  return Stylesheet(tokens);
}
/**
 * Splits `source` into token tuples.
 *
 * Token is called repeatedly, each time resuming at the cursor stored in the
 * previous token, until it yields a token whose value is null or undefined.
 *
 * @param {string} source Raw stylesheet text.
 * @returns {Array} Token tuples of the form `[value, type, cursor]`.
 */
function tokenize(source) {
  var collected = [];
  var next = Token(source, 0);

  while (next && next[0] != null) {
    collected.push(next);
    next = Token(source, next[2]);
  }

  return collected;
}
| if (typeof module !== undefined) { | |
| module.exports = { | |
| parse: parse, | |
| tokenize: tokenize, | |
| tokenType: tokenType | |
| }; | |
| } else { | |
| global.parse = parse; | |
| global.tokenize = tokenize; | |
| global.tokenType = tokenType; | |
| } | |
| })(this); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment