-
-
Save anonymous/ebb96864ae3e319f49c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
_ = require 'lodash' | |
{Tokenizer} = require './tokenize' | |
last = _.last | |
### | |
Data structures: | |
{kind:, vals:} a node | |
{kind:, val:} a leaf | |
### | |
# ------------------------ | |
# --- Utility functions -- | |
# There's two varieties of data structures: 'vals' and 'val' | |
# 'vals' is a branch node that contains many direct children | |
# 'val' is a leaf node that has no direct children | |
makeVals = (kind, vals) -> { kind: kind, vals: vals } | |
makeVal = (kind, val) -> { kind: kind, val: val } | |
makeRoot = (vals=[]) -> { kind: '_root', vals: vals } | |
isPunct = (tok) -> tok.kind == 'punct' | |
isIdent = (tok) -> tok.kind == 'ident' | |
isString = (tok) -> tok.kind == 'string' | |
isNumber = (tok) -> tok.kind == 'number' | |
isEOL = (tok) -> tok.kind == 'eol' | |
printJSON = (j) -> | |
console.log(JSON.stringify(j, null, 2)) | |
printTree = (j, indent=0) -> | |
indentTxt = _.repeat(' ', indent * 2) | |
if _.isArray(j) and j.length | |
console.log(indentTxt+"Array[") | |
for val in j | |
printTree(val, indent + 1) | |
console.log(indentTxt+"]") | |
else if _.isObject(j) and j.vals? | |
console.log(indentTxt+"#{j.kind}[") | |
for val in j.vals | |
printTree(val, indent + 1) | |
console.log(indentTxt+"]") | |
else | |
console.log(indentTxt+"#{j.kind}(#{JSON.stringify(j.val)})") | |
Parsing = {} | |
# ------------------------- | |
# --- Parsing functions --- | |
# Splits a token stream into a tree of brackets | |
Parsing._brackets = (toks) -> | |
stack = [makeRoot()] | |
output = [] | |
bracketKindsMap = { | |
'(': 'group', ')': 'group' | |
'[': 'square', ']': 'square' | |
'{': 'block', '}': 'block' | |
} | |
for tok in toks | |
if isPunct(tok) and tok.val in ['(', '[', '{'] | |
bracketKind = bracketKindsMap[tok.val] | |
branch = { 'kind': bracketKind, 'vals': [] } | |
last(stack).vals.push(branch) | |
stack.push(branch) | |
else if isPunct(tok) and tok.val in [')', ']', '}'] and last(stack).kind == bracketKindsMap[tok.val] | |
stack.pop() | |
else | |
last(stack).vals.push(tok) | |
return stack[0].vals | |
# Splits a token stream by a given token | |
Parsing._splitBy = (toks, outerKind, innerKind, splitter) -> | |
group = makeVals(innerKind, []) | |
groups = [] | |
hasFound = false | |
for x in toks | |
if _.isEqual(x, splitter) | |
hasFound = true | |
groups.push(group) if group.vals.length > 0 | |
group = makeVals(innerKind, []) | |
else | |
group.vals.push(x) | |
groups.push(group) if group.vals.length > 0 | |
if hasFound | |
if outerKind? | |
return [makeVals(outerKind, groups)] | |
else | |
return groups | |
else | |
return toks | |
Parsing._splitLines = (toks) -> | |
group = makeVals('line', []) | |
groups = [] | |
foundNewline = false | |
for x in toks | |
if x.kind == 'newline' | |
foundNewline = true | |
hasFound = true | |
groups.push(group) if group.vals.length > 0 | |
group = makeVals('line', []) | |
else | |
group.vals.push(x) | |
groups.push(group) if group.vals.length > 0 | |
if foundNewline | |
return groups | |
# If there are any non-lines then wrap in a line | |
for tok in toks | |
if _.isObject(tok) and tok.kind != 'line' | |
return [makeVals('line', toks)] | |
# Otherwise return the lines | |
return toks | |
# ------------------------- | |
# --- Walking functions --- | |
# Walk through the nodes of a tree. | |
# before(x, isLeaf) walks outside-inwards | |
# after(x, isLeaf walks inside-outwards | |
# ... think the order of <begin> tags in XML, vs the order of </end> tags | |
Parsing._walk = (tree, before, after) -> | |
before(tree, false) if before | |
for x in tree.vals | |
if x.vals? | |
before(x, false) if before | |
Parsing._walk(x, before, after) | |
after(x, false) if after | |
else | |
before(x, true) if before | |
after(x, true) if after | |
after(tree, false) if after | |
Parsing._mapTreeInPlace = (tree, mapping) -> | |
Parsing._walk tree, null, (x, isLeaf) -> | |
if not isLeaf | |
x.vals = mapping(x.vals, x) | |
# -------------------- | |
# --- Tokenization --- | |
### | |
class Tokenizer | |
constructor: () -> | |
@rules = [] | |
addRule: (rule) -> | |
@rules.push(rule) | |
run: (txt) -> | |
offset = 0 | |
while true | |
for rule in rules | |
rule.match() | |
### | |
Parsing.tokenize = (txt) -> | |
return new Tokenizer(txt).tokenize() | |
# -------------------- | |
# --- Test it out! --- | |
txt = 'foo = bar = baz = 1, 2; 3, 4' | |
toks = Parsing.tokenize(txt) | |
toks = Parsing._brackets(toks) | |
root = makeVals('block', toks) | |
splitByAs = (root, outerKind, innerKind, splitter) -> | |
Parsing._mapTreeInPlace root, (toks) -> | |
Parsing._splitBy(toks, outerKind, innerKind, splitter) | |
return | |
# (lowest precedence) | |
# - newline stage: parse out newlines from inside {blocks} | |
Parsing._mapTreeInPlace root, (toks, element) -> | |
if element.kind == 'block' | |
# If we are in a {...} block, then each val must be a line | |
lines = Parsing._splitLines(toks) | |
else | |
# Remove extraneous newlines from the output | |
return _.filter(toks, (x) -> (x.kind != 'newline')) | |
# - semicolons and commas | |
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'bindings', '...', makeVal('punct', '=')) | |
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'semicolons', '...', makeVal('punct', ';')) | |
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'commas', '...', makeVal('punct', ',')) | |
printTree(root) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment