Skip to content

Instantly share code, notes, and snippets.

/uniparse.coffee Secret

Created March 13, 2016 03:27
Show Gist options
  • Save anonymous/ebb96864ae3e319f49c1 to your computer and use it in GitHub Desktop.
Save anonymous/ebb96864ae3e319f49c1 to your computer and use it in GitHub Desktop.
_ = require 'lodash'
{Tokenizer} = require './tokenize'
last = _.last
###
Data structures:
{kind:, vals:} a node
{kind:, val:} a leaf
###
# ------------------------
# --- Utility functions --
# There's two varieties of data structures: 'vals' and 'val'
# 'vals' is a branch node that contains many direct children
# 'val' is a leaf node that has no direct children
makeVals = (kind, vals) -> { kind: kind, vals: vals }
makeVal = (kind, val) -> { kind: kind, val: val }
makeRoot = (vals=[]) -> { kind: '_root', vals: vals }
isPunct = (tok) -> tok.kind == 'punct'
isIdent = (tok) -> tok.kind == 'ident'
isString = (tok) -> tok.kind == 'string'
isNumber = (tok) -> tok.kind == 'number'
isEOL = (tok) -> tok.kind == 'eol'
printJSON = (j) ->
console.log(JSON.stringify(j, null, 2))
printTree = (j, indent=0) ->
indentTxt = _.repeat(' ', indent * 2)
if _.isArray(j) and j.length
console.log(indentTxt+"Array[")
for val in j
printTree(val, indent + 1)
console.log(indentTxt+"]")
else if _.isObject(j) and j.vals?
console.log(indentTxt+"#{j.kind}[")
for val in j.vals
printTree(val, indent + 1)
console.log(indentTxt+"]")
else
console.log(indentTxt+"#{j.kind}(#{JSON.stringify(j.val)})")
Parsing = {}
# -------------------------
# --- Parsing functions ---
# Splits a token stream into a tree of brackets
Parsing._brackets = (toks) ->
stack = [makeRoot()]
output = []
bracketKindsMap = {
'(': 'group', ')': 'group'
'[': 'square', ']': 'square'
'{': 'block', '}': 'block'
}
for tok in toks
if isPunct(tok) and tok.val in ['(', '[', '{']
bracketKind = bracketKindsMap[tok.val]
branch = { 'kind': bracketKind, 'vals': [] }
last(stack).vals.push(branch)
stack.push(branch)
else if isPunct(tok) and tok.val in [')', ']', '}'] and last(stack).kind == bracketKindsMap[tok.val]
stack.pop()
else
last(stack).vals.push(tok)
return stack[0].vals
# Splits a token stream by a given token
Parsing._splitBy = (toks, outerKind, innerKind, splitter) ->
group = makeVals(innerKind, [])
groups = []
hasFound = false
for x in toks
if _.isEqual(x, splitter)
hasFound = true
groups.push(group) if group.vals.length > 0
group = makeVals(innerKind, [])
else
group.vals.push(x)
groups.push(group) if group.vals.length > 0
if hasFound
if outerKind?
return [makeVals(outerKind, groups)]
else
return groups
else
return toks
Parsing._splitLines = (toks) ->
group = makeVals('line', [])
groups = []
foundNewline = false
for x in toks
if x.kind == 'newline'
foundNewline = true
hasFound = true
groups.push(group) if group.vals.length > 0
group = makeVals('line', [])
else
group.vals.push(x)
groups.push(group) if group.vals.length > 0
if foundNewline
return groups
# If there are any non-lines then wrap in a line
for tok in toks
if _.isObject(tok) and tok.kind != 'line'
return [makeVals('line', toks)]
# Otherwise return the lines
return toks
# -------------------------
# --- Walking functions ---
# Walk through the nodes of a tree.
# before(x, isLeaf) walks outside-inwards
# after(x, isLeaf walks inside-outwards
# ... think the order of <begin> tags in XML, vs the order of </end> tags
Parsing._walk = (tree, before, after) ->
before(tree, false) if before
for x in tree.vals
if x.vals?
before(x, false) if before
Parsing._walk(x, before, after)
after(x, false) if after
else
before(x, true) if before
after(x, true) if after
after(tree, false) if after
Parsing._mapTreeInPlace = (tree, mapping) ->
Parsing._walk tree, null, (x, isLeaf) ->
if not isLeaf
x.vals = mapping(x.vals, x)
# --------------------
# --- Tokenization ---
###
class Tokenizer
constructor: () ->
@rules = []
addRule: (rule) ->
@rules.push(rule)
run: (txt) ->
offset = 0
while true
for rule in rules
rule.match()
###
Parsing.tokenize = (txt) ->
return new Tokenizer(txt).tokenize()
# --------------------
# --- Test it out! ---
txt = 'foo = bar = baz = 1, 2; 3, 4'
toks = Parsing.tokenize(txt)
toks = Parsing._brackets(toks)
root = makeVals('block', toks)
splitByAs = (root, outerKind, innerKind, splitter) ->
Parsing._mapTreeInPlace root, (toks) ->
Parsing._splitBy(toks, outerKind, innerKind, splitter)
return
# (lowest precedence)
# - newline stage: parse out newlines from inside {blocks}
Parsing._mapTreeInPlace root, (toks, element) ->
if element.kind == 'block'
# If we are in a {...} block, then each val must be a line
lines = Parsing._splitLines(toks)
else
# Remove extraneous newlines from the output
return _.filter(toks, (x) -> (x.kind != 'newline'))
# - semicolons and commas
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'bindings', '...', makeVal('punct', '='))
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'semicolons', '...', makeVal('punct', ';'))
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'commas', '...', makeVal('punct', ','))
printTree(root)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment