/uniparse.coffee Secret

## uniparse.coffee
_ = require 'lodash'
{Tokenizer} = require './tokenize'
last = _.last

###

Data structures:
  {kind:, vals:}    a node
  {kind:, val:}     a leaf

###

# ------------------------
# --- Utility functions --

# There's two varieties of data structures: 'vals' and 'val'
#   'vals' is a branch node that contains many direct children
#   'val' is a leaf node that has no direct children

makeVals = (kind, vals) -> { kind: kind, vals: vals }
makeVal = (kind, val) -> { kind: kind, val: val }

makeRoot = (vals=[]) -> { kind: '_root', vals: vals }

isPunct = (tok) -> tok.kind == 'punct'
isIdent = (tok) -> tok.kind == 'ident'
isString = (tok) -> tok.kind == 'string'
isNumber = (tok) -> tok.kind == 'number'
isEOL = (tok) -> tok.kind == 'eol'

printJSON = (j) ->
  console.log(JSON.stringify(j, null, 2))

printTree = (j, indent=0) ->
  indentTxt = _.repeat(' ', indent * 2)
  if _.isArray(j) and j.length
    console.log(indentTxt+"Array[")
    for val in j
      printTree(val, indent + 1)
    console.log(indentTxt+"]")
  else if _.isObject(j) and j.vals?
    console.log(indentTxt+"#{j.kind}[")
    for val in j.vals
      printTree(val, indent + 1)
    console.log(indentTxt+"]")
  else
    console.log(indentTxt+"#{j.kind}(#{JSON.stringify(j.val)})")


Parsing = {}

# -------------------------
# --- Parsing functions ---

# Splits a token stream into a tree of brackets
Parsing._brackets = (toks) ->
  stack = [makeRoot()]
  output = []

  bracketKindsMap = {
    '(': 'group',  ')': 'group'
    '[': 'square', ']': 'square'
    '{': 'block',  '}': 'block'
  }

  for tok in toks
    if isPunct(tok) and tok.val in ['(', '[', '{']
      bracketKind = bracketKindsMap[tok.val]
      branch = { 'kind': bracketKind, 'vals': [] }
      last(stack).vals.push(branch)
      stack.push(branch)
    else if isPunct(tok) and tok.val in [')', ']', '}'] and last(stack).kind == bracketKindsMap[tok.val]
      stack.pop()
    else
      last(stack).vals.push(tok)
  return stack[0].vals


# Splits a token stream by a given token
Parsing._splitBy = (toks, outerKind, innerKind, splitter) ->
  group = makeVals(innerKind, [])
  groups = []
  hasFound = false
  for x in toks
    if _.isEqual(x, splitter)
      hasFound = true
      groups.push(group) if group.vals.length > 0
      group = makeVals(innerKind, [])
    else
      group.vals.push(x)
  groups.push(group) if group.vals.length > 0

  if hasFound
    if outerKind?
      return [makeVals(outerKind, groups)]
    else
      return groups
  else
    return toks

Parsing._splitLines = (toks) ->
  group = makeVals('line', [])
  groups = []
  foundNewline = false
  for x in toks
    if x.kind == 'newline'
      foundNewline = true
      hasFound = true
      groups.push(group) if group.vals.length > 0
      group = makeVals('line', [])
    else
      group.vals.push(x)
  groups.push(group) if group.vals.length > 0

  if foundNewline
    return groups

  # If there are any non-lines then wrap in a line
  for tok in toks
    if _.isObject(tok) and tok.kind != 'line'
      return [makeVals('line', toks)]

  # Otherwise return the lines
  return toks


# -------------------------
# --- Walking functions ---

# Walk through the nodes of a tree.
#   before(x, isLeaf) walks outside-inwards
#   after(x, isLeaf walks inside-outwards
#   ... think the order of <begin> tags in XML, vs the order of </end> tags
Parsing._walk = (tree, before, after) ->
  before(tree, false) if before
  for x in tree.vals
    if x.vals?
      before(x, false) if before
      Parsing._walk(x, before, after)
      after(x, false) if after
    else
      before(x, true) if before
      after(x, true) if after
  after(tree, false) if after

Parsing._mapTreeInPlace = (tree, mapping) ->
  Parsing._walk tree, null, (x, isLeaf) ->
    if not isLeaf
      x.vals = mapping(x.vals, x)


# --------------------
# --- Tokenization ---
###
class Tokenizer
  constructor: () ->
    @rules = []
  addRule: (rule) ->
    @rules.push(rule)
  run: (txt) ->
    offset = 0
    while true

      for rule in rules
        rule.match()

###

Parsing.tokenize = (txt) ->
  return new Tokenizer(txt).tokenize()


# --------------------
# --- Test it out! ---

txt = 'foo = bar = baz = 1, 2; 3, 4'

toks = Parsing.tokenize(txt)

toks = Parsing._brackets(toks)
root = makeVals('block', toks)

splitByAs = (root, outerKind, innerKind, splitter) ->
  Parsing._mapTreeInPlace root, (toks) ->
    Parsing._splitBy(toks, outerKind, innerKind, splitter)
  return

# (lowest precedence)

# - newline stage: parse out newlines from inside {blocks}
Parsing._mapTreeInPlace root, (toks, element) ->
  if element.kind == 'block'
    # If we are in a {...} block, then each val must be a line
    lines = Parsing._splitLines(toks)
  else
    # Remove extraneous newlines from the output
    return _.filter(toks, (x) -> (x.kind != 'newline'))

# - semicolons and commas
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'bindings', '...', makeVal('punct', '='))
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'semicolons', '...', makeVal('punct', ';'))
Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'commas', '...', makeVal('punct', ','))

printTree(root)
	_ = require 'lodash'
	{Tokenizer} = require './tokenize'
	last = _.last

	###

	Data structures:
	{kind:, vals:} a node
	{kind:, val:} a leaf

	###

	# ------------------------
	# --- Utility functions --

	# There's two varieties of data structures: 'vals' and 'val'
	# 'vals' is a branch node that contains many direct children
	# 'val' is a leaf node that has no direct children

	makeVals = (kind, vals) -> { kind: kind, vals: vals }
	makeVal = (kind, val) -> { kind: kind, val: val }

	makeRoot = (vals=[]) -> { kind: '_root', vals: vals }

	isPunct = (tok) -> tok.kind == 'punct'
	isIdent = (tok) -> tok.kind == 'ident'
	isString = (tok) -> tok.kind == 'string'
	isNumber = (tok) -> tok.kind == 'number'
	isEOL = (tok) -> tok.kind == 'eol'

	printJSON = (j) ->
	console.log(JSON.stringify(j, null, 2))

	printTree = (j, indent=0) ->
	indentTxt = _.repeat(' ', indent * 2)
	if _.isArray(j) and j.length
	console.log(indentTxt+"Array[")
	for val in j
	printTree(val, indent + 1)
	console.log(indentTxt+"]")
	else if _.isObject(j) and j.vals?
	console.log(indentTxt+"#{j.kind}[")
	for val in j.vals
	printTree(val, indent + 1)
	console.log(indentTxt+"]")
	else
	console.log(indentTxt+"#{j.kind}(#{JSON.stringify(j.val)})")



	Parsing = {}

	# -------------------------
	# --- Parsing functions ---

	# Splits a token stream into a tree of brackets
	Parsing._brackets = (toks) ->
	stack = [makeRoot()]
	output = []

	bracketKindsMap = {
	'(': 'group', ')': 'group'
	'[': 'square', ']': 'square'
	'{': 'block', '}': 'block'
	}

	for tok in toks
	if isPunct(tok) and tok.val in ['(', '[', '{']
	bracketKind = bracketKindsMap[tok.val]
	branch = { 'kind': bracketKind, 'vals': [] }
	last(stack).vals.push(branch)
	stack.push(branch)
	else if isPunct(tok) and tok.val in [')', ']', '}'] and last(stack).kind == bracketKindsMap[tok.val]
	stack.pop()
	else
	last(stack).vals.push(tok)
	return stack[0].vals


	# Splits a token stream by a given token
	Parsing._splitBy = (toks, outerKind, innerKind, splitter) ->
	group = makeVals(innerKind, [])
	groups = []
	hasFound = false
	for x in toks
	if _.isEqual(x, splitter)
	hasFound = true
	groups.push(group) if group.vals.length > 0
	group = makeVals(innerKind, [])
	else
	group.vals.push(x)
	groups.push(group) if group.vals.length > 0

	if hasFound
	if outerKind?
	return [makeVals(outerKind, groups)]
	else
	return groups
	else
	return toks

	Parsing._splitLines = (toks) ->
	group = makeVals('line', [])
	groups = []
	foundNewline = false
	for x in toks
	if x.kind == 'newline'
	foundNewline = true
	hasFound = true
	groups.push(group) if group.vals.length > 0
	group = makeVals('line', [])
	else
	group.vals.push(x)
	groups.push(group) if group.vals.length > 0

	if foundNewline
	return groups

	# If there are any non-lines then wrap in a line
	for tok in toks
	if _.isObject(tok) and tok.kind != 'line'
	return [makeVals('line', toks)]

	# Otherwise return the lines
	return toks



	# -------------------------
	# --- Walking functions ---

	# Walk through the nodes of a tree.
	# before(x, isLeaf) walks outside-inwards
	# after(x, isLeaf walks inside-outwards
	# ... think the order of <begin> tags in XML, vs the order of </end> tags
	Parsing._walk = (tree, before, after) ->
	before(tree, false) if before
	for x in tree.vals
	if x.vals?
	before(x, false) if before
	Parsing._walk(x, before, after)
	after(x, false) if after
	else
	before(x, true) if before
	after(x, true) if after
	after(tree, false) if after

	Parsing._mapTreeInPlace = (tree, mapping) ->
	Parsing._walk tree, null, (x, isLeaf) ->
	if not isLeaf
	x.vals = mapping(x.vals, x)


	# --------------------
	# --- Tokenization ---
	###
	class Tokenizer
	constructor: () ->
	@rules = []
	addRule: (rule) ->
	@rules.push(rule)
	run: (txt) ->
	offset = 0
	while true

	for rule in rules
	rule.match()

	###

	Parsing.tokenize = (txt) ->
	return new Tokenizer(txt).tokenize()


	# --------------------
	# --- Test it out! ---

	txt = 'foo = bar = baz = 1, 2; 3, 4'

	toks = Parsing.tokenize(txt)

	toks = Parsing._brackets(toks)
	root = makeVals('block', toks)

	splitByAs = (root, outerKind, innerKind, splitter) ->
	Parsing._mapTreeInPlace root, (toks) ->
	Parsing._splitBy(toks, outerKind, innerKind, splitter)
	return

	# (lowest precedence)

	# - newline stage: parse out newlines from inside {blocks}
	Parsing._mapTreeInPlace root, (toks, element) ->
	if element.kind == 'block'
	# If we are in a {...} block, then each val must be a line
	lines = Parsing._splitLines(toks)
	else
	# Remove extraneous newlines from the output
	return _.filter(toks, (x) -> (x.kind != 'newline'))

	# - semicolons and commas
	Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'bindings', '...', makeVal('punct', '='))
	Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'semicolons', '...', makeVal('punct', ';'))
	Parsing._mapTreeInPlace root, (toks) -> Parsing._splitBy(toks, 'commas', '...', makeVal('punct', ','))

	printTree(root)