# ZoomTen/parlexgen_try.nim

## parlexgen_try.nim
# based on https://github.com/choltreppe/parlexgen

import std/[strutils, options, strformat]
import parlexgen, macros

type
  # The kinds of node an expression tree can contain.
  ExpKind = enum
    expNum, # leaf node: a numeric constant
    # binary operators
    expMultiply,
    #expDivision,
    expAdd,
    expSubtract

  # A node of the expression tree. It is a ref object whose fields depend on
  # `kind`: a number carries a value, every other kind carries two operands.
  Expression = ref object
    case kind: ExpKind
    of expNum:
      val: int # the value of the numeric constant
    else:
      left: Expression # left-hand operand
      right: Expression # right-hand operand

  # The kinds of token produced by the lexer rules further down in this file.
  TokenKind = enum
    T_NUMBER,
    T_LEFT_PAREN, T_RIGHT_PAREN,
    T_ADD, T_SUBTRACT,
    T_MULTIPLY, #T_DIVISION,
    T_SEPARATOR

  # A single token: its kind, the line/column stored by the lexer rule that
  # created it and, for numbers only, the parsed integer value.
  Token = object
    line, col: int # position values copied from the lexer's `line` and `col`
    case kind: TokenKind
    of T_NUMBER:
      val: int # result of parseInt on the matched digits
    else: discard

# generic error handler
proc handleParseError(nameOfRule: string, token: Option[Token]) =
  ## Prints a parse-error message for the grammar rule labelled `nameOfRule`
  ## with `echo` and then terminates the program with exit code 1.
  ##
  ## `token` is the token the error is reported for. When it holds a value,
  ## the message shows its kind and `line:col`; when it is empty, the message
  ## shows "EOF" instead.
  let identifier =
    if token.isSome:
      # Unwrap the option once instead of once per interpolated field.
      let tok = token.get()
      fmt"{tok.kind} ({tok.line}:{tok.col})"
    else:
      "EOF"
  echo fmt"{nameOfRule}: Unexpected {identifier}!"
  quit(1)

# creating the string representation of the resulting AST
func `$`(e: Expression): string =
  ## Renders the tree rooted at `e` as text.
  ##
  ## A number node becomes its value. Any other node becomes
  ## `(<left> <kind> <right>)`, where both operands are rendered recursively
  ## and `<kind>` is the node's `ExpKind` converted with `$`.
  if e.kind == expNum:
    result = $e.val
  else:
    result = "("
    result.add($e.left)
    result.add(" ")
    result.add($e.kind)
    result.add(" ")
    result.add($e.right)
    result.add(")")

# actually performing operations on it!
func sumUp(e: Expression): int =
  ## Evaluates the expression tree rooted at `e` and returns the result.
  ##
  ## A number node yields its value; an operator node evaluates its left
  ## operand, then its right operand, and combines the two.
  case e.kind
  of expNum:
    result = e.val
  of expAdd:
    result = e.left.sumUp + e.right.sumUp
  of expSubtract:
    result = e.left.sumUp - e.right.sumUp
  of expMultiply:
    result = e.left.sumUp * e.right.sumUp
  # division is currently disabled:
  #of expDivision: sumUp(e.left) / sumUp(e.right)

# Lexer definition. `makeLexer` (imported from parlexgen) is given the name
# `lex` and the token type `Token`; each rule below pairs a regular expression
# with the expression evaluated for text matching that pattern.
# NOTE(review): `match`, `line` and `col` are not declared anywhere in this
# file - presumably `makeLexer` provides them (the matched text and its
# position). Confirm against the parlexgen documentation.
makeLexer lex[Token]: # each rule: regex -> token to build
  # a run of decimal digits; the matched text is converted with parseInt
  r"[0-9]+": Token(kind: T_NUMBER, val: parseInt(match),line: line, col: col)
  r"\*": Token(kind: T_MULTIPLY, line: line, col: col)
  #r"/": Token(kind: T_DIVISION, line: line, col: col)
  r"\+": Token(kind: T_ADD, line: line, col: col)
  r"-": Token(kind: T_SUBTRACT, line: line, col: col)
  r"\(": Token(kind: T_LEFT_PAREN, line: line, col: col)
  r"\)": Token(kind: T_RIGHT_PAREN, line: line, col: col)
  # A line break becomes a separator token. The pattern also consumes the
  # whitespace after the line break; this is necessary to account for the
  # indentation of the following line.
  r"\r?\n\s*": Token(kind: T_SEPARATOR, line: line, col: col)
  r"\s+": continue # other whitespace: discard, no token is built

# Parser definition. `makeParser` (imported from parlexgen) is given the name
# `parse` and the token type `Token`. Each `Name[Type]:` section lists the
# productions of one nonterminal; a production is a pattern (a single symbol
# or a tuple of symbols) followed by the expression that builds its result.
#
# Grammar, lowest to highest binding strength:
#   MultipleExpr - expressions separated by T_SEPARATOR, collected in a seq
#   Expr         - left-recursive chains of T_ADD / T_SUBTRACT
#   Factor       - left-recursive chains of T_MULTIPLY
#   Term         - a parenthesised Expr or a single T_NUMBER
#
# NOTE(review): `s` and `token` are not declared in this file. Presumably `s`
# holds the values matched by the pattern, indexed by position, and `token`
# is the `Option[Token]` at which a production failed - confirm against the
# parlexgen documentation. The `try`/`except` forms are input to the macro;
# presumably the `except` body runs when its production cannot be completed.
# Every `except` body here calls `handleParseError` with a label for the
# rule; that proc prints a message and quits.
makeParser parse[Token]:
  # A list of expressions.
  MultipleExpr[seq[Expression]]:
    try:
      # list so far, a separator, one more expression: append it to the list
      (MultipleExpr, T_SEPARATOR, Expr): s[0] & s[2]
    except:
      handleParseError "expr-separator-expr", token

    # best to define the level below if referencing it
    # in another match
    try:
      # a single expression starts a one-element list
      Expr: @[s[0]]
    except:
      handleParseError "expr-in", token

  # Addition and subtraction.
  Expr[Expression]:
    try:
      (Expr, T_ADD, Factor):
        Expression(kind: expAdd,
          left: s[0],
          right: s[2]
        )
    except:
      # NOTE(review): the sibling rules use labels such as "expr - factor";
      # the label here is just "expr" - possibly meant "expr + factor".
      handleParseError "expr", token

    try:
      (Expr, T_SUBTRACT, Factor):
        Expression(kind: expSubtract,
          left: s[0],
          right: s[2]
        )
    except:
      handleParseError "expr - factor", token

    try:
      # no operator at this level: pass the Factor's value through
      Factor: s[0]
    except:
      handleParseError "factor-in", token

  # Multiplication.
  Factor[Expression]:
    try:
      (Factor, T_MULTIPLY, Term):
          Expression(kind: expMultiply,
            left: s[0],
            right: s[2]
          )
    except:
      handleParseError "factor * term", token
    # division rule, currently disabled:
    #[
    (Factor, T_DIVISION, Term):
      Expression(kind: expDivision,
        left: s[0],
        right: s[2]
      )
    ]#
    try:
      # no operator at this level: pass the Term's value through
      Term: s[0]
    except:
      handleParseError "term-in", token

  # Parenthesised expressions and numeric constants.
  Term[Expression]:
    try:
      (T_LEFT_PAREN, Expr, T_RIGHT_PAREN):
        # the result is the inner expression; the parentheses add no node
        s[1]
    except:
      handleParseError "( expr )", token

    try:
      T_NUMBER:
        # wrap the number token's value in a leaf node
        Expression(kind: expNum, val: s[0].val)
    except:
      handleParseError "number", token

# Sample input: two expressions, one per line, each line indented.
let test = """
  22 * 44 + 66 + 11 + (22 + 44)
  145 * 104"""

try:
  # First pass: list every token the lexer produces for the sample.
  echo "List of tokens:"
  for token in tokens(test, lex):
    echo '\t' & fmt"{token.kind} ({token.line}:{token.col})"

  echo ""

  # Second pass: parse the sample, then print and evaluate each expression.
  for tree in parse(test, lex):
    echo "Parsed: " & $tree
    echo "Summed: " & $sumUp(tree)
    echo ""

except LexingError as e:
  # Report the offending character and where it was found.
  echo fmt"Encountered invalid character '{test[e.pos]}'! ({e.line}:{e.col})"
	# based on https://github.com/choltreppe/parlexgen

	import std/[strutils, options, strformat]
	import parlexgen, macros

	# NOTE(review): this type section repeats the one declared earlier in this
	# file, but indented with tabs and with all nesting flattened. Nim rejects
	# tabs for indentation, so this copy cannot compile as written.
	type
	# The kinds of node an expression tree can contain.
	ExpKind = enum
	expNum, # leaf node: a numeric constant
	# binary operators
	expMultiply,
	#expDivision,
	expAdd,
	expSubtract

	# A node of the expression tree.
	Expression = ref object
	case kind: ExpKind
	of expNum:
	val: int
	else:
	left: Expression
	right: Expression

	# The kinds of token produced by the lexer rules.
	TokenKind = enum
	T_NUMBER,
	T_LEFT_PAREN, T_RIGHT_PAREN,
	T_ADD, T_SUBTRACT,
	T_MULTIPLY, #T_DIVISION,
	T_SEPARATOR

	# A single token: kind, line/column and, for numbers, the integer value.
	Token = object
	line, col: int
	case kind: TokenKind
	of T_NUMBER:
	val: int
	else: discard

	# generic error handler
	# NOTE(review): tab-indented, flattened repeat of the `handleParseError`
	# proc defined earlier in this file; Nim rejects tabs for indentation, so
	# this copy cannot compile as written.
	# Prints "<rule>: Unexpected <token kind (line:col) or EOF>!" and then
	# quits with exit code 1.
	proc handleParseError(nameOfRule: string, token: Option[Token]) =
	var identifier: string;
	if token.isSome:
	identifier = fmt"{token.get().kind} ({token.get().line}:{token.get().col})"
	else:
	identifier = "EOF"
	echo fmt"{nameOfRule}: Unexpected {identifier}!"
	quit(1)

	# creating the string representation of the resulting AST
	# NOTE(review): tab-indented, flattened repeat of the `$` func defined
	# earlier in this file; Nim rejects tabs for indentation, so this copy
	# cannot compile as written.
	# Renders a number node as its value and any other node as
	# "(<left> <kind> <right>)".
	func `$`(e: Expression): string =
	case e.kind
	of expNum: $e.val
	else: "(" & $e.left & " " & $e.kind & " " & $e.right & ")"

	# actually performing operations on it!
	# NOTE(review): tab-indented, flattened repeat of the `sumUp` func defined
	# earlier in this file; Nim rejects tabs for indentation, so this copy
	# cannot compile as written.
	# Evaluates an expression tree: a number yields its value, an operator
	# node combines the results of its left and right operands.
	func sumUp(e: Expression): int =
	case e.kind
	of expNum: e.val
	of expAdd: sumUp(e.left) + sumUp(e.right)
	of expSubtract: sumUp(e.left) - sumUp(e.right)
	of expMultiply: sumUp(e.left) * sumUp(e.right)
	#of expDivision: sumUp(e.left) / sumUp(e.right)

	# NOTE(review): tab-indented, flattened repeat of the `makeLexer lex[Token]`
	# block defined earlier in this file; Nim rejects tabs for indentation, so
	# this copy cannot compile as written.
	# Each rule pairs a regular expression with the expression evaluated for
	# text matching that pattern.
	makeLexer lex[Token]: # each rule: regex -> token to build
	r"[0-9]+": Token(kind: T_NUMBER, val: parseInt(match),line: line, col: col)
	r"\*": Token(kind: T_MULTIPLY, line: line, col: col)
	#r"/": Token(kind: T_DIVISION, line: line, col: col)
	r"\+": Token(kind: T_ADD, line: line, col: col)
	r"-": Token(kind: T_SUBTRACT, line: line, col: col)
	r"\(": Token(kind: T_LEFT_PAREN, line: line, col: col)
	r"\)": Token(kind: T_RIGHT_PAREN, line: line, col: col)
	# A line break becomes a separator token. The pattern also consumes the
	# whitespace after the line break; this is necessary to account for the
	# indentation of the following line.
	r"\r?\n\s*": Token(kind: T_SEPARATOR, line: line, col: col)
	r"\s+": continue # other whitespace: discard, no token is built

	# NOTE(review): tab-indented, flattened repeat of the
	# `makeParser parse[Token]` block defined earlier in this file. Nim rejects
	# tabs for indentation, and with the nesting flattened the extent of each
	# nonterminal section and each try/except can no longer be read from this
	# text, so this copy cannot compile as written.
	# The grammar covers: MultipleExpr (expressions separated by T_SEPARATOR),
	# Expr (T_ADD / T_SUBTRACT), Factor (T_MULTIPLY) and Term (parentheses or
	# a number).
	makeParser parse[Token]:
	MultipleExpr[seq[Expression]]:
	try:
	(MultipleExpr, T_SEPARATOR, Expr): s[0] & s[2]
	except:
	handleParseError "expr-separator-expr", token

	# best to define the level below if referencing it
	# in another match
	try:
	Expr: @[s[0]]
	except:
	handleParseError "expr-in", token

	Expr[Expression]:
	try:
	(Expr, T_ADD, Factor):
	Expression(kind: expAdd,
	left: s[0],
	right: s[2]
	)
	except:
	handleParseError "expr", token

	try:
	(Expr, T_SUBTRACT, Factor):
	Expression(kind: expSubtract,
	left: s[0],
	right: s[2]
	)
	except:
	handleParseError "expr - factor", token

	try:
	Factor: s[0]
	except:
	handleParseError "factor-in", token

	Factor[Expression]:
	try:
	(Factor, T_MULTIPLY, Term):
	Expression(kind: expMultiply,
	left: s[0],
	right: s[2]
	)
	except:
	handleParseError "factor * term", token
	# division rule, currently disabled:
	#[
	(Factor, T_DIVISION, Term):
	Expression(kind: expDivision,
	left: s[0],
	right: s[2]
	)
	]#
	try:
	Term: s[0]
	except:
	handleParseError "term-in", token

	Term[Expression]:
	try:
	(T_LEFT_PAREN, Expr, T_RIGHT_PAREN):
	s[1]
	except:
	handleParseError "( expr )", token

	try:
	T_NUMBER:
	Expression(kind: expNum, val: s[0].val)
	except:
	handleParseError "number", token

	# NOTE(review): tab-indented, flattened repeat of the demo script that
	# appears earlier in this file; Nim rejects tabs for indentation, so this
	# copy cannot compile as written. In this copy the lines inside the sample
	# string start with a tab rather than spaces.
	let test = """
	22 * 44 + 66 + 11 + (22 + 44)
	145 * 104"""

	try:
	# list every token the lexer produces for the sample
	echo "List of tokens:"
	for token in test.tokens(lex):
	echo '\t' & fmt"{token.kind} ({token.line}:{token.col})"

	echo ""

	# parse the sample, then print and evaluate each expression
	for i in test.parse(lex):
	echo "Parsed: " & $i
	echo "Summed: " & $(i.sumUp)
	echo ""

	except LexingError as e:
	# report the offending character and where it was found
	echo fmt"Encountered invalid character '{test[e.pos]}'! ({e.line}:{e.col})"