Skip to content

Instantly share code, notes, and snippets.

@ble
Last active May 23, 2018 05:59
Show Gist options
  • Save ble/8312995 to your computer and use it in GitHub Desktop.
Save ble/8312995 to your computer and use it in GitHub Desktop.
Expression parser
package ble
import scala.language.implicitConversions
package object parsimple {
import scala.util.parsing.combinator.{ JavaTokenParsers, ImplicitConversions }
import scala.util.parsing.input.CharSequenceReader
implicit def promoteToReader(cs: CharSequence): CharSequenceReader = new CharSequenceReader(cs)
//AST contains the definitions of all types used to represent parsed expressions
object AST {
//This trait does nothing but indicate that a type is an expression,
//but we could give it more behavior.
sealed trait Expression
//Ops contains the definitions of all operators recognized by our parser and
//some associated functionality.
object Ops {
//An Operator just needs some string to represent it
sealed class Operator(val opString: String)
//There's no difference here between a BinaryOperator and a UnaryOperator,
//but you can't substitute one for another.
sealed class BinaryOperator(op: String) extends Operator(op)
sealed class UnaryOperator(op: String) extends Operator(op)
//Define our unary operators:
case object LogicalNot extends UnaryOperator("!")
case object BitwiseNot extends UnaryOperator("~")
//Binary operators, grouped by decreasing precedence.
case object Mul extends BinaryOperator("*")
case object Div extends BinaryOperator("/")
case object Add extends BinaryOperator("+")
case object Sub extends BinaryOperator("-")
case object Lt extends BinaryOperator("<")
case object Lte extends BinaryOperator("<=")
case object Gt extends BinaryOperator(">")
case object Gte extends BinaryOperator(">=")
//C++ puts (in-)equality below order comparisons, so what the hell, we'll
//do that too.
case object Eq extends BinaryOperator("==")
case object NEq extends BinaryOperator("!=")
//We want to be able to enumerate these guys, so:
val binaryOperators = List(Mul, Div, Add, Sub, Lt, Lte, Gt, Gte, Eq, NEq)
val unaryOperators = List(LogicalNot, BitwiseNot)
//These are maps that let us look up operators by their operator string:
//the helper function used to define them comes later.
val stringToBinaryOp = opsToLookupTable(binaryOperators)
val stringToUnaryOp = opsToLookupTable(unaryOperators)
//BinaryOp and UnaryOp represent parsed expressions:
//BinaryOp represents <lhs> <operator> <rhs>
case class BinaryOp(
op: BinaryOperator,
lhs: Expression,
rhs: Expression) extends Expression
//Unary could represent either <operator> <operand> or
// <operand> <operator>
case class UnaryOp(
op: UnaryOperator,
operand: Expression) extends Expression
//The helper function used to make the map from string to operator:
//Since this is one of the more complicated method signatures in this
//example, an explanation of it follows immediately after the method body.
def opsToLookupTable[OpType <: Operator](ops: List[OpType]) = (
for {
(op, opString) <- ops.zip(ops.map(_.opString))
} yield(opString -> op)).toMap
//What the heck does
// def opsToLookupTable[OpType <: Operator](ops: List[OpType])
//mean?
//def = "I'm defining a method on my containing object, class, or trait"
//opsToLookupTable = "call it this silly long name"
//[OpType <: Operator] is a doozy, let's do it in parts:
// [OpType] would just mean
// "Part of my definition will allow you to use any type; whichever
// type you use, I'll refer to as OpType"
// OpType <: Operator means
// the type Operator is an upper bound to the type OpType
// (a subclass is "less" than the classes it inherits from
// and a superclass is "greater" than the classes that inherit form it), so
//[OpType <: Operator] = "Part of my definition will allow you to use
// Operator or any subclass thereof; whichever type
// you use, I'll refer to as OpType"
//Last but not least,
//(ops: List[OpType]) = "Remember that OpType? I'll take a list of those
// as my one and only argument."
//This function definition could have but does not need a return type,
//which would add to the signature like this:
// def opsToLookupTable[OpType <: Operator](ops: List[OpType]): Map[String, OpType]
}
//Two pseudo-constructors to simplify looking up an operator and
//creating an expression that uses that operator.
object UnaryOperation {
def apply(opString: String, operand: Expression): Expression =
Ops.UnaryOp(Ops.stringToUnaryOp(opString), operand)
}
object BinaryOperation {
def apply(opString: String, lhs: Expression, rhs: Expression): Expression =
Ops.BinaryOp(Ops.stringToBinaryOp(opString), lhs, rhs)
}
//These next three classes represent expressions like
// f(x, y, z),
// v[i, j, k], and
// o.field,
//respectively.
case class FunctionInvocation(
f: Expression,
args: List[Expression]) extends Expression
case class IndexLookup(
array: Expression,
indices: List[Expression]) extends Expression
case class FieldSelection(
struct: Expression,
fieldName: Sym) extends Expression
//This class represents an expression that is a literal number.
case class Num(value: Double) extends Expression
//This class represents an expression or part of an expression that is
//a symbol.
case class Sym(name: Symbol) extends Expression
}
//I'll define all the actual parsing in this object:
object ExprParse {
//Aside to people who know what I'm talking about:
//Rather than having this object extend JavaTokenParsers,
//it will have an instance of JavaTokenParsers and import
//all the names from it.
//This way ExprParse "uses-a JavaTokenParsers" rather than
// ExprParse "is-a JavaTokenParsers"
//and the fields of ExprParse don't contain a bunch of
//inherited junk.
//To people who don't know what I'm talking about, "anon" has a lot of
//functionality that I need and the import declaration lets me write code
//to access all of that functionality as if they were members of ExprParse,
//making the whole thing much easier to write... and a bit mysterious to read.
object anon extends JavaTokenParsers
import anon._
//The most important type that we're getting from anon is called Parser
//and although we'll be using lots and lots of Parsers, you won't see it
//mentioned much: where Scala can figure out the type of something, you
//don't have to write it down.
//This is a double-edged sword: sometimes saving the effort typing is good,
//sometimes code is much harder to understand without return types.
//Similarly, we don't want to be writing "AST.this" and "AST.Ops.that" all
//over, so we import them, too.
import AST._
import Ops._
//Because the parser rules used by expression may also use expression,
//Scala's type inference can't determine what the type of expression
//should be-- unless we make it clear by adding a return type.
def expression: Parser[Expression] = equality
//Parser[Expression] means
// "A parser that, when it succeeds, has constructed an Expression from
// the input it parsed."
//This is a helper function; it's signature means
//(opString: String) = "I take one argument, it's a string"
//: (Expression, Expression) => Expression =
// I return a function that takes two Expressions as arguments and returns
// an argument
def processBinaryOp(opString: String): (Expression, Expression) => Expression =
BinaryOperation(opString, _, _)
//These rules are super simple and very, very similar; the differences
//are that each one has different operators and each one refers to the
//one that follows immediately afterwards.
def equality = inequality.*( "[=!]=".r ^^ processBinaryOp )
//"An equality expression is one or more inequality expressions,
// with either == or != in between those inequality expressions."
//Hint: the `.r` in `"some string".r` means, "as a regex".
def inequality = arith.*( "[<>]=?".r ^^ processBinaryOp )
def arith = term.*( "[+-]".r ^^ processBinaryOp )
def term = factor.*( "[*/]".r ^^ processBinaryOp )
//Here's an interesting one: we have to handle one or more unary operators
//and these unary operators are right-associative:
def factor = "[!~]".r.* ~ negatable ^^ {
case negateOpStrings ~ rest =>
negateOpStrings.foldRight(rest)(
(opString, compounded) => UnaryOperation(opString, compounded))
}
//"A negatable expression is an invocable expression followed by one or more
// invocations."
def negatable = invocable ~ invocation.* ^^ {
case lhs ~ suffixes => suffixes.foldLeft(lhs)((x, f) => f(x))
}
def invocation =
functionInvoke | arrayIndex | fieldSelection
//The next 3 rules here are pretty dope.
//They are actually parsing rules that return functions!
//The returned functions take an expression and return expressions like
// thatExpression(some, function, args),
// thatExpression[index], or
// thatExpression.aField
//Even though these parsers have a kinda complicated type,
//we don't have to give them their explicit return type: Parser[Expression => Expression]
def functionInvoke = "(" ~> repsep(expression, ",") <~ ")" ^^ {
case fnArgs => (f: Expression) => FunctionInvocation(f, fnArgs)
}
def arrayIndex = "[" ~> repsep(expression, ",") <~ "]" ^^ {
case indices => (array: Expression) => IndexLookup(array, indices)
}
def fieldSelection = "." ~> symbol ^^ {
case fieldName => (struct: Expression) => FieldSelection(struct, fieldName)
}
//We have to define what's going to be invoked, be indexed, or have its fields
//selected, too.
def invocable = number | symbol | parenthesized
//These last 3 are dead simple, too
def number = floatingPointNumber ^^ (numStr => Num(numStr.toDouble))
def symbol = """[A-Za-z][A-Za-z0-9_]*""".r ^^ (name => Sym(Symbol(name)))
def parenthesized = "(" ~> expression <~ ")"
}
def parse(input: String) = ExprParse.expression(input)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment