Skip to content

Instantly share code, notes, and snippets.

@JoshCheek
Created November 23, 2015 08:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoshCheek/94f7f97f0e47835356a2 to your computer and use it in GitHub Desktop.
Save JoshCheek/94f7f97f0e47835356a2 to your computer and use it in GitHub Desktop.
Wrote a parser from scratch!
# Was able to make this after watching http://confreaks.tv/videos/rubyconf2015-time-flies-like-an-arrow-fruit-flies-like-a-banana-parsers-for-great-good
# I've tried various things like this before, this is probably my 20+ attempt
# (example https://github.com/JoshCheek/ruby_slippers_for_the_cobblers_children/blob/cd8bc838140c2eae0a8d12d9a9f7f8ca2f47d756/interpreter/defs/experiments/my-own-parser/parsers/machine.rb)
# I've succeeded at using Treetop previously, and Michael Baker helped me make one back in Chicago
# but this is the first one I made from scratch and succeeded at
module MathParser
module Parsers
class Parser
attr_reader :name
def initialize(name:, &to_ast)
@name = name
@to_ast = to_ast
end
def to_ast
@to_ast.call
end
end
class Delegate < Parser
attr_reader :to
def initialize(to:, **keyrest, &to_ast)
@to = to
super(**keyrest, &to_ast)
end
end
class Sequential < Parser
attr_reader :sequence
def initialize(sequence:, **keyrest, &to_ast)
@sequence = sequence
super(**keyrest, &to_ast)
end
end
class Terminal < Parser
attr_reader :string
def initialize(string:, **keyrest, &to_ast)
@string = string
super(**keyrest, &to_ast)
end
end
class Or < Parser
attr_reader :possibilities
def initialize(possibilities:, **keyrest, &to_ast)
@possibilities = possibilities
super(**keyrest, &to_ast)
end
end
class Eos < Parser
def initialize(**keyrest, &to_ast)
super(**keyrest, &to_ast)
end
end
@nodes = {}
def self.[](name)
@nodes.fetch name
end
def self.[]=(name, value)
@nodes[name] = value
end
# root: expression
self[:root] = Sequential.new name: :root,
sequence: [
Delegate.new(name: :body, to: :expression),
Eos.new(name: :eos)
]
# expression: number | '(' expr ')' | binary_operator
self[:expression] = Or.new name: :expression,
possibilities: [
Delegate.new(name: :numeric_expression, to: :number),
Sequential.new(name: :group, sequence: [
Terminal.new(name: :open_paren, string: '('),
Delegate.new(name: :expression, to: :expression),
Terminal.new(name: :close_paren, string: ')'),
]),
Delegate.new(name: :binary_operator_sequence, to: :binary_operator)
]
# binary_operator: expr operator expr
self[:binary_operator] = Sequential.new name: :binary_operator,
sequence: [
Delegate.new(name: :left, to: :expression),
Delegate.new(name: :operator, to: :operator),
Delegate.new(name: :right, to: :expression),
]
# operator: [*+]
self[:operator] = Or.new name: :operator,
possibilities: [
Terminal.new(name: :multiply, string: '*'),
Terminal.new(name: :add, string: '+'),
]
# number: digit+
# digit: [0-9]
self[:number] = Delegate.new(name: :number, to: :digit) # punting on + for now
self[:digit] = Or.new name: :digit, possibilities: [*'0'..'9'].map { |digit| Terminal.new name: :digit, string: digit }
end
end
module MathParser
class Tree
attr_accessor :begin_index, :end_index, :source, :type, :children
def initialize(source:, type:, children:, begin_index:, end_index:)
self.source, self.type, self.children, self.begin_index, self.end_index = source, type, children, begin_index, end_index
end
def inspect(depth=1)
children_asts = children.map do |child|
if child.kind_of? Tree
"\n#{" "*depth}#{child.inspect depth+1}"
else
"\n#{" "*depth}#{child.inspect}"
end
end
"(#{type} `#{matched_code}`#{children_asts.join})"
end
def matched_code
source[begin_index...end_index]
end
def first
children.first
end
end
end
module MathParser
def self.call(code)
parse_tree = parse code: code, index: 0, parsers: Parsers, parser: Parsers[:root], required_successor: nil
to_ast parse_tree
end
private
def self.to_ast(parse_tree)
return parse_tree unless parse_tree.kind_of? Tree
case parse_tree.type
when :root, :body, :expression, :binary_operator_sequence, :left, :right, :numeric_expression, :digit, :operator, :multiply, :add
to_ast parse_tree.first
when :number
{ type: :number, value: to_ast(parse_tree.first), begin_index: parse_tree.begin_index, end_index: parse_tree.end_index}
when :group
left_paren, expression, right_paren = parse_tree.children
to_ast expression
when :binary_operator
left, operator, right = parse_tree.children
{ type: :operator, operator: to_ast(operator).intern, left: to_ast(left), right: to_ast(right), begin_index: parse_tree.begin_index, end_index: parse_tree.end_index}
else raise "WHAT KIND OF PARSE TREE IS THIS? #{parse_tree.inspect}"
end
end
def self.parse(code:, index:, parsers:, parser:, required_successor:)
{depth: caller.grep(/`parse'/).length, index: index, parser: parser, succ: required_successor}
case parser
when Parsers::Delegate
child = parse code: code, index: index, parsers: parsers, parser: parsers[parser.to], required_successor: required_successor
child && Tree.new(source: code, type: parser.name, children: [child], begin_index: index, end_index: child.end_index)
when Parsers::Or
parser.possibilities.each do |child|
child = parse code: code, index: index, parsers: parsers, parser: child, required_successor: required_successor
next unless child
return Tree.new(source: code, type: parser.name, children: [child], begin_index: index, end_index: child.end_index)
end
nil
when Parsers::Sequential
children = []
current_index = index
parser.sequence.each_with_index do |child, sequence_index|
successors = parser.sequence[sequence_index+1..-1]
successors << required_successor if required_successor
child_successor = nil
child_successor = Parsers::Sequential.new(name: :successors, sequence: successors) if successors.any?
ast = parse code: code, index: current_index, parsers: parsers, parser: child, required_successor: child_successor
return nil unless ast
children << ast
current_index = ast.end_index
end
Tree.new(source: code, type: parser.name, children: children, begin_index: index, end_index: children.last.end_index)
when Parsers::Terminal
string = code[index, parser.string.length]
if string == parser.string
end_index = index + string.length
return if required_successor && !parse(code: code, index: end_index, parsers: parsers, parser: required_successor, required_successor: nil)
Tree.new(source: code, type: parser.name, children: [string], begin_index: index, end_index: index+string.length)
end
when Parsers::Eos
if code.length <= index
Tree.new(source: code, type: parser.name, children: [], begin_index: index, end_index: index)
end
else raise "WHAT TYPE OF NODE IS THIS?! #{parser.inspect}"
end
end
end
MathParser.call('2*(1+3)')
# => {:type=>:operator,
# :operator=>:*,
# :left=>{:type=>:number, :value=>"2", :begin_index=>0, :end_index=>1},
# :right=>
# {:type=>:operator,
# :operator=>:+,
# :left=>{:type=>:number, :value=>"1", :begin_index=>3, :end_index=>4},
# :right=>{:type=>:number, :value=>"3", :begin_index=>5, :end_index=>6},
# :begin_index=>3,
# :end_index=>6},
# :begin_index=>0,
# :end_index=>7}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment