Created
November 23, 2015 08:00
-
-
Save JoshCheek/94f7f97f0e47835356a2 to your computer and use it in GitHub Desktop.
Wrote a parser from scratch!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Was able to make this after watching http://confreaks.tv/videos/rubyconf2015-time-flies-like-an-arrow-fruit-flies-like-a-banana-parsers-for-great-good | |
# I've tried various things like this before, this is probably my 20+ attempt | |
# (example https://github.com/JoshCheek/ruby_slippers_for_the_cobblers_children/blob/cd8bc838140c2eae0a8d12d9a9f7f8ca2f47d756/interpreter/defs/experiments/my-own-parser/parsers/machine.rb) | |
# I've succeeded at using Treetop previously, and Michael Baker helped me make one back in Chicago | |
# but this is the first one I made from scratch and succeeded at | |
module MathParser | |
module Parsers | |
class Parser | |
attr_reader :name | |
def initialize(name:, &to_ast) | |
@name = name | |
@to_ast = to_ast | |
end | |
def to_ast | |
@to_ast.call | |
end | |
end | |
class Delegate < Parser | |
attr_reader :to | |
def initialize(to:, **keyrest, &to_ast) | |
@to = to | |
super(**keyrest, &to_ast) | |
end | |
end | |
class Sequential < Parser | |
attr_reader :sequence | |
def initialize(sequence:, **keyrest, &to_ast) | |
@sequence = sequence | |
super(**keyrest, &to_ast) | |
end | |
end | |
class Terminal < Parser | |
attr_reader :string | |
def initialize(string:, **keyrest, &to_ast) | |
@string = string | |
super(**keyrest, &to_ast) | |
end | |
end | |
class Or < Parser | |
attr_reader :possibilities | |
def initialize(possibilities:, **keyrest, &to_ast) | |
@possibilities = possibilities | |
super(**keyrest, &to_ast) | |
end | |
end | |
class Eos < Parser | |
def initialize(**keyrest, &to_ast) | |
super(**keyrest, &to_ast) | |
end | |
end | |
@nodes = {} | |
def self.[](name) | |
@nodes.fetch name | |
end | |
def self.[]=(name, value) | |
@nodes[name] = value | |
end | |
# root: expression | |
self[:root] = Sequential.new name: :root, | |
sequence: [ | |
Delegate.new(name: :body, to: :expression), | |
Eos.new(name: :eos) | |
] | |
# expression: number | '(' expr ')' | binary_operator | |
self[:expression] = Or.new name: :expression, | |
possibilities: [ | |
Delegate.new(name: :numeric_expression, to: :number), | |
Sequential.new(name: :group, sequence: [ | |
Terminal.new(name: :open_paren, string: '('), | |
Delegate.new(name: :expression, to: :expression), | |
Terminal.new(name: :close_paren, string: ')'), | |
]), | |
Delegate.new(name: :binary_operator_sequence, to: :binary_operator) | |
] | |
# binary_operator: expr operator expr | |
self[:binary_operator] = Sequential.new name: :binary_operator, | |
sequence: [ | |
Delegate.new(name: :left, to: :expression), | |
Delegate.new(name: :operator, to: :operator), | |
Delegate.new(name: :right, to: :expression), | |
] | |
# operator: [*+] | |
self[:operator] = Or.new name: :operator, | |
possibilities: [ | |
Terminal.new(name: :multiply, string: '*'), | |
Terminal.new(name: :add, string: '+'), | |
] | |
# number: digit+ | |
# digit: [0-9] | |
self[:number] = Delegate.new(name: :number, to: :digit) # punting on + for now | |
self[:digit] = Or.new name: :digit, possibilities: [*'0'..'9'].map { |digit| Terminal.new name: :digit, string: digit } | |
end | |
end | |
module MathParser | |
class Tree | |
attr_accessor :begin_index, :end_index, :source, :type, :children | |
def initialize(source:, type:, children:, begin_index:, end_index:) | |
self.source, self.type, self.children, self.begin_index, self.end_index = source, type, children, begin_index, end_index | |
end | |
def inspect(depth=1) | |
children_asts = children.map do |child| | |
if child.kind_of? Tree | |
"\n#{" "*depth}#{child.inspect depth+1}" | |
else | |
"\n#{" "*depth}#{child.inspect}" | |
end | |
end | |
"(#{type} `#{matched_code}`#{children_asts.join})" | |
end | |
def matched_code | |
source[begin_index...end_index] | |
end | |
def first | |
children.first | |
end | |
end | |
end | |
module MathParser | |
def self.call(code) | |
parse_tree = parse code: code, index: 0, parsers: Parsers, parser: Parsers[:root], required_successor: nil | |
to_ast parse_tree | |
end | |
private | |
def self.to_ast(parse_tree) | |
return parse_tree unless parse_tree.kind_of? Tree | |
case parse_tree.type | |
when :root, :body, :expression, :binary_operator_sequence, :left, :right, :numeric_expression, :digit, :operator, :multiply, :add | |
to_ast parse_tree.first | |
when :number | |
{ type: :number, value: to_ast(parse_tree.first), begin_index: parse_tree.begin_index, end_index: parse_tree.end_index} | |
when :group | |
left_paren, expression, right_paren = parse_tree.children | |
to_ast expression | |
when :binary_operator | |
left, operator, right = parse_tree.children | |
{ type: :operator, operator: to_ast(operator).intern, left: to_ast(left), right: to_ast(right), begin_index: parse_tree.begin_index, end_index: parse_tree.end_index} | |
else raise "WHAT KIND OF PARSE TREE IS THIS? #{parse_tree.inspect}" | |
end | |
end | |
def self.parse(code:, index:, parsers:, parser:, required_successor:) | |
{depth: caller.grep(/`parse'/).length, index: index, parser: parser, succ: required_successor} | |
case parser | |
when Parsers::Delegate | |
child = parse code: code, index: index, parsers: parsers, parser: parsers[parser.to], required_successor: required_successor | |
child && Tree.new(source: code, type: parser.name, children: [child], begin_index: index, end_index: child.end_index) | |
when Parsers::Or | |
parser.possibilities.each do |child| | |
child = parse code: code, index: index, parsers: parsers, parser: child, required_successor: required_successor | |
next unless child | |
return Tree.new(source: code, type: parser.name, children: [child], begin_index: index, end_index: child.end_index) | |
end | |
nil | |
when Parsers::Sequential | |
children = [] | |
current_index = index | |
parser.sequence.each_with_index do |child, sequence_index| | |
successors = parser.sequence[sequence_index+1..-1] | |
successors << required_successor if required_successor | |
child_successor = nil | |
child_successor = Parsers::Sequential.new(name: :successors, sequence: successors) if successors.any? | |
ast = parse code: code, index: current_index, parsers: parsers, parser: child, required_successor: child_successor | |
return nil unless ast | |
children << ast | |
current_index = ast.end_index | |
end | |
Tree.new(source: code, type: parser.name, children: children, begin_index: index, end_index: children.last.end_index) | |
when Parsers::Terminal | |
string = code[index, parser.string.length] | |
if string == parser.string | |
end_index = index + string.length | |
return if required_successor && !parse(code: code, index: end_index, parsers: parsers, parser: required_successor, required_successor: nil) | |
Tree.new(source: code, type: parser.name, children: [string], begin_index: index, end_index: index+string.length) | |
end | |
when Parsers::Eos | |
if code.length <= index | |
Tree.new(source: code, type: parser.name, children: [], begin_index: index, end_index: index) | |
end | |
else raise "WHAT TYPE OF NODE IS THIS?! #{parser.inspect}" | |
end | |
end | |
end | |
MathParser.call('2*(1+3)') | |
# => {:type=>:operator, | |
# :operator=>:*, | |
# :left=>{:type=>:number, :value=>"2", :begin_index=>0, :end_index=>1}, | |
# :right=> | |
# {:type=>:operator, | |
# :operator=>:+, | |
# :left=>{:type=>:number, :value=>"1", :begin_index=>3, :end_index=>4}, | |
# :right=>{:type=>:number, :value=>"3", :begin_index=>5, :end_index=>6}, | |
# :begin_index=>3, | |
# :end_index=>6}, | |
# :begin_index=>0, | |
# :end_index=>7} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment