Skip to content

Instantly share code, notes, and snippets.

@FranchuFranchu
Created July 26, 2020 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FranchuFranchu/83a2cec801c7e0d80e83e0a21094890e to your computer and use it in GitHub Desktop.
Save FranchuFranchu/83a2cec801c7e0d80e83e0a21094890e to your computer and use it in GitHub Desktop.
Earley parser in GDScript
extends Object
class_name Parser
# Based off https://github.com/tomerfiliba/tau/blob/master/earley3.py
class Test:
static func tostring(val):
if typeof(val) == TYPE_STRING:
return val
elif val == null:
return "None"
else:
return val._str()
class Production:
var terms = []
func _init(terms):
self.terms = terms
func get_class():
return "Production"
func id():
var ts = ""
for i in self.terms:
ts += Test.tostring(i) + ","
return ts
class Rule:
var name = ""
var productions = []
func _init(name, productions = []):
self.name = name
self.productions = productions
func add(productions_):
self.productions = self.productions + productions_
func get_class():
return "Rule"
func _str():
return self.name
class State:
var name
var production
var start_column
var end_column
var dot_index
var rules
func _init(name, production, dot_index, start_column):
self.name = name
self.production = production
self.dot_index = dot_index
self.start_column = start_column
self.rules = []
for i in production.terms:
if typeof(i) == TYPE_STRING:
continue
if i.get_class() == "Rule":
self.rules.append(i)
func completed():
return self.dot_index >= len(self.production.terms)
func id():
return self.name + self.production.id()
func next_term():
if self.completed():
return null
return self.production.terms[self.dot_index]
func get_class():
return "State"
func _str():
var terms = []
for p in self.production.terms:
if typeof(p) == TYPE_STRING:
terms.append(p)
continue
terms.append(p._str())
terms.insert(self.dot_index, "$")
return "%-5s -> %-16s [%s-%s]" % [self.name, PoolStringArray(terms).join(" "), self.start_column._str(), self.end_column._str()]
class Column:
var index
var token
var states
var _unique
func _init(index, token):
self.index = index
self.token = token
self.states = []
self._unique = []
func add(state):
if not self._unique.has(state.id()):
self._unique.append(state.id())
state.end_column = self
self.states.append(state)
return true
return false
func _str():
return str(self.index)
func print_():
var toks = Test.tostring(self.token)
print("[%s] %s" % [self.index, toks])
print("=".repeat(35))
for s in self.states:
print(s._str())
print()
class Node_parser:
var value
var children
func _init(value, children):
self.value = value
self.children = children
func print_(level = 0):
print(" ".repeat(level) + Test.tostring(self.value))
for child in self.children:
child.print_(level + 1)
func predict(col, rule):
for prod in rule.productions:
col.add(State.new(rule.name, prod, 0, col))
func scan(col, state, token):
if token != col.token:
print("return")
return
col.add(State.new(state.name, state.production, state.dot_index + 1, state.start_column))
func complete(col, state):
if not state.completed():
return
for st in state.start_column.states:
var term = st.next_term()
if typeof(term) == TYPE_STRING:
continue
elif typeof(term) == TYPE_NIL:
continue
elif not term.get_class() == "Rule":
continue
elif term.name == state.name:
col.add(State.new(st.name, st.production, st.dot_index + 1, st.start_column))
const GAMMA_RULE = "GAMMA"
func parse(rule, text):
var table = []
var idx = 0
for tok in ([null] + Array(text.to_lower().split(" "))):
table.append(Column.new(idx, tok))
idx += 1
table[0].add(State.new(GAMMA_RULE, Production.new([rule]), 0, table[0]))
for i in table:
i.print_()
idx = 0
for col in table:
print(col._str())
for state in col.states:
print(" ", state._str())
if state.completed():
print("complete")
complete(col, state)
else:
var term = state.next_term()
print(" ", Test.tostring(term))
if not (typeof(term) == TYPE_STRING) and term.get_class() == "Rule":
predict(col, term)
elif (idx + 1) < len(table):
print("end?")
print(idx, " ", state._str(), " ")
scan(table[idx+1], state, term)
idx += 1
# find gamma rule in last table column (otherwise fail)
for st in table[-1].states:
if st.name == GAMMA_RULE and st.completed():
return st
print("Fail while parsing")
return false
func build_trees(state):
return build_trees_helper([], state, len(state.rules) - 1, state.end_column)
func build_trees_helper(children, state, rule_index, end_column):
var start_column
if rule_index < 0:
return [Node_parser.new(state, children)]
elif rule_index == 0:
start_column = state.start_column
else:
start_column = null
var rule = state.rules[rule_index]
var outputs = []
for st in end_column.states:
if st == state:
break
if st == state or not st.completed() or st.name != rule.name:
continue
if start_column != null and st.start_column != start_column:
continue
for sub_tree in build_trees(st):
for node in build_trees_helper([sub_tree] + children, state, rule_index - 1, st.start_column):
outputs.append(node)
return outputs
func test():
print("test")
var N = Rule.new("N", [Production.new(["time"]), Production.new(["flight"]), Production.new(["banana"]), Production.new(["flies"]), Production.new(["boy"]), Production.new(["telescope"])])
var D = Rule.new("D", [Production.new(["the"]), Production.new(["a"]), Production.new(["an"])])
var V = Rule.new("V", [Production.new(["book"]), Production.new(["eat"]), Production.new(["sleep"]), Production.new(["saw"])])
var P = Rule.new("P", [Production.new(["with"]), Production.new(["in"]), Production.new(["on"]), Production.new(["at"]), Production.new(["through"])])
print("test")
var PP = Rule.new("PP")
var NP = Rule.new("NP", [Production.new([D, N]), Production.new(["john"]), Production.new(["houston"])])
NP.add([Production.new([NP, PP])])
PP.add([Production.new([P, NP])])
print("test")
var VP = Rule.new("VP", [Production.new([V, NP])])
VP.add([Production.new([VP, PP])])
var S = Rule.new("S", [Production.new([NP, VP]), Production.new([VP])])
var result = parse(S, "book the flight through houston")
for tree in build_trees(result):
tree.print_()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment