Skip to content

Instantly share code, notes, and snippets.

@vrthra
Last active February 27, 2021 06:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vrthra/52c00b9100055be635f9b0ffcaa61ad3 to your computer and use it in GitHub Desktop.
Save vrthra/52c00b9100055be635f9b0ffcaa61ad3 to your computer and use it in GitHub Desktop.
error correcting earley
import random
import string
import itertools as I
grammar = {
'<start>': [['<expr>']],
'<expr>': [ ['<term>', '+', '<expr>'], ['<term>', '-', '<expr>'], ['<term>']],
'<term>': [ ['<fact>', '*', '<term>'], ['<fact>', '/', '<term>'], ['<fact>']],
'<fact>': [ ['<digits>'], ['(','<expr>',')']],
'<digits>': [ ['<digit>','<digits>'], ['<digit>']],
'<digit>': [["%s" % str(i)] for i in range(10)],
}
START = '<start>'
def print_g(g):
for k in g:
print(k)
for rule in g[k]:
print('| ', ' '.join([repr(k) for k in rule]))
# Add extras.
Any_one = '<$.>' # this is a terminal
Any_plus = '<$.+>' # this is a nonterminal
Empty = '<$>' # this is a nonterminal
def This_char(t): return '<$ {%s}>' % t # this is a terminal.
def Any_not(t): return '<$!{%s}>' % t # this is a terminal.
def is_nt(k):
if len(k) == 1: return False
return (k[0], k[-1]) == ('<', '>')
def translate_terminal(t):
if is_nt(t): return t
return This_char(t)
def translate_terminals(g):
return {k:[[translate_terminal(t) for t in alt] for alt in g[k]] for k in g}
# The following gets a penalty each during complete.
# Any use of <$.> : Any_one --- note, Any_plus gets +1 automatically
# Any use of <$> : Empty
# Any use of <$ !{.}> : Any_not
def augment_grammar(g):
Symbols = [t for k in g for alt in g[k] for t in alt if not is_nt(t)]
#Symbols = [i for i in string.printable if i not in '\n\r\t\x0b\x0c']
Match_any_char = {Any_one: [[k] for k in Symbols]}
Match_any_char_except = {}
for kk in Symbols:
Match_any_char_except[Any_not(kk)] = [[k] for k in Symbols if k != kk]
Match_empty = {Empty: []}
Match_a_char = {}
for kk in Symbols:
Match_a_char[This_char(kk)] = [
[kk],
[Any_plus, kk],
[Empty],
[Any_not(kk)]
]
return {**translate_terminals(g), **Match_any_char, **Match_a_char, **Match_any_char_except, **Match_empty}
print_g(augment_grammar(grammar))
import sys
sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment