Skip to content

Instantly share code, notes, and snippets.

@mccutchen
Created April 9, 2010 21:08
Show Gist options
  • Save mccutchen/361586 to your computer and use it in GitHub Desktop.
Save mccutchen/361586 to your computer and use it in GitHub Desktop.
A rough first translation of Jesse's BNF grammar into pyparsing
from string import lowercase, uppercase
from pyparsing import (Word, Literal, ZeroOrMore, Regex, Or, CharsNotIn,
Optional, CaselessLiteral, Combine, Suppress,
srange, alphas, alphanums, nums)
def Literals(s, splitchar=None):
    """Build an ``Or`` of ``Literal`` alternatives from a delimited string.

    ``s`` is split with ``s.split(splitchar)``; the default ``None`` splits
    on runs of whitespace. Every non-empty piece becomes a ``Literal`` and
    the pieces are combined into a single ``Or`` expression, which is
    returned.

    Empty pieces -- produced by a leading, trailing or doubled explicit
    separator, e.g. ``"a/b/"`` split on ``"/"`` -- are skipped so they never
    become a zero-length alternative in the resulting ``Or``.
    """
    # Build a concrete list rather than handing ``Or`` a lazy ``map`` object,
    # so the alternatives behave the same on every Python version.
    return Or([Literal(piece) for piece in s.split(splitchar) if piece])
# ---------------------------------------------------------------------------
# Terminal symbols: the fixed vocabulary the larger rules are built from.
# ---------------------------------------------------------------------------

# Plural suffix; 'es' is listed first so it is tried before the shorter 's'.
ess = Literal('es') | Literal('s')

# Generic word tokens: letters (and digits for alphanum) plus a little
# punctuation that may appear inside a word.
alphanum = Word(alphanums + "',/.-")
alpha = Word(alphas + "'-")
num = Word(nums)

# Spelled-out numbers and fraction names.
numword = Literals('one two three four five six seven eight nine ten')
fractional = Literals('half third fourth fifth sixth')

# Punctuation and connecting words.
dot = Literal('.')
range_ = Literals('to -')
comma = Literals(', -')
connector = Literal('of')
paren = Literals('( )')

# Container names, optionally pluralised, merged into a single token.
containers = Combine(
    Literals('package pk bag can container cn envelope jar') + Optional(ess))

# Units and unit-like words, grouped by kind.
metric = Literals('kg gram kilogram ml liter cc')
# This list is split on "/" because some entries contain spaces. The string
# must not end with "/": a trailing separator would add an empty alternative.
colloq = Literals("dozen/half-dozen/half dozen/doz/ea/each/x/"
                  "baker's dozen/bakers dozen", "/")
relative = Literals('medium-sized medium med/small med large small s lg')
genus = Literals('bunch bn head clove ear batch slice stick leg zest rind '
                 'sprig')
imperial = Literals('feet cup c quart qt pint teaspoon tspn tsp tablespoon '
                    'tbsp tb ts t pound lb ounce oz gallon')
mod_amount = Literals('scant generous several about')
qualitative = Literals('dash ds pinch pn splash shake')
# ---------------------------------------------------------------------------
# Amounts: numerals, fractions, mixed numbers and optional ranges.
# ---------------------------------------------------------------------------

# A number word followed by a (possibly plural) fraction name,
# e.g. "two thirds".
fraction_word = numword + Combine(fractional + Optional(ess))
# Digits, a slash, digits, merged into one token; adjacent=False lets the
# pieces be separated by whitespace in the input.
fraction = Combine(num + '/' + num, adjacent=False)
# Integer with an optional decimal part.
numeral = num + Optional(dot + num)
# Numeral followed by a fraction, merged into one space-joined token.
mixed = Combine(numeral + fraction, adjacent=False, joinString=' ')
# Defined to mirror the BNF rule of the same name; not referenced by
# sub_amount below.
word_mixed = numword + (fraction ^ fraction_word)
sub_amount = mixed ^ fraction ^ numeral ^ fraction_word ^ numword
# A single quantity or a range of two quantities. The parentheses matter:
# without them the results name would attach only to the optional range
# tail, leaving a plain single amount unnamed.
amount = (sub_amount + Optional(range_ + sub_amount)).setResultsName('amount')
# ---------------------------------------------------------------------------
# Containers and measurements.
# ---------------------------------------------------------------------------

# c1 accepts any one of five shapes; Or keeps the longest match.
# NOTE(review): the BNF rule `c1` quoted further down this file reads as one
# sequence (amount, unit, container) rather than five alternatives -- confirm
# which behaviour is wanted.
c1 = Or([
    Optional(paren) + amount,
    comma + relative,
    metric,
    imperial + Optional(dot),
    comma + containers + Optional(paren),
])
container = Or([c1, containers])

# A measurement is an optional modifier, exactly one unit-like alternative,
# then an optional dot and plural suffix, all merged into a single token that
# is stored under the results name 'measurement'.
measurement = Combine(
    Optional(mod_amount)
    + Or([container, relative, metric, imperial, colloq, genus, qualitative])
    + Optional(dot)
    + Optional(ess),
    adjacent=False,
)('measurement')
# ---------------------------------------------------------------------------
# Free text: word runs, parenthesised comments and catch-all rules.
# ---------------------------------------------------------------------------

# One or more word tokens.
words = alpha + ZeroOrMore(alpha)
words_plus = alphanum + ZeroOrMore(alphanum)

# Text wrapped in literal parentheses.
paren_comment = Literal("(") + words_plus + Literal(")")
comment = Or([paren_comment, words_plus])

# Either plain words, or words with a parenthesised comment before the
# final run of words.
thing = Or([Optional(words) + paren_comment + words, words])

# Whatever the regex '.*' matches from the current position, stored under
# the results name 'anything'.
anything = Regex('.*')('anything')

# Optional list marker in front of an ingredient line.
list_ = Or([Optional(Literal('*')), Literal('the')])
fork = Literals("plus or and")
suffix = CharsNotIn('\r\n')
about = CaselessLiteral('about')
# ---------------------------------------------------------------------------
# Top-level rule: an ingredient line is one of three shapes.
#   type1: [about] amount measurement rest-of-line
#   type2: measurement rest-of-line
#   type3: [about] amount rest-of-line
# A leading "about" is matched but dropped from the results (Suppress).
# Note that the BNF quoted further down numbers these differently: there,
# type1 is the measurement-only form and type2 the amount+measurement form.
# ---------------------------------------------------------------------------
type1 = Suppress(Optional(about)) + amount + measurement + anything
type2 = measurement + anything
type3 = Suppress(Optional(about)) + amount + anything
ingredient = Optional(list_) + (type1 ^ type2 ^ type3)

# Smoke test, still executed at module level as in the original script. The
# single-argument parenthesised form prints the same text whether `print` is
# the Python 2 statement or the Python 3 function.
print(ingredient.parseString('three jars of dozen eggs'))
##############################################################################
# Original grammar by Jesse, from keycore/recipes/utils/parser.py
##############################################################################
"""
<nl> := [\n\r]+
<ts> := [ \t]+
ess := ('es'/'s')
alphanum := [A-Za-z',/0-9.-]+
alpha := [A-Za-z'-]+
numword := ('one'/'two'/'three'/'four'/'five'/'six'/'seven'/'eight'/'nine'/'ten')
dot := "."
range := ('to'/'-')
<comma> := [,-]+
connector := 'of'
paren := ('('/')')
containers := ('package'/'pk'/'bag'/'can'/'container'/'cn'/'envelope'/'jar'),ess?
metric := ('kg'/'gram'/'kilogram'/'ml'/'liter'/'cc')
colloq := ('dozen'/'half-dozen'/'half dozen'/'bakers dozen'/'doz'/'ea'/'each'/'x')
relative := ('medium-sized'/'medium'/'med/small'/'med'/'large'/'small'/'s'/'lg')
genus := ('bunch'/'bn'/'head'/'clove'/'ear'/'batch'/'slice'/'stick'/'leg'/'zest'/'rind'/'sprig')
imperial := ('feet'/'cup'/'c'/'quart'/'qt'/'pint'/'teaspoon'/'tspn'/'tsp'/'tablespoon'/'tbsp'/'tb'/'ts'/'t'/'pound'/'lb'/'ounce'/'oz'/'gallon' )
mod_amount := ('scant'/'generous'/'several'/'about')
qualitative := ('dash'/'ds'/'pinch'/'pn'/'splash'/'shake')
fraction_word := numword,ts,('half'/'third'/'forth'/'fifth'/'sixth'),ess?
fraction := [0-9]+,"/",[0-9]+
numeral := [0-9]+,(dot,[0-9]+)?
mixed := numeral,ts,(fraction/fraction_word)
word_mixed := numword,ts,(fraction/fraction_word)
sub_amount := (mixed/fraction/numeral/fraction_word/numword)
amount := (sub_amount,ts?,range,ts?,sub_amount)/sub_amount
c1 := (paren?,amount,ts/comma,relative/metric/imperial,dot?,ts/comma,containers,paren?)
#c2 := (containers,ts?,paren,amount,comma/ts,relative/metric/imperial,dot?,paren)
container := (c1/containers)
measurement := (mod_amount, ts)?,(container/relative/metric/imperial/colloq/genus/qualitative),dot?,ess?
words := alpha,(ts,alpha,?-('in'/'or'))*
words_plus := alphanum,(ts,alphanum)*
paren_comment := "(",ts?,words_plus,ts?,")"
comment := paren_comment/words_plus
thing := (words?,paren_comment,words)/words
any := [.]*
<list> := ([*]/'the')
fork := ('plus'/'or'/'and')
suffix := [^\n\r]+
ingredient := list?,ts?,(type2/type1/type3)
type1 := measurement,ts,any
type2 := ('about',ts)?,amount,ts?,measurement,ts,any
type3 := ('about',ts)?,amount,ts,any
"""
if __name__ == '__main__':
    import doctest

    # Run any doctests embedded in this module's docstrings.
    doctest.testmod()

    # (input line, expected flat token list) pairs for the `ingredient` rule.
    tests = [
        ('1 onion', ['1', 'onion']),
    ]
    # Parse every sample and report whether the tokens match expectations;
    # previously the table was defined but never executed.
    for text, expected in tests:
        actual = ingredient.parseString(text).asList()
        status = 'ok' if actual == expected else 'FAIL'
        print('%s: %r -> %r (expected %r)' % (status, text, actual, expected))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment