Skip to content

Instantly share code, notes, and snippets.

@aphexddb
Last active August 29, 2015 14:09
Show Gist options
  • Save aphexddb/ddc83d57c7f1c1b96458 to your computer and use it in GitHub Desktop.
Save aphexddb/ddc83d57c7f1c1b96458 to your computer and use it in GitHub Desktop.
Jison grammar for recipe ingredients

This is a recipe ingredient parser. Strings like:

2 cups whole pecans (8 ounces), toasted and chopped into small pieces

Should end up as:

{
  count: 2,
  unit: 'cups',
  item: 'whole pecans (8 ounces)',
  info: 'toasted and chopped into small pieces'
}

Where all elements are optional except item name.

Requires jison: npm install jison

// Demo driver: builds a parser from the Jison grammar and runs it over sample ingredient lines.
var Parser = require("jison").Parser;
var fs = require("fs");

// The grammar path is relative, so it is resolved against the current working directory:
// run this script from the directory that contains recipe_ingredient.jison.
var grammarSource = fs.readFileSync("./recipe_ingredient.jison", "utf8");
var parser = new Parser(grammarSource);

// TODO: persist the generated parser instead of rebuilding it from the grammar on every run:
// var parserSource = parser.generate();

// Sample ingredient lines covering fractions, mixed numbers, ranges, parenthesised notes and
// trailing preparation info.
var strings = [
    '6 tablespoons unsalted butter, cut into 1-inch pieces',
    '1 cup (7 ounces) packed dark brown sugar',
    '1/2 teaspoon table salt',
    '3 large eggs',
    '3/4 cup light corn syrup',
    '1 tablespoon vanilla extract',
    '2 cups whole pecans (8 ounces), toasted and chopped into small pieces',
    '1 1/4 cup (6 1/4 ounces) unbleached all-purpose flour, plus more for dusting work surface',
    '1 tablespoon sugar',
    '1/2 teaspoon table salt',
    '3 tablespoons chilled solid vegetable shortening',
    '4 tablespoons (1/2 stick) cold unsalted butter, cut into 1/4-inch pieces',
    '4 - 5 tablespoons ice water'
];

// Parse every sample independently. parser.parse() throws when a line does not match the
// grammar; catching per line keeps one rejected sample from hiding the results of the rest.
strings.forEach(function(str) {
    console.log(">> Parsing: " + str);
    try {
        var output = parser.parse(str);
        console.log(JSON.stringify(output, null, 4));
    } catch (err) {
        console.error("!! Failed to parse: " + err.message);
    }
});
/*
 * Lexer for one-line recipe ingredients, for example:
 *   2 cups whole pecans (8 ounces), toasted and chopped into small pieces
 * Every rule logs the name of the token it emits (debug trace) before returning it.
 */
%lex
/*
 * flex: keep the longest match across all rules instead of the first rule that matches, so a
 * word that merely starts with a unit name (or a number followed by "-word") stays one token.
 * case-insensitive: unit names and words match regardless of letter case.
 */
%options flex case-insensitive
DecimalDigit [0-9]
DecimalDigits [0-9]+
NonZeroDigit [1-9]
/* Positive integer without leading zeros; the multi-digit form relies on the flex option. */
IntegerLiteral ({NonZeroDigit}|{NonZeroDigit}{DecimalDigits})
/* Plural forms are listed before singular ones. */
UnitName ("teaspoons"|"teaspoon"|"tablespoons"|"tablespoon"|"fluid ounces"|"fluid ounce"|"ounces"|"ounce"|"cups"|"cup"|"pints"|"pint"|"quarts"|"quart"|"gallons"|"gallon"|"pounds"|"pound"|"milliliters"|"milliliter"|"deciliters"|"deciliter"|"liters"|"liter")
SimpleWord ([A-Za-z_]+)
/* A number glued to a word with a hyphen, e.g. "1-inch". */
ComplexHypenatedWord (\d+-{SimpleWord})
/*
 * One parenthesised note such as "(8 ounces)". The match stops at the first ")" so that two
 * separate notes on the same line stay separate tokens and text between them (including the
 * comma separator) is still tokenised normally. Nested parentheses are not supported.
 */
ContentInParenthesis \([^)]*\)
WordText ({SimpleWord})|({ComplexHypenatedWord})|({ContentInParenthesis})
%%
\s+ /* skip whitespace */
{UnitName} console.log('UNIT_NAME'); return 'UNIT_NAME';
{IntegerLiteral} console.log('INTEGER_LITERAL'); return 'INTEGER_LITERAL';
{WordText} console.log('WORD_TEXT'); return "WORD_TEXT";
"/" console.log('SLASH'); return "SLASH";
"," console.log('COMMA'); return "COMMA";
"-" console.log('HYPHEN'); return "HYPHEN";
<<EOF>> console.log('EOF'); return 'EOF';
. console.log('ERROR'); return "ERROR";
/lex
/* enable EBNF grammar syntax */
%ebnf
/* language grammar */
%start ingredient
%%
/* Start rule: hands the object built by ingredient_format back to the caller of parse(). */
ingredient
    : ingredient_format
        { return $1; }
    ;
/*
 * ingredient_format: the accepted shapes of a complete ingredient line.
 *
 * Produces {count, unit, item, info}; any part that is absent from the line is null.
 * Only the item name is mandatory. The count, the unit and the trailing info (introduced by
 * the separator) may each be left out, with one restriction: a unit is only accepted after a
 * count, because a unit on its own does not describe a quantity.
 */
ingredient_format
    : unit_count unit_name ingredient_name ingredient_seperator ingredient_info EOF
        { $$ = {'count': $1, 'unit': $2, 'item': $3, 'info': $5}; }
    | unit_count unit_name ingredient_name EOF
        { $$ = {'count': $1, 'unit': $2, 'item': $3, 'info': null}; }
    | unit_count ingredient_name ingredient_seperator ingredient_info EOF
        { $$ = {'count': $1, 'unit': null, 'item': $2, 'info': $4}; }
    | unit_count ingredient_name EOF
        { $$ = {'count': $1, 'unit': null, 'item': $2, 'info': null}; }
    | ingredient_name ingredient_seperator ingredient_info EOF
        { $$ = {'count': null, 'unit': null, 'item': $1, 'info': $3}; }
    | ingredient_name EOF
        { $$ = {'count': null, 'unit': null, 'item': $1, 'info': null}; }
    ;
/* unit_count: a quantity in any supported notation; the value of the matched form is passed through. */
unit_count
    : unit_range
        { $$ = $1; }
    | unit_complex_count
        { $$ = $1; }
    | unit_simple_count
        { $$ = $1; }
    ;
/* unit_simple_count: a whole-number quantity such as "3", converted to a number (base 10). */
unit_simple_count
    : INTEGER_LITERAL
        { $$ = parseInt($1, 10); }
    ;
/*
 * unit_complex_count: a fractional quantity, converted to a number.
 *   "3/4"   -> 0.75   (plain fraction)
 *   "1 1/4" -> 1.25   (whole number followed by a fraction)
 * All literals are read as base-10 integers.
 */
unit_complex_count
    : INTEGER_LITERAL SLASH INTEGER_LITERAL
        { $$ = parseInt($1, 10) / parseInt($3, 10); }
    | INTEGER_LITERAL INTEGER_LITERAL SLASH INTEGER_LITERAL
        { $$ = parseInt($1, 10) + (parseInt($2, 10) / parseInt($4, 10)); }
    ;
/*
 * unit_range: a quantity range such as "4 - 5".
 * Both bounds are kept, as {min, max}, because a range cannot be represented faithfully by a
 * single number; dividing the bounds (which would turn "4 - 5" into 0.8) is meaningless here.
 */
unit_range
    : INTEGER_LITERAL HYPHEN INTEGER_LITERAL
        { $$ = {'min': parseInt($1, 10), 'max': parseInt($3, 10)}; }
    ;
/*
 * ingredient_seperator: the comma between the item name and the trailing info.
 * Its value is an empty string. The rule name keeps its existing spelling because
 * ingredient_format refers to it by this name.
 */
ingredient_seperator
    : COMMA
        { $$ = ''; }
    ;
/*
 * unit_name: the measurement unit exactly as it was written in the input.
 * No action is needed: the value of a rule defaults to $1, the same default that
 * unit_count's action-less alternatives rely on.
 */
unit_name
    : UNIT_NAME
    ;
/* ingredient_name: one or more word tokens, re-joined with single spaces into the item name. */
ingredient_name
    : WORD_TEXT+
        { var nameWords = $1; $$ = nameWords.join(" "); }
    ;
/* ingredient_info: one or more word tokens after the separator, re-joined with single spaces. */
ingredient_info
    : WORD_TEXT+
        { var infoWords = $1; $$ = infoWords.join(" "); }
    ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment