Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Regex Classifier
import re
import random
def tokenise(s):
    """Split *s* into tokens and return them joined by single spaces.

    Recognised tokens, tried in this order at each position:
      * a word: an ASCII letter or ñ followed by letters, ñ or hyphens;
      * a decimal number such as ``2.5``;
      * a run of digits and/or vulgar-fraction characters such as ``1½``;
      * any single character that is neither a word character nor a space.

    Text lying between recognised tokens is kept as a piece of its own
    unless it is empty or consists only of whitespace.
    """
    token_re = r'([a-zA-Zñ][a-zA-Zñ\-]*|\d+\.\d+|[½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞\d]+|[^\w ])'
    # the capturing group makes re.split return the tokens themselves as
    # well as the text between them
    pieces = re.split(token_re, s)
    kept = filter(lambda piece: piece.strip(), pieces)
    return ' '.join(kept)
class rule:
    """Replacement rule written in a small typed-markup shorthand.

    The shorthand is translated to ordinary ``re`` syntax once, at
    construction time:

    * ``{(?<type>content)}`` in a pattern captures ``content`` as the named
      group ``T_type``.  The capture may be surrounded by one optional space
      on each side, must not be preceded by anything other than ``>`` or a
      space, and must not be followed by anything other than ``<`` or a
      space.
    * ``<<type>>`` in a pattern (optionally with a numeric suffix, e.g.
      ``<<amount2>>``) matches an existing ``<type>...</type>`` element, with
      one optional space on each side, and captures its inner text as the
      named group ``T_type`` (``T_amount2`` with a suffix).
    * ``<<!type>>`` in a pattern is a negative lookahead: the position must
      not be followed by an optional space and ``<type>``.
    * ``<<type>>`` in a substitution emits `` <type>`` + the text captured
      as ``T_type`` + ``</type> ``.

    Attributes:
        p: the translated regular expression (a ``str``).
        s: the translated replacement template (a ``str``).
    """

    def __init__(self, pattern, substitution):
        # element matches are expanded first so that a ``<<type>>`` written
        # inside a ``{(?<...>...)}`` capture is already plain regex when the
        # capture's content is copied
        self.p = rule._translate_type_captures(rule._translate_type_matches(pattern))
        self.s = rule._translate_type_substitutions(substitution)

    def findall(self, s):
        """Return ``re.findall`` of the translated pattern over *s*."""
        return re.findall(self.p, s)

    def sub(self, s):
        """Return *s* with every match replaced by the translated template."""
        return re.sub(self.p, self.s, s)

    # The helpers below never use instance state.  Declaring them static
    # keeps ``rule._translate_x(...)`` working and also makes them callable
    # through an instance, which previously raised TypeError.

    @staticmethod
    def _translate_type_captures(s):
        """Expand every ``{(?<type>content)}`` capture in pattern *s*."""
        pat = r'\{\(\?\<(?P<type_and_index>[a-z_]+[0-9]*)\>(?P<content>.*?)\)\}'
        rep = r' ?(?<![^\> ])(?P<T_\g<type_and_index>>\g<content>)(?![^\< ]) ?'
        return re.sub(pat, rep, s)

    @staticmethod
    def _translate_type_matches(s):
        """Expand every ``<<!type>>`` and ``<<type>>`` in pattern *s*."""
        # negated form first: its text also contains ``<<``/``>>`` and must
        # be gone before the positive form is expanded
        pat = r'\<\<!(?P<type_and_index>(?P<type>[a-z_]+)[0-9]*)\>\>'
        rep = r'(?! ?\<\g<type>\>)'
        s2 = re.sub(pat, rep, s)
        pat = r'\<\<(?P<type_and_index>(?P<type>[a-z_]+)[0-9]*)\>\>'
        rep = r' ?\<\g<type>\>(?P<T_\g<type_and_index>>(?:(?!\<\/\g<type>\>).)*)\<\/\g<type>\> ?'
        return re.sub(pat, rep, s2)

    @staticmethod
    def _translate_type_substitutions(s):
        """Expand every ``<<type>>`` in substitution template *s*."""
        pat = r'\<\<(?P<type_and_index>(?P<type>[a-z_]+)[0-9]*)\>\>'
        # ``\\g`` yields a literal ``\g`` so the result is itself a template
        # that refers to the named group captured by the pattern
        rep = r' <\g<type>>\\g<T_\g<type_and_index>></\g<type>> '
        return re.sub(pat, rep, s)
def extract_typed(s, types):
    """Extract the text classified with each of the given types.

    For every type name in *types*, find each ``<type>...</type>`` element
    in *s* and capture the first run of plain text inside it, skipping any
    complete tags that sit directly after the opening tag or directly
    before the closing tag.

    Args:
        s: the marked-up string to search.
        types: iterable of type names; each is inserted into the regular
            expression as-is.

    Returns:
        A list with one entry per type, in the order given; each entry is
        the list of captured strings for that type (empty if none matched).
    """
    # The trailing skip group originally read ``(?:\<[^\>]+)*`` without the
    # closing ``\>``, so it could never consume a well-formed nested closing
    # tag; it now mirrors the leading group.
    return [
        re.findall(
            r'\<' + t + r'\>(?:\<[^\>]+\>)*([^\<]*)(?:\<[^\>]+\>)*\</' + t + r'\>',
            s,
        )
        for t in types
    ]
class explored_move:
    """One explored move (rule application) in a sparse tree of moves.

    Each node records how often it was played and how much it has won.
    Children are created lazily and stored in ``next_moves``, keyed by rule
    index; a rule index with no child yet is treated as having the initial
    win/play counts.

    Attributes:
        num_rules: number of rules, i.e. number of possible next moves.
        prev_move: the parent node, or None for the root.
        next_moves: dict mapping rule index -> explored child node.
        wins: accumulated win weight (0 marks the move as illegal).
        plays: number of recorded plays.
    """

    # every node starts at a 50% win ratio so the ratio can move up or down
    initial_wins = 1
    initial_plays = 2

    def __init__(self, num_rules, prev_move=None):
        self.num_rules = num_rules
        self.prev_move = prev_move
        self.next_moves = {}
        self.wins = explored_move.initial_wins
        self.plays = explored_move.initial_plays

    def play(self):
        """Record one more play on this node (ancestors are not touched)."""
        # NOTE(review): the previous comment here spoke of propagating back
        # to the root, but only this node's counter is updated, and nothing
        # visible in this file calls play() - confirm the intended use.
        self.plays += 1

    def illegal(self):
        """Mark this move illegal: zero wins prevents future selection."""
        self.wins = 0

    def win(self, i):
        """Add win weight *i* to this node and to every ancestor up to root."""
        # walk the parent chain iteratively; a path can be as deep as the
        # search's max_depth, which would overflow the stack if recursive
        node = self
        while node is not None:
            node.wins += i
            node = node.prev_move

    def num_wins(self, i):
        """Return wins of next move *i*, or the initial wins if unexplored."""
        return self.next_moves[i].wins if i in self.next_moves else explored_move.initial_wins

    def num_plays(self, i):
        """Return plays of next move *i*, or the initial plays if unexplored."""
        return self.next_moves[i].plays if i in self.next_moves else explored_move.initial_plays

    def play_next(self, rule_idx):
        """Return the child node for *rule_idx*, creating it on first use."""
        if rule_idx not in self.next_moves:
            self.next_moves[rule_idx] = explored_move(self.num_rules, self)
        return self.next_moves[rule_idx]

    def _common_denominator(self):
        """Return the product of the play counts of all moves with wins > 0."""
        d = 1
        for m in range(self.num_rules):
            if self.num_wins(m) > 0:
                d *= self.num_plays(m)
        return d

    def _weights(self):
        """Return, per rule index, its win ratio scaled to an integer."""
        d = self._common_denominator()
        # scaling by the common denominator keeps the ratios exact integers
        return [self.num_wins(m) * (d // self.num_plays(m)) for m in range(self.num_rules)]

    def choose_next_move(self):
        """Pick a random rule index, weighted by each move's win ratio.

        Returns:
            The chosen rule index, or -1 when every move has zero wins
            (no valid move).
        """
        weights = self._weights()
        max_i = sum(weights)
        if max_i == 0:
            return -1
        # select a random point in the weighted move probability space
        j = random.randrange(0, max_i)
        # accumulate the move intervals until the one containing j is found
        i = 0
        for m, weight in enumerate(weights):
            i += weight
            if i > j:
                return m
        # should never get here unless the above maths went wrong!
        raise Exception('Unreachable: ' + str(i) + ' > ' + str(max_i))

    def best_move(self):
        """Return ``(rule_index, node)`` for the explored child with the
        highest win ratio, or ``(-1, None)`` if no explored child has a
        ratio above zero."""
        d = self._common_denominator()
        best_ratio = 0
        best_m = (-1, None)
        for m, node in self.next_moves.items():
            ratio = node.wins * (d // node.plays)
            # strict comparison: on a tie the earlier-inserted child wins
            if ratio > best_ratio:
                best_ratio = ratio
                best_m = (m, node)
        return best_m
# deletes elements whose content is empty or whitespace only, e.g.
# "<unit> </unit>"; raw string so the regex escapes reach `re` unchanged
remove_empty_rule = rule(r'\<(?P<type>[a-z_]+)\>\s*\<\/(?P=type)\>', '')
def try_play_move(rules, cur_move, s):
    """Try to apply one randomly chosen rule to *s*.

    Args:
        rules: sequence of rule objects, indexed by rule number.
        cur_move: the explored_move node for the current position.
        s: the current string.

    Returns:
        ``(None, None)`` if *cur_move* has no selectable move left;
        ``(next_move, s_next)`` if the chosen rule produced a legal new
        string; ``(cur_move, s)`` if the chosen rule was illegal here, in
        which case that move has been marked illegal so it is not chosen
        again from this position.
    """
    r = cur_move.choose_next_move()
    if r == -1:
        return None, None
    # make the move
    next_move = cur_move.play_next(r)
    s_next = remove_empty_rule.sub(rules[r].sub(s))
    # a move is legal when it changed the string and did not open an element
    # directly inside an element of the same type (only whitespace and other
    # opening tags in between)
    if s_next != s and re.search(r'\<(?P<type>[a-z_]+)\>(\s*\<[a-z_]+\>)*\s*\<(?P=type)\>', s_next) is None:
        return next_move, s_next
    # Mark the move illegal before reporting failure.  Without this its wins
    # never drop to zero, so a caller that retries until a different node
    # comes back can loop forever when no rule applies.
    next_move.illegal()
    return cur_move, s
def play(rules, sin, pout, paths, max_depth=1000):
    """Search for rule applications that make *pout* match, then apply them.

    Runs *paths* random playouts from *sin*.  In each playout up to
    *max_depth* legal moves are made; after every move the string is scored
    by the total number of characters returned by ``findall`` of *pout*.
    The node that first reached the playout's highest score receives that
    score as a win, which is propagated to its ancestors.  Finally the
    best-scoring line of play is replayed on *sin*.

    Args:
        rules: sequence of rule objects available as moves.
        sin: the input string.
        pout: goal pattern, in the same shorthand accepted by ``rule``.
        paths: number of random playouts to explore.
        max_depth: maximum number of moves per playout.

    Returns:
        *sin* after replaying the best explored move sequence (unchanged if
        no playout ever scored).
    """
    rout = rule(pout, '')
    # start with root move
    root = explored_move(len(rules))
    # explore random paths
    for _ in range(paths):
        # reset state for new path
        s = sin
        cur_move = root
        best_win = 0
        best_win_last_move = None
        for _ in range(max_depth):
            # keep drawing until a legal move is found (a different node
            # comes back) or no selectable move is left (None comes back)
            next_move = cur_move
            while next_move is cur_move:
                next_move, s_next = try_play_move(rules, cur_move, s)
            # if no legal move then stop this path
            if next_move is None:
                break
            # move on to next move; the retry loop above has already dealt
            # with illegal moves, so no further check is needed here
            cur_move = next_move
            s = s_next
            # score the new state against the goal pattern
            win_weight = len(''.join([''.join(x) if isinstance(x, tuple) else x for x in rout.findall(s)]))
            if win_weight > best_win:
                best_win = win_weight
                best_win_last_move = cur_move
        # if a win was found, record it up to the move that produced it
        if best_win > 0:
            best_win_last_move.win(best_win)
    # replay move sequence with most wins; wins > 1 means the node received
    # at least one win beyond its initial count
    r, node = root.best_move()
    while node is not None and node.wins > 1:
        # apply the same clean-up as during exploration so the replayed
        # string follows exactly the states that earned the wins
        sin = remove_empty_rule.sub(rules[r].sub(sin))
        r, node = node.best_move()
    # return resulting string
    return sin
def program(rules, sin, pouts, paths, max_depth=1000):
    """Run ``play`` once per goal pattern in *pouts*, in order.

    The string returned for one goal becomes the input for the next; the
    string left after the last goal is returned.  With no goals, *sin* is
    returned unchanged.
    """
    current = sin
    for goal in pouts:
        current = play(rules, current, goal, paths, max_depth)
    return current
# user defined rules; the position of a rule in this list is its move index
rules = [
    # units
    rule(r'{(?<unit>pinch)}', '<<unit>>'),
    rule(r'{(?<unit>cloves?)}', '<<unit>>'),
    rule(r'{(?<unit>mls?|mL|cc|millilitres?|milliliters?)}', '<<unit>>'),
    rule(r'{(?<unit>tsps?|t|teaspoons?)}', '<<unit>>'),
    rule(r'{(?<unit>[tT]bsps?|T|tbl|tbs|[tT]ablespoons?)}', '<<unit>>'),
    rule(r'{(?<unit>fl ?oz|fluid ounces?)}', '<<unit>>'),
    rule(r'{(?<unit>cups?)}', '<<unit>>'),
    rule(r'{(?<unit>p|pts?|pints?)}', '<<unit>>'),
    rule(r'{(?<unit>ls?|L|litres?|liters?)}', '<<unit>>'),
    # a stray "/" after "gallons?" was removed: the tokeniser always puts
    # "/" in a token of its own, so "gallons/" could never match
    rule(r'{(?<unit>gals?|gallons?)}', '<<unit>>'),
    rule(r'{(?<unit>dls?|dL|decilitre|deciliter)}', '<<unit>>'),
    rule(r'{(?<unit>gs?|grams?|grammes?)}', '<<unit>>'),
    rule(r'{(?<unit>oz|ounces?)}', '<<unit>>'),
    rule(r'{(?<unit>lbs?|#|pounds?)}', '<<unit>>'),
    rule(r'{(?<unit>kgs?|kilos?|kilograms?)}', '<<unit>>'),
    # numbers
    rule(r'{(?<number>(?:\d* )?\d+ ?\/ ?\d+|\d*\s?[½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞]|\d+(\.\d+)?)}', '<<number>>'),
    rule(r'{(?<number>an?)}', '<<number>>'),
    # amounts
    rule(r'{(?<amount>to taste)}', '<<amount>>'),
    rule(r'{(?<amount>to serve)}', '<<amount>>'),
    rule(r'{(?<amount>for \w+ing)}', '<<amount>>'),
    rule(r'{(?<amount>[gG]enerous dash)}', '<<amount>>'),
    rule(r'{(?<amount><<number1>><<unit1>>\+<<number2>><<unit2>>)}', '<<amount>>'),
    rule(r'{(?<amount><<number>>[\-–]?<<unit>>\.?)}', '<<amount>>'),
    rule(r'{(?<amount><<number>><<!unit>>(?! [\-–] \<number\>)(?: of)?)}', '<<amount>>'),
    # ingredients
    rule(r'^<<amount1>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>><<ingredient>>'),
    rule(r'^<<amount1>><<amount2>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>><<amount2>><<ingredient>>'),
    rule(r'^<<amount1>> \( <<amount2>> \) {(?<ingredient>\w[\s\w-]+)}', '<<amount1>><<amount2>><<ingredient>>'),
    rule(r'^<<amount1>> \( <<amount2>><<amount3>> \) {(?<ingredient>\w[\s\w-]+)}', '<<amount1>><<amount2>><<amount3>><<ingredient>>'),
    rule(r'^<<amount1>> \+ <<amount2>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>>+<<amount2>><<ingredient>>'),
    rule(r'^<<amount1>> plus <<amount2>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>>+<<amount2>><<ingredient>>'),
    rule(r'^<<amount1>> \( <<amount2>> \) plus <<amount3>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>><<amount2>><<amount3>><<ingredient>>'),
    rule(r'^<<amount1>> [\-–] <<amount2>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>>-<<amount2>><<ingredient>>'),
    rule(r'^<<amount1>> \| <<amount2>>{(?<ingredient>\w[\s\w-]+)}', '<<amount1>>-<<amount2>><<ingredient>>'),
    rule(r'^{(?<ingredient>\w[\s\w-]+)}<<amount1>>', '<<amount1>><<ingredient>>'),
    rule(r'^{(?<ingredient>\w[\s\w-]+)},<<amount1>>', '<<amount1>><<ingredient>>'),
]
# sample ingredient lines, one recipe line per string (the list literal was
# previously left unclosed)
in_strs_1 = [
    '6 – 7 cups of Three different types of vegetables*, chopped into bite-sized pieces',
    '1 1/2 tsp. Onion Powder',
    '1 tsp. Garlic Powder',
    '1 tsp. Ground Ginger',
    '1/4 cup Orange Marmalade',
    '2 1/2 T. Soy Sauce, Tamari or Bragg’s Aminos',
    '2 T. Water',
    '15 oz. can Black Beans, drained and rinsed',
    '3 – 4 cups Cooked Rice or Quinoa (heat up the frozen type when in a pinch)',
    '1 butternut squash (around 1 kg)',
    '2 tbsp lemon juice',
    '75g grated vegan parmesan (or 15g Nutritional yeast flakes)',
    '½ tsp powered garlic',
    '1 tsp. mustard powder',
    '1 tsp. grated nutmeg',
    '300ml Alpro Oat Unsweetened drink',
    '2 tbsp extra virgin olive oil, plus a little extra for oiling the squash',
    '1 bunch of sage',
    '100g hazelnuts, roughly chopped',
    '400g macaroni',
    'Salt and pepper',
    'umeboshi paste 4 tbsp, see notes below',
    'Chinese black vinegar 2 tsp',
    'toasted sesame oil 2 tsp',
    'dark soy sauce 2 tbsp',
    'shaoxing wine 2 tsp',
    'garlic 2 cloves, peeled',
    'Chinese five-spice ¼ tsp',
    'dried chilli flakes ¼ tsp',
    'tempeh 300g block, see notes below',
    'pineapple ½, peeled and cored',
    'spring onions 2, finely chopped',
    'coriander ½ a small bunch, leaves picked',
    'dried chilli flakes a pinch',
    'echalion shallots 2, halved and thinly sliced',
    'red chilli 1, deseeded and thinly sliced',
    'lemons 2, juiced',
    'farro 180g, see notes below',
    'agave syrup 1 tbsp',
    'rapeseed oil 4 tbsp',
    'pecans 125g',
    'flat-leaf parsley a bunch, roughly chopped',
    'rocket 70g',
    'peaches 3, slightly under-ripe, halved and stoned',
    'thyme 4 sprigs, leaves picked',
    '1 15-ounce can chickpeas (rinsed, drained, and dried)',
    '1 Tbsp olive oil',
    '1 Tbsp dried or fresh oregano',
    '1 pinch sea salt',
    '2 tsp garlic powder',
    '3 Tbsp gluten-free panko bread crumbs',
    '1 Tbsp vegan parmesan cheese',
    '1 Tbsp olive oil',
    '3 cloves garlic, minced (3 cloves yield ~1 1/2 Tbsp or 9 g)',
    '1/4 cup carrots (very finely diced)',
    '1 15-ounce can tomato sauce',
    '1 Tbsp dried or fresh oregano',
    '1 Tbsp vegan parmesan cheese (plus more to taste)',
    '1-2 Tbsp sweetener of choice (such as organic cane sugar or coconut sugar // optional)',
    '10 large carrots (ribboned with a vegetable peeler // or sub 8 ounces pasta of choice per 10 carrots)',
    'Red pepper flakes',
    'Vegan parmesan cheese',
    'Fresh basil',
    '3/4 cup dried chickpeas (soaked and cooked, see step 1)',
    '1/2 red onion (peeled)',
    '2 garlic cloves (peeled)',
    '1/4 teaspoon salt',
    '1/4 teaspoon paprika powder',
    '1/4 cup fresh parsley',
    '1 tablespoon lemon juice',
    '1 teaspoon olive oil',
    '1-2 tablespoons besan/chickpea flour',
    '1 avocado',
    '1/2 teaspoon lime juice',
    '1/4 teaspoon salt',
    'ground pepper',
    '4 pretzel buns',
    '1/2 cup baby spinach',
    '1/2 cup arugula',
    '3 tablespoons Olive Oil',
    '1 large brown/yellow/white Onion diced',
    '4 Garlic Cloves minced',
    '1 large Zucchini sliced, then quartered',
    '4 celery stalks sliced',
    '6 medium sized Tomatoes diced',
    '3 Bell Peppers 1 green, 2 yellow,red, or orange',
    '3 tablespoons favorite Chili Powder storebought or homemade',
    '1 tablespoon Cumin',
    '2 tablespoons Paprika',
    '1 teaspoon Smoked Paprika',
    '4 1/2 cups 900ml Tomato Puree',
    '4 cups 800ml Water',
    '3 cups Beans of choice - Kidney Black, Pinto etc. (soaked and cooked, or canned)',
    '2 cups Corn',
    'Salt and Pepper',
    '1 Avocado - optional diced',
    'handful of fresh Cilantro - optional',
    '3 cups butternut squash (cubed*)',
    '3 cloves garlic (whole // skin removed)',
    '2 Tbsp olive oil (divided)',
]
# sample ingredient lines, one recipe line per string (the list literal was
# previously left unclosed)
in_strs_2 = [
    '180g | 1 cup uncooked brown rice',
    '½ small butternut squash , cubed',
    '2 small sweet potatoes , cubed',
    '1 tablespoons olive oil',
    '6 mushrooms',
    '4 handfuls of raw spinach',
    '60g | 1/2 cup edamame beans',
    '2 green onions , chopped',
    '1 teaspoon sesame oil',
    '1 tablespoon Tamari , or soy sauce',
    '1 teaspoon maple syrup',
    'salt and pepper , for seasoning',
    'sesame seeds , for sprinkling',
    '2 squares of silver foil',
    '2 roasted bulbs of garlic (see instructions)',
    '½ lemon juice only',
    '5½ tablespoons tahini (you can sub cashew butter)',
    'leftover marinade from the mushrooms',
    '80mls | 1/3 cup water',
    '1 tablespoon maple syrup',
    '1 tablespoon Tamari or soy sauce',
    'pasta of your choice',
    '1 small butternut squash',
    '1 yellow onion',
    '2 cloves of garlic',
    '1 tsp fresh chopped sage',
    '1 tsp fresh rosemary',
    '1 tsp herbes de provence',
    '½ tsp red pepper flakes',
    '2 cups of vegetable stock or water',
    '3-4 tbsp coconut milk (optional)',
    '1 lime, juice',
    '1 large handful of pecans',
    'salt, pepper',
    '2 tbsp olive oil divided',
    '2 lb mushroom caps we prefer baby Portobellos, stems off and sliced thick',
    '6 large carrots peeled and sliced into 1 inch 2.5 cm circles',
    '1 large yellow onion peeled and diced',
    '1 large shallot peeled and sliced thin',
    '2 garlic cloves peeled and minced',
    '2 cups vegetable broth',
    '1 ½ cups red wine',
    '1 tbsp tomato paste',
    '2 tsp ground sea salt or to taste',
    '2 tbsp fresh thyme leaves plus extra for garnishing',
    '2 tsp dried Italian seasoning',
    'Black pepper to taste',
    '1 tbsp + 1 tsp all-purpose flour use cornstarch to make it gluten free',
    '1/3 cup water',
    '1 1 lb package of fettucine',
    '1 tbsp (15 ml) extra-virgin olive oil',
    '1 medium red onion, finely diced',
    '3 garlic cloves, minced',
    '26 oz (794 g) crushed tomatoes ( I use Pomi brand)',
    '1/3 cup (80 ml) red wine (or 3 tbsp (45 ml) balsamic vinegar)',
    '1 tbsp (2 g) dried Italian seasoning',
    '3 tsps (15 g) ground sea salt, or to taste',
    '2 tsp (1 g) red pepper flakes (optional)',
    'Ground black pepper to taste',
    '½ cup (20 g) fresh basil leaves, chopped',
    '¼ cup (10 g) flat leaf parsley leaves, chopped',
    '15 oz (425 g) black beans, drained (reserve ¼ cup (60 ml) of the juice) and rinsed well',
    '1 tbsp (15 ml) plus 1 tsp extra-virgin olive oil',
    '1 large Portobello mushroom cap – gills removed and sliced thin',
    '1 shallot, peeled and sliced thin',
    '2 garlic cloves, minced',
    '¼ cup (10 g) flat leaf parsley leaves',
    '1 tbsp (2 g) dried Italian seasoning',
    '½ cup (61 g) breadcrumbs (use gluten free if desired)',
    '½ cup (86 g) cornmeal',
    '2 tsp (6 g) tapioca starch',
    '1 tsp ground sea salt',
    'Black pepper to taste',
    '1 (1 lb 454 g) box of spaghetti',
    '2 tbsp (30 g) sea salt',
    '3 pounds zucchini (2 to 3 inches in diameter - for making the "zoodles")',
    '1 head of cauliflower (broken into large florets)',
    '2 carrots (peeled)',
    '8 ounces crimini mushrooms (cleaned and stems trimmed)',
    '1 medium yellow onion (halved or quartered)',
    '3 cloves garlic (peeled)',
    '1 cup walnuts',
    '2 28 ounce cans crushed tomatoes (I love the Muir Glen brand)',
    '1/4 cup sundried tomatoes',
    '2 tablespoon nutritional yeast (optional (adds a savory quality))',
    '1 teaspoon salt',
    '1 teaspoon dried oregano',
    '1 teaspoon dried basil',
    '1 teaspoon maple syrup (to taste)',
    '1 large sweet potato, peeled and cubed (about 2 cups)',
    '1 tablespoon olive oil',
    '¼ yellow onion, diced (about ½ cup)',
    '2 cloves garlic, minced',
    '1 teaspoon garam masala',
    '1 teaspoon curry powder',
    '¼ teaspoon cumin',
    '⅛ teaspoon red pepper/cayenne',
    '½ teaspoon sea salt',
    '1 15 ounce can diced tomatoes (low sodium if available)',
    '1 15 ounce can garbanzo beans (drained & rinsed)',
    '1 14 ounce can light coconut milk',
]
# sample ingredient lines, one recipe line per string (the list literal was
# previously left unclosed)
in_strs = [
    '2 tablespoons raisins (optional)',
    '2 handfuls torn kale leaves',
    '3 cups prepared red quinoa or grain of your choice',
    '2 tablespoons cilantro, roughly chopped',
    'salt and pepper',
    '4 large portabella mushrooms',
    '2 T maple syrup',
    '2 T low sodium tamari/ liquid aminos',
    '1 T sesame oil',
    '2 cloves garlic, minced',
    'Lemon pepper',
    'Lime juice (to serve)',
    'Green onions',
    'Toasted sesame seeds',
    'Greens (I used a supergreens mix of baby spinach + mizuna)',
    'Avocado basil sauce (recipe below)',
    'Caramelized kimchi (recipe below)',
    '1 small head cauliflower, florets removed',
    '1 tablespoon olive oil',
    '6 ounces buckwheat soba noodles',
    '1/3 cup fresh cilantro leaves, chopped',
    '2 tablespoon toasted hemp or sesame seeds',
    '½ lime, juiced (optional)',
    '1 tablespoon plus ½ teaspoon freshly grated ginger',
    '2 tablespoons plus 1 teaspoon low-sodium soy sauce or tamari',
    '1 tablespoon dark sesame oil',
    '1 tablespoon unseasoned rice vinegar',
    '1 teaspoon honey (Vegans can sub brown sugar or agave.)',
    '½-1 teaspoon crushed red pepper flakes (depending on how much heat you like)',
    '¼ cup thinly sliced scallions, white and light green parts only (about 4 scallions)',
    '1 tbsp oil',
    '1 tsp cumin seeds',
    '1/2 yellow onion, finely chopped',
    '1/2 jalapeño chile, minced',
    '1 package of chicken style strips or pieces – for example Fry’s or Quorn, 1inch tofu strips, pre grilled, pan or deep fried, texturized vegetable protein)',
    '1 cilantro bunch',
    '1 tsp salt',
    'Fresh cracked pepper',
    '1 tsp oil',
    '1 Poblano chile, minced',
    '1/2 jalapeño chile, minced',
    '1 yellow banana pepper, chopped',
    '3 garlic cloves, peeled',
    '8 tomatillos',
    '1/4 cup water',
    '1/2 yellow or white onion, quartered',
    '1 tsp salt',
    '8-12 corn tortillas',
    '2 cups cabbage, finely shredded',
    '2-4 limes, quartered',
    '1/4 cup cilantro, chopped',
    '120g gluten-free wild rice',
    '350ml water',
    '280g tofu (1 block), medium to firm',
    '¼ teaspoon turmeric',
    '1 teaspoon coconut oil',
    '1 small onion, long thin slices',
    '½ clove garlic',
    '¼ teaspoon himalayan salt',
    '150g red and yellow peppers, chopped',
    '80g broccoli, chopped',
    '3 tablespoons soy sauce (make sure it is a gluten-free kind)',
    'Black pepper',
    '1 1/2 cups cooked chickpeas',
    '2 teaspoons safflower or other neutral oil',
    '1/4 teaspoon cayenne',
    '1/4 teaspoon ground cinnamon',
    '1/2 teaspoon Garam Masala',
    '1/4 teaspoon salt',
    '3/4 cup chopped red onion',
    '1 (1-inch) knob of ginger',
    '3 cloves garlic',
    '2 tablespoons water',
    '1 teaspoon safflower or other neutral oil',
    '1/4 teaspoon cumin seeds',
    '2 bay leaves',
    '4 cloves',
    '1 1/4 cups canned or culinary coconut milk',
    '3/4 cup ripe mango pulp or puree (unsweetened or lightly sweetened canned)',
    '1/2 teaspoon salt',
    '2 teaspoons apple cider vinegar',
    'Generous dash of black pepper',
    '1/4 teaspoon Garam Masala, for garnish',
    '2 tablespoons chopped cilantro, for garnish',
    '1 cup unroasted cashews, soaked in hot water for at least an hour',
    '3½ cups water',
    '2 cloves garlic',
    '¼ cup nutritional yeast',
    '1½ Tablespoons white miso',
    '1 teaspoon lemon juice',
    '1 teaspoon sea salt',
    'black pepper, to taste',
    '¼ teaspoon nutmeg',
    '2 Tablespoons flour',
    '1lb of fettuccine or any kind of pasta',
    '1 1/4 lb / 565 g kabocha squash',
]
# goal patterns, pursued in this order; the string produced for one goal is
# the starting point for the next (raw string: "\-" must reach `re` as-is)
prog = [r'<<number>>[\-–]?<<unit>>', '<<amount>>', '<<ingredient>>']
# number of input lines for which no ingredient could be extracted
# NOTE(review): n is counted but never reported anywhere visible in this
# file - confirm whether a final summary was intended.
n = 0
# `line` rather than `input`, so the builtin is not shadowed
for line in in_strs + in_strs_1 + in_strs_2:
    s = program(rules, tokenise(line), prog, 128)
    typed = extract_typed(s, ['number', 'unit', 'ingredient'])
    ingredients = typed[2]
    if ingredients:
        # print the first ingredient found
        print(ingredients[0])
    else:
        # flag the unclassified line with a leading "--"
        print('--' + line)
        n += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.