Basic (naive) Parsing Algorithms

Bottom-Up Parser(word='dnvdndn'):

Grammar:

- S $\rightarrow$ NP VP
- NP $\rightarrow$ 'd' 'n'
- VP $\rightarrow$ 'v' NP NP | 'v' NP
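For reference, this is the dictionary shape that `get_grammar_rules_from_string` in the code below produces for the grammar above (just the data, no extra functionality):

```python
# The grammar above as a dict from non-terminal to its list of productions
grammar = {
    "S": [["NP", "VP"]],
    "NP": [["d", "n"]],
    "VP": [["v", "NP", "NP"], ["v", "NP"]],
}
```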
|    | stack         | input           | action |
|---:|:--------------|:----------------|:-------|
|  0 | $             | d n v d n d n $ | start |
|  1 | $ d           | n v d n d n $   | shift('d') |
|  2 | $ d n         | v d n d n $     | shift('n') |
|  3 | $ NP          | v d n d n $     | reduce('NP' $\rightarrow$ 'd' 'n') |
|  4 | $ NP v        | d n d n $       | shift('v') |
|  5 | $ NP v d      | n d n $         | shift('d') |
|  6 | $ NP v d n    | d n $           | shift('n') |
|  7 | $ NP v NP     | d n $           | reduce('NP' $\rightarrow$ 'd' 'n') |
|  8 | $ NP VP       | d n $           | reduce('VP' $\rightarrow$ 'v' 'NP') |
|  9 | $ S           | d n $           | reduce('S' $\rightarrow$ 'NP' 'VP') |
| 10 | $ S d         | n $             | shift('d') |
| 11 | $ S d n       | $               | shift('n') |
| 12 | $ S NP        | $               | reduce('NP' $\rightarrow$ 'd' 'n') |
| 13 | $ S NP        | $               | backtrack |
| 14 | $ S d n       | $               | backtrack |
| 15 | $ NP VP d     | n $             | shift('d') |
| 16 | $ NP VP d n   | $               | shift('n') |
| 17 | $ NP VP NP    | $               | reduce('NP' $\rightarrow$ 'd' 'n') |
| 18 | $ NP VP NP    | $               | backtrack |
| 19 | $ NP VP d n   | $               | backtrack |
| 20 | $ NP v NP d   | n $             | shift('d') |
| 21 | $ NP v NP d n | $               | shift('n') |
| 22 | $ NP v NP NP  | $               | reduce('NP' $\rightarrow$ 'd' 'n') |
| 23 | $ NP VP       | $               | reduce('VP' $\rightarrow$ 'v' 'NP' 'NP') |
| 24 | $ S           | $               | reduce('S' $\rightarrow$ 'NP' 'VP') |
| 25 | $ S           | $               | accept |

Parse trees:

- 'S' $\rightarrow$ ('NP' $\rightarrow$ ('d', 'n'), 'VP' $\rightarrow$ ('v', 'NP' $\rightarrow$ ('d', 'n'), 'NP' $\rightarrow$ ('d', 'n')))
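A single reduce step from the trace can be replayed in isolation; this minimal sketch shows the stack rewrite that takes row 22 to row 23:

```python
# Reduce step: if the top of the stack matches a rule body, replace it with the head
stack = ["NP", "v", "NP", "NP"]       # stack column of row 22 (without the '$')
lhs, rhs = "VP", ["v", "NP", "NP"]    # rule VP → 'v' NP NP
if stack[-len(rhs):] == rhs:
    stack = stack[:-len(rhs)] + [lhs]
assert stack == ["NP", "VP"]          # stack column of row 23
```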
```python
#!/usr/bin/env python3
# Use typing annotations in Python < 3.11
from __future__ import annotations

# Internal dependencies
import os
from dataclasses import dataclass
from typing import Dict, List, Final, Tuple, Optional, Set

# Installed dependencies
import pandas as pd

# Types
WordStr = str
GrammarRulesStr = str
GrammarNonTerminal = str
GrammarTerminal = str
GrammarProduction = List[GrammarTerminal | GrammarNonTerminal]
GrammarRules = Dict[GrammarNonTerminal, List[GrammarProduction]]
FirstSets = Dict[GrammarNonTerminal, Set[GrammarTerminal]]
FollowSets = Dict[GrammarNonTerminal, Set[GrammarTerminal]]
Stack = List[GrammarTerminal | GrammarNonTerminal]
InputBuffer = List[str]
Action = str
Arity = int
Derivation = Tuple[GrammarTerminal | GrammarNonTerminal, GrammarProduction]
DerivationHistory = List[Derivation]
History = List[
    Tuple[Stack, InputBuffer, Action, Optional[Derivation], Optional[DerivationHistory]]
]


@dataclass
class DerivationTreeNode:
    symbol: str
    children: Optional[List[DerivationTreeNode]] = None

    def __repr__(self):
        if self.children is None or len(self.children) == 0:
            return f"{self.symbol!r}"
        return f"{self.symbol!r} $\\rightarrow$ ({', '.join(repr(child) for child in self.children)})"


# Runtime data
data: Final[List[Tuple[WordStr, GrammarRulesStr]]] = [
    (
        "dnvdndn",
        "S → NP VP\n" + "NP → 'd' 'n'\n" + "VP → 'v' NP NP | 'v' NP\n",
    ),
]


def get_grammar_rules_from_string(grammar_rules_str: str) -> GrammarRules:
    rules: GrammarRules = dict()
    for grammar_rule_str in grammar_rules_str.splitlines():
        non_terminal, production_str = tuple(grammar_rule_str.split(" → ", 1))
        if non_terminal not in rules:
            rules[non_terminal] = list()
        for production in production_str.split(" | "):
            rules[non_terminal].append([x.strip("'") for x in production.split(" ")])
    return rules


def build_tree(derivations: List[Derivation]) -> DerivationTreeNode:
    if len(derivations) == 0:
        raise RuntimeError("Empty derivation list!")
    new_node_children: List[DerivationTreeNode] = []
    for symbol, production in derivations:
        children: List[DerivationTreeNode] = []
        for production_symbol in reversed(production):
            if (
                len(new_node_children) > 0
                and new_node_children[-1].symbol == production_symbol
            ):
                children.append(new_node_children[-1])
                new_node_children = new_node_children[:-1]
            else:
                children.append(DerivationTreeNode(symbol=production_symbol))
        new_node = DerivationTreeNode(symbol=symbol, children=list(reversed(children)))
        new_node_children.append(new_node)
    return new_node_children[0]


def bottom_up_parser_backtrack(
    grammar_rules: GrammarRules,
    input_buffer: InputBuffer,
    stack: Optional[Stack] = None,
    history: Optional[History] = None,
    stop_at_first_solution: bool = True,
    derivation_history: Optional[DerivationHistory] = None,
) -> Tuple[History, bool]:
    if stack is None:
        stack = []
    if history is None:
        history = [(stack.copy(), input_buffer.copy(), "start", None, None)]
    if derivation_history is None:
        derivation_history = []
    # Termination condition / Accept
    if stack == ["S"] and len(input_buffer) == 0:
        history.append(
            (stack.copy(), input_buffer.copy(), "accept", None, derivation_history)
        )
        return history, True
    # Reduce
    for non_terminal, productions in grammar_rules.items():
        for production in productions:
            if production == stack[-len(production):]:
                new_stack = stack[:-len(production)] + [non_terminal]
                derivation: Derivation = (non_terminal, production)
                history.append(
                    (
                        new_stack,
                        input_buffer.copy(),
                        f"reduce({non_terminal!r} $\\rightarrow$ {' '.join([f'{x!r}' for x in production])})",
                        None,
                        None,
                    )
                )
                new_history, accept = bottom_up_parser_backtrack(
                    grammar_rules,
                    input_buffer.copy(),
                    new_stack,
                    [],
                    derivation_history=derivation_history + [derivation],
                    stop_at_first_solution=stop_at_first_solution,
                )
                history.extend(new_history)
                if accept and stop_at_first_solution:
                    return history, True
    # Shift
    if len(input_buffer) > 0:
        new_stack = stack + [input_buffer[0]]
        history.append(
            (new_stack, input_buffer[1:], f"shift({input_buffer[0]!r})", None, None)
        )
        new_history, accept = bottom_up_parser_backtrack(
            grammar_rules,
            input_buffer[1:],
            new_stack,
            [],
            derivation_history=derivation_history,
            stop_at_first_solution=stop_at_first_solution,
        )
        return [*history, *new_history], accept
    # Fail (if no shift or reduce was possible)
    history.append((stack.copy(), input_buffer.copy(), "backtrack", None, None))
    return history, False


def format_history(history: History) -> str:
    history_df: Dict[str, List[str]] = {"stack": [], "input": [], "action": []}
    for stack, input_buffer, action, _derivation, _derivation_history in history:
        history_df["stack"].append(" ".join(["$", *stack]))
        history_df["input"].append(" ".join([*input_buffer, "$"]))
        history_df["action"].append(action)
    df = pd.DataFrame(data=history_df)
    md2anki_anki = "MD2ANKI_ANKI_HTML"
    if md2anki_anki in os.environ and os.environ[md2anki_anki] == str(True):
        # Render to HTML for anki notes because otherwise the table looks bad
        return df.to_html()
    return df.to_markdown(colalign=("right", "left", "right"))


def get_parse_trees(history: History) -> List[DerivationTreeNode]:
    parse_trees: List[DerivationTreeNode] = []
    for _stack, _input_buffer, action, derivation, derivation_history in history:
        if action.startswith("accept") and derivation_history is not None:
            parse_tree = build_tree(derivation_history)
            if parse_tree is not None:
                parse_trees.append(parse_tree)
    return parse_trees


for word_str, grammar_str in data:
    grammar = get_grammar_rules_from_string(grammar_str)
    bu_history, _ = bottom_up_parser_backtrack(
        grammar, list(word_str), stop_at_first_solution=True
    )
    grammar_md_list = "".join(
        map(
            lambda x: f"\n- {x}",
            grammar_str.replace("→", "$\\rightarrow$").splitlines(),
        )
    )
    print(f"**Bottom-Up Parser(word={word_str!r})**:\n\nGrammar:\n{grammar_md_list}\n")
    print(f"\n{format_history(bu_history)}\n\n")
    print(
        "Parse trees:\n"
        + "".join(map(lambda x: f"\n- {repr(x)}", get_parse_trees(bu_history)))
        + "\n\n"
    )
```
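The driver stops at the first accepting derivation. Since `bottom_up_parser_backtrack` already exposes `stop_at_first_solution`, a variant run that keeps backtracking past the first accept would collect every parse (a sketch; fine for this small grammar, but the search space grows quickly):

```python
# Hypothetical variant run: enumerate all parses instead of only the first
history_all, _ = bottom_up_parser_backtrack(
    grammar, list("dnvdndn"), stop_at_first_solution=False
)
for tree in get_parse_trees(history_all):
    print(tree)
```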

CYK Parser(sentence='a a a b b'):

Grammar:

- S $\rightarrow$ epsilon | A B | X B
- T $\rightarrow$ A B | X B
- X $\rightarrow$ A T
- A $\rightarrow$ 'a'
- B $\rightarrow$ 'b'

0 'a' 1 'a' 2 'a' 3 'b' 4 'b' 5

| end at $\rightarrow$, start at $\downarrow$ | 1: a  | 2: a a | 3: a a a | 4: a a a b       | 5: a a a b b     |
|:--------------------------------------------|:------|:-------|:---------|:-----------------|:-----------------|
| 0: a                                        | A (a) | -      | -        | -                | X (A T)          |
| 1: a a                                      | -     | A (a)  | -        | X (A T)          | S (X B), T (X B) |
| 2: a a a                                    | -     | -      | A (a)    | S (A B), T (A B) | -                |
| 3: a a a b                                  | -     | -      | -        | B (b)            | -                |
| 4: a a a b b                                | -     | -      | -        | -                | B (b)            |
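Each non-empty inner cell is justified by combining two smaller spans. For example, the entry `S (X B), T (X B)` in row 1, column 5 combines the `X` covering words 1..4 (`a a b`) with the `B` covering word 4..5 (`b`). A minimal check of that rule lookup, with only the relevant rules spelled out:

```python
# Which non-terminals produce the pair (X, B)? Both S and T do.
rules = {
    "S": ["epsilon", ("A", "B"), ("X", "B")],
    "T": [("A", "B"), ("X", "B")],
}
found = [lhs for lhs, prods in rules.items() for p in prods if p == ("X", "B")]
assert found == ["S", "T"]  # matches the cell "S (X B), T (X B)"
```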

CYK Parser(sentence='a a a b b b'):

Grammar:

- S $\rightarrow$ epsilon | A B | X B
- T $\rightarrow$ A B | X B
- X $\rightarrow$ A T
- A $\rightarrow$ 'a'
- B $\rightarrow$ 'b'

0 'a' 1 'a' 2 'a' 3 'b' 4 'b' 5 'b' 6

| end at $\rightarrow$, start at $\downarrow$ | 1: a  | 2: a a | 3: a a a | 4: a a a b       | 5: a a a b b     | 6: a a a b b b   |
|:--------------------------------------------|:------|:-------|:---------|:-----------------|:-----------------|:-----------------|
| 0: a                                        | A (a) | -      | -        | -                | X (A T)          | S (X B), T (X B) |
| 1: a a                                      | -     | A (a)  | -        | X (A T)          | S (X B), T (X B) | -                |
| 2: a a a                                    | -     | -      | A (a)    | S (A B), T (A B) | -                | -                |
| 3: a a a b                                  | -     | -      | -        | B (b)            | -                | -                |
| 4: a a a b b                                | -     | -      | -        | -                | B (b)            | -                |
| 5: a a a b b b                              | -     | -      | -        | -                | -                | B (b)            |

CYK Parser(sentence='workers can fish'):

Grammar:

- S $\rightarrow$ NP VP | NP IP
- IP $\rightarrow$ AUX VP
- VP $\rightarrow$ V NP
- NP $\rightarrow$ DET N
- DET $\rightarrow$ 'the'
- N $\rightarrow$ 'workers' | 'fish'
- NP $\rightarrow$ 'workers' | 'fish'
- AUX $\rightarrow$ 'can'
- V $\rightarrow$ 'can' | 'fish'
- VP $\rightarrow$ 'can' | 'fish'

0 'workers' 1 'can' 2 'fish' 3

| end at $\rightarrow$, start at $\downarrow$ | 1: workers               | 2: workers can               | 3: workers can fish                    |
|:--------------------------------------------|:-------------------------|:-----------------------------|:---------------------------------------|
| 0: workers                                  | NP (workers), N (workers) | S (NP VP)                   | S (NP IP), S (NP VP)                   |
| 1: workers can                              | -                        | VP (can), AUX (can), V (can) | IP (AUX VP), VP (V NP)                 |
| 2: workers can fish                         | -                        | -                            | VP (fish), NP (fish), N (fish), V (fish) |
```python
#!/usr/bin/env python3
# Use typing annotations in Python < 3.11
from __future__ import annotations

# Internal dependencies
import os
from typing import Dict, List, Final, Tuple, Union

# Installed dependencies
import pandas as pd

# Types
SentenceStr = str
SentenceList = List[str]
GrammarRulesStr = str
GrammarNonTerminal = str
GrammarTerminal = str
GrammarProductionCNF = Union[
    Tuple[GrammarNonTerminal, GrammarNonTerminal], GrammarTerminal
]
GrammarRules = Dict[GrammarNonTerminal, List[GrammarProductionCNF]]
CYKTableFoundNonTerminal = Tuple[GrammarNonTerminal, GrammarProductionCNF]
CYKTable = List[List[List[CYKTableFoundNonTerminal]]]

# Runtime data
data: Final[List[Tuple[SentenceStr, GrammarRulesStr]]] = [
    (
        "a a a b b",
        "S → epsilon | A B | X B\n"
        + "T → A B | X B\n"
        + "X → A T\n"
        + "A → 'a'\n"
        + "B → 'b'\n",
    ),
    (
        "a a a b b b",
        "S → epsilon | A B | X B\n"
        + "T → A B | X B\n"
        + "X → A T\n"
        + "A → 'a'\n"
        + "B → 'b'\n",
    ),
    (
        "workers can fish",
        "S → NP VP | NP IP\n"
        + "IP → AUX VP\n"
        + "VP → V NP\n"
        + "NP → DET N\n"
        + "DET → 'the'\n"
        + "N → 'workers' | 'fish'\n"
        + "NP → 'workers' | 'fish'\n"
        + "AUX → 'can'\n"
        + "V → 'can' | 'fish'\n"
        + "VP → 'can' | 'fish'\n",
    ),
]


def get_grammar_rules_from_string(grammar_rules_str: str) -> GrammarRules:
    rules: GrammarRules = dict()
    for grammar_rule_str in grammar_rules_str.splitlines():
        non_terminal, production_str = tuple(grammar_rule_str.split(" → ", 1))
        if non_terminal not in rules:
            rules[non_terminal] = list()
        for production in production_str.split(" | "):
            production_list = production.split(" ")
            if len(production_list) == 1:
                rules[non_terminal].append(production_list[0].strip("'"))
            elif len(production_list) == 2:
                rules[non_terminal].append((production_list[0], production_list[1]))
            else:
                raise RuntimeError("Expected CNF grammar!")
    return rules


def get_non_terminals(
    grammar_rules: GrammarRules, production: GrammarProductionCNF
) -> List[CYKTableFoundNonTerminal]:
    non_terminals: List[CYKTableFoundNonTerminal] = []
    for non_terminal, productions in grammar_rules.items():
        for non_terminal_production in productions:
            if production == non_terminal_production:
                non_terminals.append((non_terminal, production))
    return non_terminals


def cyk_parser(grammar_rules: GrammarRules, sentence_list: SentenceList) -> CYKTable:
    # Create cyk table
    w, h = len(sentence_list), len(sentence_list)
    cyk_table: CYKTable = [[[] for x in range(w)] for y in range(h)]
    # Initial non terminal setup
    for k in range(1, len(sentence_list) + 1):
        cyk_table[k - 1][k - 1] = get_non_terminals(grammar_rules, sentence_list[k - 1])
    for j in range(2, len(sentence_list) + 1):
        for i in reversed(range(0, j)):
            for k in range(i + 1, j):
                for b, _ in cyk_table[i][k - 1]:
                    for c, _ in cyk_table[k][j - 1]:
                        cyk_table[i][j - 1].extend(
                            get_non_terminals(grammar_rules, (b, c))
                        )
    return cyk_table


def format_cyk_table(cyk_table: CYKTable, sentence_list: SentenceList) -> str:
    first_column_id = "end at $\\rightarrow$, start at $\\downarrow$"
    cyk_table_df: Dict[str, List[str]] = {
        first_column_id: [],
    }
    for column in range(len(cyk_table)):
        column_id = f"{column + 1}: {' '.join(sentence_list[:column + 1])}"
        cyk_table_df[column_id] = []
        for row in range(len(cyk_table)):
            if column == 0:
                cyk_table_df[first_column_id].append(
                    f"{row}: {' '.join(sentence_list[:row + 1])}"
                )
            non_terminals = cyk_table[row][column]
            description = "-" if len(non_terminals) == 0 else ""
            first = True
            for non_terminal, production in non_terminals:
                description += f"{'' if first else ', '}{non_terminal} ("
                first = False
                if isinstance(production, str):
                    description += f"{production})"
                else:
                    description += f"{' '.join(production)})"
            cyk_table_df[column_id].append(description)
    df = pd.DataFrame(data=cyk_table_df)
    md2anki_anki = "MD2ANKI_ANKI_HTML"
    if md2anki_anki in os.environ and os.environ[md2anki_anki] == str(True):
        # Render to HTML for anki notes because otherwise the table looks bad
        return df.to_html(index=False)
    return df.to_markdown(index=False)


def create_numbered_sentence(sentence_list: SentenceList) -> str:
    flat_list: List[str] = []
    for sub_list in [
        [str(index), f"{x!r}"] for index, x in enumerate(sentence_list)
    ] + [[str(len(sentence_list))]]:
        for item in sub_list:
            flat_list.append(item)
    return " ".join(flat_list)


for sentence_str, grammar_str in data:
    grammar = get_grammar_rules_from_string(grammar_str)
    sentence = sentence_str.split(" ")
    cyk_table_output = cyk_parser(grammar, sentence)
    grammar_md_list = "".join(
        map(
            lambda x: f"\n- {x}",
            grammar_str.replace("→", "$\\rightarrow$").splitlines(),
        )
    )
    print(
        f"**CYK Parser(sentence={sentence_str!r})**:\n"
        f"\nGrammar:\n{grammar_md_list}\n"
        f"\n{create_numbered_sentence(sentence)}\n"
        f"\n{format_cyk_table(cyk_table_output, sentence)}\n"
    )
```
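Acceptance can be read off the finished table: the cell spanning the whole input (row 0, last column) must contain the start symbol `S`. A small sketch on top of `cyk_parser`, assuming `grammar` and `sentence` are still bound from the loop above:

```python
# The sentence is derivable from S iff S appears in the full-span cell
table = cyk_parser(grammar, sentence)
accepted = any(nt == "S" for nt, _ in table[0][len(sentence) - 1])
print(accepted)  # True for 'workers can fish', the last entry in data
```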

Earley Parser(sentence='Papa ate the caviar with a spoon'):

Grammar:

- ROOT $\rightarrow$ S
- S $\rightarrow$ NP VP
- NP $\rightarrow$ Det N | NP PP
- VP $\rightarrow$ VP PP | V NP
- PP $\rightarrow$ P NP
- NP $\rightarrow$ 'Papa'
- N $\rightarrow$ 'caviar' | 'spoon'
- V $\rightarrow$ 'ate'
- P $\rightarrow$ 'with'
- Det $\rightarrow$ 'the' | 'a'

0 'Papa' 1 'ate' 2 'the' 3 'caviar' 4 'with' 5 'a' 6 'spoon' 7

Chart columns 0-3:

| 0 | 1 | 2 | 3 |
|:--|:--|:--|:--|
| [0] 'ROOT' $\rightarrow$ $\bullet$ S (start) | [0] 'NP' $\rightarrow$ Papa $\bullet$ (scan('Papa')) | [1] 'V' $\rightarrow$ ate $\bullet$ (scan('ate')) | [2] 'Det' $\rightarrow$ the $\bullet$ (scan('the')) |
| [0] 'S' $\rightarrow$ $\bullet$ NP VP (predict('S' $\rightarrow$ 'NP' 'VP')) | [0] 'S' $\rightarrow$ NP $\bullet$ VP (attach/complete(['NP'])) | [1] 'VP' $\rightarrow$ V $\bullet$ NP (attach/complete(['V'])) | [2] 'NP' $\rightarrow$ Det $\bullet$ N (attach/complete(['Det'])) |
| [0] 'NP' $\rightarrow$ $\bullet$ Det N (predict('NP' $\rightarrow$ 'Det' 'N')) | [0] 'NP' $\rightarrow$ NP $\bullet$ PP (attach/complete(['NP'])) | [2] 'NP' $\rightarrow$ $\bullet$ Det N (predict('NP' $\rightarrow$ 'Det' 'N')) | [3] 'N' $\rightarrow$ $\bullet$ caviar (predict('N' $\rightarrow$ 'caviar')) |
| [0] 'NP' $\rightarrow$ $\bullet$ NP PP (predict('NP' $\rightarrow$ 'NP' 'PP')) | [1] 'VP' $\rightarrow$ $\bullet$ VP PP (predict('VP' $\rightarrow$ 'VP' 'PP')) | [2] 'NP' $\rightarrow$ $\bullet$ NP PP (predict('NP' $\rightarrow$ 'NP' 'PP')) | [3] 'N' $\rightarrow$ $\bullet$ spoon (predict('N' $\rightarrow$ 'spoon')) |
| [0] 'NP' $\rightarrow$ $\bullet$ Papa (predict('NP' $\rightarrow$ 'Papa')) | [1] 'VP' $\rightarrow$ $\bullet$ V NP (predict('VP' $\rightarrow$ 'V' 'NP')) | [2] 'NP' $\rightarrow$ $\bullet$ Papa (predict('NP' $\rightarrow$ 'Papa')) | |
| [0] 'Det' $\rightarrow$ $\bullet$ the (predict('Det' $\rightarrow$ 'the')) | [1] 'PP' $\rightarrow$ $\bullet$ P NP (predict('PP' $\rightarrow$ 'P' 'NP')) | [2] 'Det' $\rightarrow$ $\bullet$ the (predict('Det' $\rightarrow$ 'the')) | |
| [0] 'Det' $\rightarrow$ $\bullet$ a (predict('Det' $\rightarrow$ 'a')) | [1] 'V' $\rightarrow$ $\bullet$ ate (predict('V' $\rightarrow$ 'ate')) | [2] 'Det' $\rightarrow$ $\bullet$ a (predict('Det' $\rightarrow$ 'a')) | |
| | [1] 'P' $\rightarrow$ $\bullet$ with (predict('P' $\rightarrow$ 'with')) | | |

Chart columns 4-7:

| 4 | 5 | 6 | 7 |
|:--|:--|:--|:--|
| [3] 'N' $\rightarrow$ caviar $\bullet$ (scan('caviar')) | [4] 'P' $\rightarrow$ with $\bullet$ (scan('with')) | [5] 'Det' $\rightarrow$ a $\bullet$ (scan('a')) | [6] 'N' $\rightarrow$ spoon $\bullet$ (scan('spoon')) |
| [2] 'NP' $\rightarrow$ Det N $\bullet$ (attach/complete(['N'])) | [4] 'PP' $\rightarrow$ P $\bullet$ NP (attach/complete(['P'])) | [5] 'NP' $\rightarrow$ Det $\bullet$ N (attach/complete(['Det'])) | [5] 'NP' $\rightarrow$ Det N $\bullet$ (attach/complete(['N'])) |
| [1] 'VP' $\rightarrow$ V NP $\bullet$ (attach/complete(['NP'])) | [5] 'NP' $\rightarrow$ $\bullet$ Det N (predict('NP' $\rightarrow$ 'Det' 'N')) | [6] 'N' $\rightarrow$ $\bullet$ caviar (predict('N' $\rightarrow$ 'caviar')) | [4] 'PP' $\rightarrow$ P NP $\bullet$ (attach/complete(['NP'])) |
| [2] 'NP' $\rightarrow$ NP $\bullet$ PP (attach/complete(['NP'])) | [5] 'NP' $\rightarrow$ $\bullet$ NP PP (predict('NP' $\rightarrow$ 'NP' 'PP')) | [6] 'N' $\rightarrow$ $\bullet$ spoon (predict('N' $\rightarrow$ 'spoon')) | [5] 'NP' $\rightarrow$ NP $\bullet$ PP (attach/complete(['NP'])) |
| [0] 'S' $\rightarrow$ NP VP $\bullet$ (attach/complete(['VP'])) | [5] 'NP' $\rightarrow$ $\bullet$ Papa (predict('NP' $\rightarrow$ 'Papa')) | | [2] 'NP' $\rightarrow$ NP PP $\bullet$ (attach/complete(['PP'])) |
| [1] 'VP' $\rightarrow$ VP $\bullet$ PP (attach/complete(['VP'])) | [5] 'Det' $\rightarrow$ $\bullet$ the (predict('Det' $\rightarrow$ 'the')) | | [1] 'VP' $\rightarrow$ VP PP $\bullet$ (attach/complete(['PP'])) |
| [4] 'PP' $\rightarrow$ $\bullet$ P NP (predict('PP' $\rightarrow$ 'P' 'NP')) | [5] 'Det' $\rightarrow$ $\bullet$ a (predict('Det' $\rightarrow$ 'a')) | | [7] 'PP' $\rightarrow$ $\bullet$ P NP (predict('PP' $\rightarrow$ 'P' 'NP')) |
| [0] 'ROOT' $\rightarrow$ S $\bullet$ (attach/complete(['S'])) | | | [1] 'VP' $\rightarrow$ V NP $\bullet$ (attach/complete(['NP'])) |
| [4] 'P' $\rightarrow$ $\bullet$ with (predict('P' $\rightarrow$ 'with')) | | | [2] 'NP' $\rightarrow$ NP $\bullet$ PP (attach/complete(['NP'])) |
| | | | [0] 'S' $\rightarrow$ NP VP $\bullet$ (attach/complete(['VP'])) |
| | | | [1] 'VP' $\rightarrow$ VP $\bullet$ PP (attach/complete(['VP'])) |
| | | | [7] 'P' $\rightarrow$ $\bullet$ with (predict('P' $\rightarrow$ 'with')) |
| | | | [0] 'ROOT' $\rightarrow$ S $\bullet$ (attach/complete(['S'])) |
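Each chart entry is a dotted item: `[0] 'S' → NP • VP` means an `S` that started at position 0 has already recognized `NP` and still needs `VP`. In the implementation below this is just a 5-tuple (origin, left-hand side, symbols before the dot, symbols after the dot, creating action):

```python
# The column-1 item "[0] 'S' → NP • VP (attach/complete(['NP']))" as a tuple
item = (0, "S", ["NP"], ["VP"], "attach/complete(['NP'])")
origin, lhs, before_dot, after_dot, action = item
```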
```python
#!/usr/bin/env python3
# Use typing annotations in Python < 3.11
from __future__ import annotations

# Internal dependencies
import os
from typing import Dict, List, Final, Tuple

# Installed dependencies
import pandas as pd

# Types
SentenceStr = str
SentenceList = List[str]
GrammarRulesStr = str
GrammarNonTerminal = str
GrammarTerminal = str
GrammarProduction = List[GrammarNonTerminal | GrammarTerminal]
GrammarRules = Dict[GrammarNonTerminal, List[GrammarProduction]]
EarleyHypothesis = Tuple[
    int, GrammarNonTerminal, GrammarProduction, GrammarProduction, str
]
EarleyTableRow = List[EarleyHypothesis]
EarleyTable = List[EarleyTableRow]

# Runtime data
data: Final[List[Tuple[SentenceStr, GrammarRulesStr]]] = [
    (
        "Papa ate the caviar with a spoon",
        "ROOT → S\n"
        + "S → NP VP\n"
        + "NP → Det N | NP PP\n"
        + "VP → VP PP | V NP\n"
        + "PP → P NP\n"
        + "NP → 'Papa'\n"
        + "N → 'caviar' | 'spoon'\n"
        + "V → 'ate'\n"
        + "P → 'with'\n"
        + "Det → 'the' | 'a'\n",
    ),
]


def get_grammar_rules_from_string(grammar_rules_str: str) -> GrammarRules:
    rules: GrammarRules = dict()
    for grammar_rule_str in grammar_rules_str.splitlines():
        non_terminal, production_str = tuple(grammar_rule_str.split(" → ", 1))
        if non_terminal not in rules:
            rules[non_terminal] = list()
        for production in production_str.split(" | "):
            rules[non_terminal].append([x.strip("'") for x in production.split(" ")])
    return rules


def earley_parser(
    grammar_rules: GrammarRules, sentence_list: SentenceList
) -> EarleyTable:
    # Create and initialize the Earley table
    earley_table: EarleyTable = [
        [(0, "ROOT", [], grammar_rules["ROOT"][0], "start")],
    ]
    for _ in sentence_list:
        earley_table.append([])
    for column_j in range(0, len(sentence_list) + 1):
        agenda = earley_table[column_j].copy()
        while len(agenda) > 0:
            current_hypothesis_i = agenda.pop(0)
            # Scan
            terminal_t = (
                current_hypothesis_i[3][0] if len(current_hypothesis_i[3]) > 0 else None
            )
            if (
                terminal_t is not None
                and terminal_t not in grammar_rules
                and len(sentence_list) > column_j
                and terminal_t == sentence_list[column_j]
            ):
                new_hypothesis = (
                    current_hypothesis_i[0],
                    current_hypothesis_i[1],
                    current_hypothesis_i[2] + [terminal_t],
                    current_hypothesis_i[3][1:],
                    f"scan({terminal_t!r})",
                )
                already_exists = False
                for hypothesis in earley_table[column_j + 1]:
                    if (
                        hypothesis[0] == new_hypothesis[0]
                        and hypothesis[1] == new_hypothesis[1]
                        and hypothesis[2] == new_hypothesis[2]
                        and hypothesis[3] == new_hypothesis[3]
                    ):
                        already_exists = True
                        break
                if not already_exists:
                    earley_table[column_j + 1].append(new_hypothesis)
            # Predict
            non_terminal_b = (
                current_hypothesis_i[3][0] if len(current_hypothesis_i[3]) > 0 else None
            )
            if non_terminal_b is not None and non_terminal_b in grammar_rules:
                for production in grammar_rules[non_terminal_b]:
                    new_hypothesis = (
                        column_j,
                        non_terminal_b,
                        [],
                        production,
                        f"predict({non_terminal_b!r} $\\rightarrow$ {' '.join([f'{x!r}' for x in production])})",
                    )
                    already_exists = False
                    for hypothesis in earley_table[column_j]:
                        if (
                            hypothesis[0] == new_hypothesis[0]
                            and hypothesis[1] == new_hypothesis[1]
                            and hypothesis[2] == new_hypothesis[2]
                            and hypothesis[3] == new_hypothesis[3]
                        ):
                            already_exists = True
                            break
                    if not already_exists:
                        earley_table[column_j].append(new_hypothesis)
                        agenda.append(new_hypothesis)
            # Attach/Complete
            if len(current_hypothesis_i[3]) == 0:
                for customer in earley_table[current_hypothesis_i[0]]:
                    if customer[3][:1] == [current_hypothesis_i[1]]:
                        new_hypothesis = (
                            customer[0],
                            customer[1],
                            customer[2] + customer[3][:1],
                            customer[3][1:],
                            f"attach/complete({customer[3][:1]!r})",
                        )
                        already_exists = False
                        for hypothesis in earley_table[column_j]:
                            if (
                                hypothesis[0] == new_hypothesis[0]
                                and hypothesis[1] == new_hypothesis[1]
                                and hypothesis[2] == new_hypothesis[2]
                                and hypothesis[3] == new_hypothesis[3]
                            ):
                                already_exists = True
                                break
                        if not already_exists:
                            earley_table[column_j].append(new_hypothesis)
                            agenda.append(new_hypothesis)
    return earley_table


def split_dict(d, n):
    keys = list(d.keys())
    for i in range(0, len(keys), n):
        yield {k: d[k] for k in keys[i: i + n]}


def format_earley_table(earley_table: EarleyTable) -> str:
    earley_table_df: Dict[str, List[str]] = dict()
    max_row_count: int = 0
    for index, column in enumerate(earley_table):
        earley_table_df[f"{index}"] = []
        for row in column:
            earley_table_df[f"{index}"].append(
                f"[{row[0]}] {row[1]!r} $\\rightarrow$ {' '.join(row[2])} $\\bullet$ {' '.join(row[3])} ({row[4]})"
            )
        if max_row_count < len(earley_table_df[f"{index}"]):
            max_row_count = len(earley_table_df[f"{index}"])
    for _, dict_entry in earley_table_df.items():
        dict_entry.extend(["" for _ in range(max_row_count - len(dict_entry))])
    df = pd.DataFrame(data=earley_table_df)
    md2anki_anki = "MD2ANKI_ANKI_HTML"
    if md2anki_anki in os.environ and os.environ[md2anki_anki] == str(True):
        # Render to HTML for anki notes because otherwise the table looks bad
        return df.to_html(index=False)
    md2anki_pdf = "MD2ANKI_PANDOC_PDF"
    if md2anki_pdf in os.environ and os.environ[md2anki_pdf] == str(True):
        # Render to PDF as multiple tables because the full table is too wide
        output_list: List[str] = []
        for part_i in split_dict(earley_table_df, 4):
            df_part_i = pd.DataFrame(data=part_i)
            output_list.append(df_part_i.to_markdown(index=False))
        return "\n\n".join(output_list)
    return df.to_markdown(index=False)


def create_numbered_sentence(sentence_list: SentenceList) -> str:
    flat_list: List[str] = []
    for sub_list in [
        [str(index), f"{x!r}"] for index, x in enumerate(sentence_list)
    ] + [[str(len(sentence_list))]]:
        for item in sub_list:
            flat_list.append(item)
    return " ".join(flat_list)


for sentence_str, grammar_str in data:
    grammar = get_grammar_rules_from_string(grammar_str)
    sentence = sentence_str.split(" ")
    earley_table_output = earley_parser(grammar, sentence)
    grammar_md_list = "".join(
        map(
            lambda x: f"\n- {x}",
            grammar_str.replace("→", "$\\rightarrow$").splitlines(),
        )
    )
    print(
        f"**Earley Parser(sentence={sentence_str!r})**:\n"
        f"\nGrammar:\n{grammar_md_list}\n"
        f"\n{create_numbered_sentence(sentence)}\n"
        f"\n{format_earley_table(earley_table_output)}\n"
    )
```
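As with CYK, acceptance is a table lookup: the input is grammatical iff the last column contains a completed `ROOT` item with origin 0. A sketch reusing `earley_table_output` from the loop above:

```python
# Accept iff the last column holds a finished ROOT item that started at 0
accepted = any(
    origin == 0 and lhs == "ROOT" and after_dot == []
    for origin, lhs, _before_dot, after_dot, _action in earley_table_output[-1]
)
print(accepted)  # True for 'Papa ate the caviar with a spoon'
```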

Top-Down Parser(word='dnvdn',look-ahead=False):

Grammar:

- S $\rightarrow$ NP VP
- NP $\rightarrow$ 'd' 'n'
- VP $\rightarrow$ 'v' NP NP | 'v' NP | 'v'
|    | stack     | input       | action |
|---:|:----------|:------------|:-------|
|  0 | $ S       | d n v d n $ | start |
|  1 | $ VP NP   | d n v d n $ | expand('S' $\rightarrow$ 'NP' 'VP') $\Rightarrow$ ('S', 2) |
|  2 | $ VP n d  | d n v d n $ | expand('NP' $\rightarrow$ 'd' 'n') $\Rightarrow$ ('NP', 2) |
|  3 | $ VP n    | n v d n $   | match('d') $\Rightarrow$ ('d', 0) |
|  4 | $ VP      | v d n $     | match('n') $\Rightarrow$ ('n', 0) |
|  5 | $ NP NP v | v d n $     | expand('VP' $\rightarrow$ 'v' 'NP' 'NP') $\Rightarrow$ ('VP', 3) |
|  6 | $ NP NP   | d n $       | match('v') $\Rightarrow$ ('v', 0) |
|  7 | $ NP n d  | d n $       | expand('NP' $\rightarrow$ 'd' 'n') $\Rightarrow$ ('NP', 2) |
|  8 | $ NP n    | n $         | match('d') $\Rightarrow$ ('d', 0) |
|  9 | $ NP      | $           | match('n') $\Rightarrow$ ('n', 0) |
| 10 | $ NP      | $           | backtrack |
| 11 | $ NP NP   | d n $       | backtrack |
| 12 | $ NP v    | v d n $     | expand('VP' $\rightarrow$ 'v' 'NP') $\Rightarrow$ ('VP', 2) |
| 13 | $ NP      | d n $       | match('v') $\Rightarrow$ ('v', 0) |
| 14 | $ n d     | d n $       | expand('NP' $\rightarrow$ 'd' 'n') $\Rightarrow$ ('NP', 2) |
| 15 | $ n       | n $         | match('d') $\Rightarrow$ ('d', 0) |
| 16 | $         | $           | match('n') $\Rightarrow$ ('n', 0) |
| 17 | $         | $           | accept |

Parse trees:

- 'S' $\rightarrow$ ('NP' $\rightarrow$ ('d', 'n'), 'VP' $\rightarrow$ ('v', 'NP' $\rightarrow$ ('d', 'n')))
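The expand steps push a production right-to-left so that the leftmost symbol ends up on top of the stack; a minimal replay of row 1 of the trace above:

```python
# Expand step from row 0 to row 1: S → NP VP, recorded as derivation ('S', 2)
stack = ["S"]
production = ["NP", "VP"]
stack = stack[:-1] + list(reversed(production))
assert stack == ["VP", "NP"]  # printed as "$ VP NP" in the stack column
```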

Top-Down Parser(word='(3*(7/-2))',look-ahead=True):

Grammar:

- S $\rightarrow$ '(' S Op S ')' | Num
- Num $\rightarrow$ Sign Num | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '0'
- Op $\rightarrow$ '+' | '-' | '*' | '/'
- Sign $\rightarrow$ '-'

FIRST sets:

- FIRST(S) = { (, -, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }
- FIRST(Num) = { -, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }
- FIRST(Op) = { *, +, -, / }
- FIRST(Sign) = { - }

FOLLOW sets (take with a grain of salt - not 100% checked!):

- FOLLOW(S) = { $, (, -, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }
- FOLLOW(Num) = { -, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }
- FOLLOW(Op) = { }
- FOLLOW(Sign) = { }
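As a sanity check, FIRST(S) can be unfolded by hand: `S → '(' …` contributes `(`, and `S → Num` contributes FIRST(Num), which is the ten digits plus FIRST(Sign) = { - } via `Num → Sign Num`. A small sketch:

```python
# Hand-unfolded FIRST sets for the grammar above
first_sign = {"-"}
first_num = first_sign | set("0123456789")  # Num → Sign Num | '1' | ... | '0'
first_s = {"("} | first_num                 # S → '(' S Op S ')' | Num
print(sorted(first_s))  # ['(', '-', '0', ..., '9'], as listed above
```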
|    | stack          | input                  | action |
|---:|:---------------|:-----------------------|:-------|
|  0 | $ S            | ( 3 * ( 7 / - 2 ) ) $ | start |
|  1 | $ ) S Op S (   | ( 3 * ( 7 / - 2 ) ) $ | expand('S' $\rightarrow$ '(' 'S' 'Op' 'S' ')') $\Rightarrow$ ('S', 5) |
|  2 | $ ) S Op S     | 3 * ( 7 / - 2 ) ) $   | match('(') $\Rightarrow$ ('(', 0) |
|  3 | $ ) S Op S     | 3 * ( 7 / - 2 ) ) $   | look-ahead('3' != '(' ['S' $\rightarrow$ '(' 'S' 'Op' 'S' ')']) |
|  4 | $ ) S Op Num   | 3 * ( 7 / - 2 ) ) $   | expand('S' $\rightarrow$ 'Num') $\Rightarrow$ ('S', 1) |
|  5 | $ ) S Op Num   | 3 * ( 7 / - 2 ) ) $   | look-ahead('3' $\not\in$ FIRST('Sign') ['Num' $\rightarrow$ 'Sign' 'Num']) |
|  6 | $ ) S Op Num   | 3 * ( 7 / - 2 ) ) $   | look-ahead('3' != '1' ['Num' $\rightarrow$ '1']) |
|  7 | $ ) S Op Num   | 3 * ( 7 / - 2 ) ) $   | look-ahead('3' != '2' ['Num' $\rightarrow$ '2']) |
|  8 | $ ) S Op 3     | 3 * ( 7 / - 2 ) ) $   | expand('Num' $\rightarrow$ '3') $\Rightarrow$ ('Num', 1) |
|  9 | $ ) S Op       | * ( 7 / - 2 ) ) $     | match('3') $\Rightarrow$ ('3', 0) |
| 10 | $ ) S Op       | * ( 7 / - 2 ) ) $     | look-ahead('*' != '+' ['Op' $\rightarrow$ '+']) |
| 11 | $ ) S Op       | * ( 7 / - 2 ) ) $     | look-ahead('*' != '-' ['Op' $\rightarrow$ '-']) |
| 12 | $ ) S *        | * ( 7 / - 2 ) ) $     | expand('Op' $\rightarrow$ '*') $\Rightarrow$ ('Op', 1) |
| 13 | $ ) S          | ( 7 / - 2 ) ) $       | match('*') $\Rightarrow$ ('*', 0) |
| 14 | $ ) ) S Op S ( | ( 7 / - 2 ) ) $       | expand('S' $\rightarrow$ '(' 'S' 'Op' 'S' ')') $\Rightarrow$ ('S', 5) |
| 15 | $ ) ) S Op S   | 7 / - 2 ) ) $         | match('(') $\Rightarrow$ ('(', 0) |
| 16 | $ ) ) S Op S   | 7 / - 2 ) ) $         | look-ahead('7' != '(' ['S' $\rightarrow$ '(' 'S' 'Op' 'S' ')']) |
| 17 | $ ) ) S Op Num | 7 / - 2 ) ) $         | expand('S' $\rightarrow$ 'Num') $\Rightarrow$ ('S', 1) |
| 18 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' $\not\in$ FIRST('Sign') ['Num' $\rightarrow$ 'Sign' 'Num']) |
| 19 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' != '1' ['Num' $\rightarrow$ '1']) |
| 20 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' != '2' ['Num' $\rightarrow$ '2']) |
| 21 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' != '3' ['Num' $\rightarrow$ '3']) |
| 22 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' != '4' ['Num' $\rightarrow$ '4']) |
| 23 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' != '5' ['Num' $\rightarrow$ '5']) |
| 24 | $ ) ) S Op Num | 7 / - 2 ) ) $         | look-ahead('7' != '6' ['Num' $\rightarrow$ '6']) |
| 25 | $ ) ) S Op 7   | 7 / - 2 ) ) $         | expand('Num' $\rightarrow$ '7') $\Rightarrow$ ('Num', 1) |
| 26 | $ ) ) S Op     | / - 2 ) ) $           | match('7') $\Rightarrow$ ('7', 0) |
| 27 | $ ) ) S Op     | / - 2 ) ) $           | look-ahead('/' != '+' ['Op' $\rightarrow$ '+']) |
| 28 | $ ) ) S Op     | / - 2 ) ) $           | look-ahead('/' != '-' ['Op' $\rightarrow$ '-']) |
| 29 | $ ) ) S Op     | / - 2 ) ) $           | look-ahead('/' != '*' ['Op' $\rightarrow$ '*']) |
| 30 | $ ) ) S /      | / - 2 ) ) $           | expand('Op' $\rightarrow$ '/') $\Rightarrow$ ('Op', 1) |
| 31 | $ ) ) S        | - 2 ) ) $             | match('/') $\Rightarrow$ ('/', 0) |
| 32 | $ ) ) S        | - 2 ) ) $             | look-ahead('-' != '(' ['S' $\rightarrow$ '(' 'S' 'Op' 'S' ')']) |
| 33 | $ ) ) Num      | - 2 ) ) $             | expand('S' $\rightarrow$ 'Num') $\Rightarrow$ ('S', 1) |
| 34 | $ ) ) Num Sign | - 2 ) ) $             | expand('Num' $\rightarrow$ 'Sign' 'Num') $\Rightarrow$ ('Num', 2) |
| 35 | $ ) ) Num -    | - 2 ) ) $             | expand('Sign' $\rightarrow$ '-') $\Rightarrow$ ('Sign', 1) |
| 36 | $ ) ) Num      | 2 ) ) $               | match('-') $\Rightarrow$ ('-', 0) |
| 37 | $ ) ) Num      | 2 ) ) $               | look-ahead('2' $\not\in$ FIRST('Sign') ['Num' $\rightarrow$ 'Sign' 'Num']) |
| 38 | $ ) ) Num      | 2 ) ) $               | look-ahead('2' != '1' ['Num' $\rightarrow$ '1']) |
| 39 | $ ) ) 2        | 2 ) ) $               | expand('Num' $\rightarrow$ '2') $\Rightarrow$ ('Num', 1) |
| 40 | $ ) )          | ) ) $                 | match('2') $\Rightarrow$ ('2', 0) |
| 41 | $ )            | ) $                   | match(')') $\Rightarrow$ (')', 0) |
| 42 | $              | $                     | match(')') $\Rightarrow$ (')', 0) |
| 43 | $              | $                     | accept |

Parse trees:

- 'S' $\rightarrow$ ('(', 'S' $\rightarrow$ ('Num' $\rightarrow$ ('3')), 'Op' $\rightarrow$ ('*'), 'S' $\rightarrow$ ('(', 'S' $\rightarrow$ ('Num' $\rightarrow$ ('7')), 'Op' $\rightarrow$ ('/'), 'S' $\rightarrow$ ('Num' $\rightarrow$ ('Sign' $\rightarrow$ ('-'), 'Num' $\rightarrow$ ('2'))), ')'), ')')
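Rows 3 to 7 of the trace show what the look-ahead buys: with next token `'3'`, four candidate expansions are rejected without ever touching the stack. A sketch of the two pruning tests involved (terminal mismatch and FIRST-set miss):

```python
# The two look-ahead tests, for next input token '3'
token, first_sets = "3", {"Sign": {"-"}}
assert token != "("                     # rejects S → '(' S Op S ')'  (row 3)
assert token not in first_sets["Sign"]  # rejects Num → Sign Num      (row 5)
# only Num → '3' survives and is expanded in row 8
```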
```python
#!/usr/bin/env python3
# Use typing annotations in Python < 3.11
from __future__ import annotations

# Internal dependencies
import os
from dataclasses import dataclass
from typing import Dict, List, Final, Tuple, Optional, Set

# Installed dependencies
import pandas as pd

# Types
WordStr = str
GrammarRulesStr = str
LookAheadToggle = bool
GrammarNonTerminal = str
GrammarTerminal = str
GrammarProduction = List[GrammarTerminal | GrammarNonTerminal]
GrammarRules = Dict[GrammarNonTerminal, List[GrammarProduction]]
FirstSets = Dict[GrammarNonTerminal, Set[GrammarTerminal]]
FollowSets = Dict[GrammarNonTerminal, Set[GrammarTerminal]]
Arity = int
Derivation = Tuple[GrammarTerminal | GrammarNonTerminal, Arity]
DerivationHistory = List[Derivation]
Stack = List[GrammarTerminal | GrammarNonTerminal]
InputBuffer = List[str]
Action = str
History = List[
    Tuple[Stack, InputBuffer, Action, Optional[Derivation], Optional[DerivationHistory]]
]


@dataclass
class DerivationTreeNode:
    symbol: str
    children: Optional[List[DerivationTreeNode]] = None

    def __repr__(self):
        if self.children is None or len(self.children) == 0:
            return f"{self.symbol!r}"
        return f"{self.symbol!r} $\\rightarrow$ ({', '.join(repr(child) for child in self.children)})"


# Runtime data
data: Final[List[Tuple[WordStr, GrammarRulesStr, LookAheadToggle]]] = [
    (
        "dnvdn",
        "S → NP VP\n" + "NP → 'd' 'n'\n" + "VP → 'v' NP NP | 'v' NP | 'v'",
        False,
    ),
    (
        "(3*(7/-2))",
        "S → '(' S Op S ')' | Num\n"
        + "Num → Sign Num | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '0'\n"
        + "Op → '+' | '-' | '*' | '/'\n"
        + "Sign → '-'\n",
        True,
    ),
]


def get_grammar_rules_from_string(grammar_rules_str: str) -> GrammarRules:
    rules: GrammarRules = dict()
    for grammar_rule_str in grammar_rules_str.splitlines():
        non_terminal, production_str = tuple(grammar_rule_str.split(" → ", 1))
        if non_terminal not in rules:
            rules[non_terminal] = list()
        for production in production_str.split(" | "):
            rules[non_terminal].append([x.strip("'") for x in production.split(" ")])
    return rules


def calculate_first_sets(grammar_rules: GrammarRules) -> FirstSets:
    first_sets: FirstSets = dict()
    for non_terminal, productions in grammar_rules.items():
        if non_terminal not in first_sets:
            first_sets[non_terminal] = set()

        # Naive recursion over the first symbol of each production; assumes the
        # grammar is not left-recursive (otherwise this would not terminate)
        def find_first_set(symbol: str) -> Set[str]:
            first_set = set()
            if symbol in grammar_rules:
                for production in grammar_rules[symbol]:
                    if production[0] not in grammar_rules:
                        first_set.add(production[0])
                    first_set = first_set.union(find_first_set(production[0]))
            return first_set

        first_sets[non_terminal] = first_sets[non_terminal].union(
            find_first_set(non_terminal)
        )
    return first_sets


def calculate_follow_set(grammar_rules: GrammarRules) -> FollowSets:
    # Rough approximation (hence the "grain of salt" disclaimer in the output):
    # whenever a production of a non-terminal contains another non-terminal
    # after its first symbol, the FIRST set of the left-hand side is added to
    # the FOLLOW set of the left-hand side
    first_sets = calculate_first_sets(grammar_rules)
    follow_sets: FollowSets = {"S": {"$"}}
    for non_terminal, productions in grammar_rules.items():
        if non_terminal not in follow_sets:
            follow_sets[non_terminal] = set()
        for production in productions:
            for prod_non_terminal in production[1:]:
                if prod_non_terminal not in grammar_rules:
                    continue
                follow_sets[non_terminal] = follow_sets[non_terminal].union(
                    first_sets[non_terminal]
                )
    return follow_sets


def build_tree(
    derivations: List[Derivation],
) -> Tuple[DerivationTreeNode, List[Derivation]]:
    if len(derivations) == 0:
        raise RuntimeError("Empty derivation list!")
    symbol, arity = derivations[0]
    derivations_rest = derivations[1:]
    subtrees = []
    for i in range(arity):
        subtree, derivations_rest = build_tree(derivations_rest)
        subtrees.append(subtree)
    return DerivationTreeNode(symbol=symbol, children=subtrees), derivations_rest


def top_down_parser_backtrack(
    grammar_rules: GrammarRules,
    input_buffer: InputBuffer,
    stack: Optional[Stack] = None,
    history: Optional[History] = None,
    derivation_history: Optional[DerivationHistory] = None,
    look_ahead: Optional[Tuple[FirstSets, FollowSets]] = None,
    stop_at_first_solution: bool = True,
) -> Tuple[History, bool]:
    if stack is None:
        stack = ["S"]
    if history is None:
        history = [(stack.copy(), input_buffer.copy(), "start", None, None)]
    if derivation_history is None:
        derivation_history = []
    # Termination condition / Accept
    if len(stack) == 0 and len(input_buffer) == 0:
        history.append(
            (
                stack.copy(),
                input_buffer.copy(),
                f"accept{'' if stop_at_first_solution else ', backtrack'}",
                None,
                derivation_history,
            )
        )
        return history, True
    # Match
    if len(stack) > 0 and len(input_buffer) > 0 and stack[-1] == input_buffer[0]:
        derivation_match: Derivation = (stack[-1], 0)
        history.append(
            (
                stack[:-1],
                input_buffer[1:],
                f"match({stack[-1]!r})",
                derivation_match,
                None,
            )
        )
        new_history, accept = top_down_parser_backtrack(
            grammar_rules,
            input_buffer[1:],
            stack[:-1],
            [],
            derivation_history=derivation_history + [derivation_match],
            look_ahead=look_ahead,
            stop_at_first_solution=stop_at_first_solution,
        )
        return [*history, *new_history], accept
    # Expand rules
    if len(stack) > 0 and len(input_buffer) > 0 and stack[-1] in grammar_rules:
        for production in grammar_rules[stack[-1]]:
            if look_ahead is not None:
                # Check if the production can at any point match the next token
                if (
                    production[0] not in look_ahead[0]
                    and production[0] != input_buffer[0]
                ):
                    history.append(
                        (
                            stack.copy(),
                            input_buffer.copy(),
                            f"look-ahead({input_buffer[0]!r} != {production[0]!r} [{stack[-1]!r} $\\rightarrow$ {' '.join([f'{x!r}' for x in production])}])",
                            None,
                            None,
                        )
                    )
                    continue
                if (
                    production[0] in look_ahead[0]
                    and input_buffer[0] not in look_ahead[0][production[0]]
                ):
                    history.append(
                        (
                            stack.copy(),
                            input_buffer.copy(),
                            f"look-ahead({input_buffer[0]!r} $\\not\\in$ FIRST({production[0]!r}) [{stack[-1]!r} $\\rightarrow$ {' '.join([f'{x!r}' for x in production])}])",
                            None,
                            None,
                        )
                    )
                    continue
            new_stack: Stack = [*stack[:-1], *reversed(production)]
            derivation_expand: Derivation = (stack[-1], len(production))
            history.append(
                (
                    new_stack,
                    input_buffer.copy(),
                    f"expand({stack[-1]!r} $\\rightarrow$ {' '.join([f'{x!r}' for x in production])})",
                    derivation_expand,
                    None,
                )
            )
            new_history, accept = top_down_parser_backtrack(
                grammar_rules,
                input_buffer.copy(),
                new_stack,
                [],
                derivation_history=derivation_history + [derivation_expand],
                look_ahead=look_ahead,
                stop_at_first_solution=stop_at_first_solution,
            )
            history.extend(new_history)
            if accept and stop_at_first_solution:
                return history, True
    # Backtrack (if no match or expand was found)
    history.append((stack.copy(), input_buffer.copy(), "backtrack", None, None))
    return history, False


def format_history(history: History) -> str:
    history_df: Dict[str, List[str]] = {"stack": [], "input": [], "action": []}
    for stack, input_buffer, action, derivation, _derivation_history in history:
        history_df["stack"].append(" ".join(["$", *stack]))
        history_df["input"].append(" ".join([*input_buffer, "$"]))
        history_df["action"].append(
            action + ("" if derivation is None else f" $\\Rightarrow$ {derivation}")
        )
    df = pd.DataFrame(data=history_df)
    md2anki_anki = "MD2ANKI_ANKI_HTML"
    if md2anki_anki in os.environ and os.environ[md2anki_anki] == str(True):
        # Render to HTML for anki notes because otherwise the table looks bad
        return df.to_html()
    return df.to_markdown(colalign=("right", "left", "right", "left"))


def get_parse_trees(history: History) -> List[DerivationTreeNode]:
    parse_trees: List[DerivationTreeNode] = []
    for _stack, _input_buffer, action, derivation, derivation_history in history:
        if action.startswith("accept") and derivation_history is not None:
            parse_tree, _ = build_tree(derivation_history)
            if parse_tree is not None:
                parse_trees.append(parse_tree)
    return parse_trees


for word_str, grammar_str, use_look_ahead in data:
    grammar = get_grammar_rules_from_string(grammar_str)
    look_ahead_info = (
        (calculate_first_sets(grammar), calculate_follow_set(grammar))
        if use_look_ahead
        else None
    )
    td_history, _ = top_down_parser_backtrack(
        grammar, list(word_str), look_ahead=look_ahead_info, stop_at_first_solution=True
    )
    grammar_md_list = "".join(
        map(
            lambda x: f"\n- {x}",
            grammar_str.replace("→", "$\\rightarrow$").splitlines(),
        )
    )
    print(
        f"**Top-Down Parser(word={word_str!r},look-ahead={use_look_ahead})**:\n\nGrammar:\n{grammar_md_list}\n"
    )
    if look_ahead_info is not None:
        print(
            "FIRST sets:\n"
            + "".join(
                map(
                    lambda x: f"\n- FIRST({x[0]}) = "
                    + "$\\{$ "
                    + ", ".join(sorted(x[1]))
                    + " $\\}$",
                    look_ahead_info[0].items(),
                )
            )
        )
        print(
            "\nFOLLOW sets (take with a grain of salt - not 100% checked!):\n"
            + "".join(
                map(
                    lambda x: f"\n- FOLLOW({x[0]}) = "
                    + "$\\{$ "
                    + ", ".join(sorted(x[1]))
                    + " $\\}$",
                    look_ahead_info[1].items(),
                )
            )
        )
    print(f"\n{format_history(td_history)}\n")
    print(
        "Parse trees:\n"
        + "".join(map(lambda x: f"\n- {repr(x)}", get_parse_trees(td_history)))
        + "\n\n"
    )
```
Prolog DCG Parser(sentence='I shot an elephant in my pajamas'):

The same naive backtracking top-down strategy comes almost for free in Prolog: the definite clause grammar (DCG) below is translated by Prolog into difference-list predicates (e.g. `s --> np, vp.` becomes `s(S0, S) :- np(S0, S1), vp(S1, S).`), and the trace that follows shows the Call/Fail/Redo backtracking while parsing the sentence.

```prolog
s --> np, vp.
np --> det, n.
np --> det, n, pp.
np --> pron.
vp --> v.
vp --> v, np.
vp --> v, np, pp.
pp --> p, np.
pron --> ["I"].
det --> ["the"].
det --> ["an"].
det --> ["my"].
n --> ["elephant"].
n --> ["pajamas"].
v --> ["sneezed"].
v --> ["shot"].
p --> ["in"].

parse(String) :-
    split_string(String, ' ', '', List),
    writeln(List),
    s(List, []).

% Trace every derivation step of the parse
%visible(+all),
:- leash(-all), trace, parse("I shot an elephant in my pajamas"), notrace.
```
```
Call: (32) parse("I shot an elephant in my pajamas")
Call: (33) split_string("I shot an elephant in my pajamas", ' ', '', _7814)
Exit: (33) split_string("I shot an elephant in my pajamas", ' ', '', ["I", "shot", "an", "elephant", "in", "my", "pajamas"])
Call: (33) writeln(["I", "shot", "an", "elephant", "in", "my", "pajamas"])
[I,shot,an,elephant,in,my,pajamas]
Exit: (33) writeln(["I", "shot", "an", "elephant", "in", "my", "pajamas"])
Call: (33) s(["I", "shot", "an", "elephant", "in", "my", "pajamas"], [])
Call: (34) np(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _10972)
Call: (35) det(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _11586)
Fail: (35) det(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _11586)
Redo: (34) np(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _10972)
Call: (35) det(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _13424)
Fail: (35) det(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _13424)
Redo: (34) np(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _10972)
Call: (35) pron(["I", "shot", "an", "elephant", "in", "my", "pajamas"], _10972)
Exit: (35) pron(["I", "shot", "an", "elephant", "in", "my", "pajamas"], ["shot", "an", "elephant", "in", "my", "pajamas"])
Exit: (34) np(["I", "shot", "an", "elephant", "in", "my", "pajamas"], ["shot", "an", "elephant", "in", "my", "pajamas"])
Call: (34) vp(["shot", "an", "elephant", "in", "my", "pajamas"], [])
Call: (35) v(["shot", "an", "elephant", "in", "my", "pajamas"], [])
Fail: (35) v(["shot", "an", "elephant", "in", "my", "pajamas"], [])
Redo: (34) vp(["shot", "an", "elephant", "in", "my", "pajamas"], [])
Call: (35) v(["shot", "an", "elephant", "in", "my", "pajamas"], _19546)
Exit: (35) v(["shot", "an", "elephant", "in", "my", "pajamas"], ["an", "elephant", "in", "my", "pajamas"])
Call: (35) np(["an", "elephant", "in", "my", "pajamas"], [])
Call: (36) det(["an", "elephant", "in", "my", "pajamas"], _21384)
Exit: (36) det(["an", "elephant", "in", "my", "pajamas"], ["elephant", "in", "my", "pajamas"])
Call: (36) n(["elephant", "in", "my", "pajamas"], [])
Fail: (36) n(["elephant", "in", "my", "pajamas"], [])
Redo: (35) np(["an", "elephant", "in", "my", "pajamas"], [])
Call: (36) det(["an", "elephant", "in", "my", "pajamas"], _24446)
Exit: (36) det(["an", "elephant", "in", "my", "pajamas"], ["elephant", "in", "my", "pajamas"])
Call: (36) n(["elephant", "in", "my", "pajamas"], _25672)
Exit: (36) n(["elephant", "in", "my", "pajamas"], ["in", "my", "pajamas"])
Call: (36) pp(["in", "my", "pajamas"], [])
Call: (37) p(["in", "my", "pajamas"], _27510)
Exit: (37) p(["in", "my", "pajamas"], ["my", "pajamas"])
Call: (37) np(["my", "pajamas"], [])
Call: (38) det(["my", "pajamas"], _29348)
Exit: (38) det(["my", "pajamas"], ["pajamas"])
Call: (38) n(["pajamas"], [])
Exit: (38) n(["pajamas"], [])
Exit: (37) np(["my", "pajamas"], [])
Exit: (36) pp(["in", "my", "pajamas"], [])
Exit: (35) np(["an", "elephant", "in", "my", "pajamas"], [])
Exit: (34) vp(["shot", "an", "elephant", "in", "my", "pajamas"], [])
Exit: (33) s(["I", "shot", "an", "elephant", "in", "my", "pajamas"], [])
Exit: (32) parse("I shot an elephant in my pajamas")
```