Skip to content

Instantly share code, notes, and snippets.

@lukbukkit
Last active January 6, 2021 13:31
Show Gist options
  • Save lukbukkit/49778422dbd799ab6e3eddded6b782ed to your computer and use it in GitHub Desktop.
Save lukbukkit/49778422dbd799ab6e3eddded6b782ed to your computer and use it in GitHub Desktop.
Advent of Code 2020 Day 19 - Implementation using a Earley Parser - https://adventofcode.com/2020/day/19
package main
import (
"advent-of-code/common"
"log"
"strings"
)
const inputFile = "day19/input.txt"
// We're using a Earley Parser
// - https://en.wikipedia.org/wiki/Earley_parser
// - http://www.brawer.ch/prolog/earley.pdf
// -> https://loup-vaillant.fr/tutorials/earley-parsing/
// We could improve the parser by checking for empty rules, but our input won't have empty rules
// https://loup-vaillant.fr/tutorials/earley-parsing/empty-rules
type Symbol struct {
text string
terminal bool
}
func (s Symbol) equal(other Symbol) bool {
return s.terminal == other.terminal && s.text == other.text
}
type Rule struct {
name string
symbols []Symbol
}
type Grammar struct {
rules []Rule
startRule string
}
type EarleyItem struct {
rule int
next int
start int
}
// Compares two items for equality (needed for safe append)
func (i EarleyItem) equal(other EarleyItem) bool {
return i.rule == other.rule && i.start == other.start && i.next == other.next
}
type EarleySet []EarleyItem
// Adds an item at the end of the Earley set
func (set EarleySet) unsafeAppend(item EarleyItem) EarleySet {
return append(set, item)
}
// Adds an item at the end of the Earley set, **unless already present**
func (set EarleySet) append(item EarleyItem) EarleySet {
for _, setItem := range set {
if item.equal(setItem) {
return set
}
}
return append(set, item)
}
type EarleyParser struct {
grammar *Grammar
input []Symbol
}
func (e *EarleyParser) Recognizes() bool {
S := e.buildItems()
// e.print(S)
// e.diagnose(S)
return e.hasCompleteParse(S)
}
func (e *EarleyParser) buildItems() []EarleySet {
// Earley sets
S := make([]EarleySet, 1)
for i := range S {
S[i] = make(EarleySet, 0)
}
// put start item(s) in S[0]
for i := 0; i < len(e.grammar.rules); i++ {
rule := e.grammar.rules[i]
if rule.name == e.grammar.startRule {
S[0] = S[0].unsafeAppend(EarleyItem{rule: i, start: 0, next: 0})
}
}
// populate the rest of S[i]
for i := 0; i < len(S); i++ {
for j := 0; j < len(S[i]); j++ {
symbol, next := e.nextSymbol(S[i][j])
if !next {
S = e.complete(S, i, j)
} else if symbol.terminal {
S = e.scan(S, i, j, *symbol)
} else if !symbol.terminal {
S = e.predict(S, i, *symbol)
} else {
log.Fatalf("illegal rule")
}
}
}
return S
}
func (e *EarleyParser) predict(S []EarleySet, i int, symbol Symbol) []EarleySet {
for ruleIndex, rule := range e.grammar.rules {
if rule.name == symbol.text {
S[i] = S[i].append(EarleyItem{rule: ruleIndex, next: 0, start: i})
}
}
return S
}
func (e *EarleyParser) scan(S []EarleySet, i, j int, symbol Symbol) []EarleySet {
item := S[i][j]
if i < len(e.input) && symbol.equal(e.input[i]) {
if len(S) <= i+1 {
S = append(S, make(EarleySet, 0))
}
S[i+1] = S[i+1].unsafeAppend(EarleyItem{rule: item.rule, next: item.next + 1, start: item.start})
}
return S
}
func (e *EarleyParser) complete(S []EarleySet, i, j int) []EarleySet {
item := S[i][j]
for _, oldItem := range S[item.start] {
symbol, exists := e.nextSymbol(oldItem)
if exists && symbol.text == e.name(item) {
S[i] = S[i].append(EarleyItem{rule: oldItem.rule, next: oldItem.next + 1, start: oldItem.start})
}
}
return S
}
// Next element in the rule of this item
func (e *EarleyParser) nextSymbol(item EarleyItem) (*Symbol, bool) {
symbols := e.grammar.rules[item.rule].symbols
next := item.next < len(symbols)
if next {
return &symbols[item.next], next
}
return nil, false
}
// Gets the name of the rule pointed by the item
func (e *EarleyParser) name(item EarleyItem) string {
return e.grammar.rules[item.rule].name
}
func (e *EarleyParser) hasPartialParse(S []EarleySet, i int) bool {
if len(S) <= i {
return false
}
for _, item := range S[i] {
rule := e.grammar.rules[item.rule]
if rule.name == e.grammar.startRule && item.next == len(rule.symbols) && item.start == 0 {
return true
}
}
return false
}
func (e *EarleyParser) hasCompleteParse(S []EarleySet) bool {
return e.hasPartialParse(S, len(e.input))
}
func (e *EarleyParser) lastPartialParse(S []EarleySet) int {
for i := len(S) - 1; i >= 0; i-- {
if e.hasPartialParse(S, i) {
return i
}
}
return -1
}
// Checks if there is a item in the last iteration which is
// * complete
// * has started at the beginning (state set 0)
// * has the same name as the start rule
func (e *EarleyParser) diagnose(S []EarleySet) {
if e.hasCompleteParse(S) {
log.Printf("the input has been reconised. congratulations!")
} else {
if len(S) == len(e.input) {
log.Printf("the input is maybe incomplete")
} else {
log.Printf("the input has stopped making sense at character %v", len(S))
}
lpp := e.lastPartialParse(S)
if lpp >= 0 {
log.Printf("the first %v characters of the input have been recongnised", lpp)
} else {
log.Printf("the begining of the input couldn't be parsed")
}
}
}
// Print the sets of Earley states
func (e *EarleyParser) print(S []EarleySet) {
for i, set := range S {
log.Printf("=== %v ===", i)
for _, item := range set {
rule := e.grammar.rules[item.rule]
ruleStr := ""
for j, symbol := range rule.symbols {
if j == item.next {
ruleStr = ruleStr + "• "
}
if !symbol.terminal {
ruleStr = ruleStr + symbol.text + " "
} else {
ruleStr = ruleStr + "'" + symbol.text + "' "
}
}
if item.next == len(rule.symbols) {
ruleStr = ruleStr + "•"
}
log.Printf("%12v -> %-30v (%v)", rule.name, ruleStr, item.start)
}
log.Println()
}
}
func ParseGrammar(lines []string) *Grammar {
for i, line := range lines {
if line == "" {
lines = lines[:i]
break
}
}
rules := make([]Rule, 0, 2*len(lines))
for _, line := range lines {
splitNameRules := strings.SplitN(line, ":", 2)
name := splitNameRules[0]
splitRules := strings.Split(splitNameRules[1], "|")
for _, ruleString := range splitRules {
fields := strings.Fields(ruleString)
symbols := make([]Symbol, 0, len(fields))
for _, field := range fields {
if strings.HasPrefix(field, "\"") {
symbols = append(symbols, Symbol{text: strings.Trim(field, "\""), terminal: true})
} else {
symbols = append(symbols, Symbol{text: field, terminal: false})
}
}
rules = append(rules, Rule{name: name, symbols: symbols})
}
}
return &Grammar{
rules: rules,
startRule: "0",
}
}
func EveryRuneAsTerminalSymbol(s string) []Symbol {
runes := []rune(s)
symbols := make([]Symbol, len(s))
for i, r := range runes {
symbols[i] = Symbol{
text: string([]rune{r}),
terminal: true,
}
}
return symbols
}
func FilterMessages(lines []string) []string {
for i, line := range lines {
if line == "" {
return lines[i+1:]
}
}
return lines
}
// As of part 2 it's required that some rules are updated
func UpdateRules(lines []string) []string {
cpy := make([]string, len(lines))
for i, line := range lines {
if strings.HasPrefix(line, "8:") {
cpy[i] = "8: 42 | 42 8"
} else if strings.HasPrefix(line, "11:") {
cpy[i] = "11: 42 31 | 42 11 31"
} else {
cpy[i] = line
}
}
return cpy
}
func CountAccepted(messages []string, grammar *Grammar) int {
sum := 0
for _, line := range messages {
parser := EarleyParser{
grammar: grammar,
input: EveryRuneAsTerminalSymbol(line),
}
if parser.Recognizes() {
sum++
}
}
return sum
}
func main() {
lines, err := common.ReadInputLines(inputFile)
common.AbortIfInputErr(err, inputFile)
grammar := ParseGrammar(lines)
messages := FilterMessages(lines)
accepted := CountAccepted(messages, grammar)
log.Printf("part 1: %v strings were accpeted", accepted)
newGrammar := ParseGrammar(UpdateRules(lines))
newAccepted := CountAccepted(messages, newGrammar)
log.Printf("part 2: %v strings were accepted", newAccepted)
}
package main
import (
"strconv"
"strings"
"testing"
)
func TS(s string) Symbol {
return Symbol{
text: s,
terminal: true,
}
}
func NTS(s string) Symbol {
return Symbol{
text: s,
terminal: false,
}
}
func GrammarOfTutorial() *Grammar {
rules := make([]Rule, 0)
sum := "Sum"
product := "Product"
factor := "Factor"
number := "Number"
plusMinus := "PlusMinus"
mulDiv := "MulDiv"
digit := "Digit"
rules = append(rules, Rule{name: sum, symbols: []Symbol{NTS(sum), NTS(plusMinus), NTS(product)}})
rules = append(rules, Rule{name: sum, symbols: []Symbol{NTS(product)}})
rules = append(rules, Rule{name: product, symbols: []Symbol{NTS(product), NTS(mulDiv), NTS(factor)}})
rules = append(rules, Rule{name: product, symbols: []Symbol{NTS(factor)}})
rules = append(rules, Rule{name: factor, symbols: []Symbol{TS("("), NTS(sum), TS(")")}})
rules = append(rules, Rule{name: factor, symbols: []Symbol{NTS(number)}})
rules = append(rules, Rule{name: number, symbols: []Symbol{NTS(digit), NTS(number)}})
rules = append(rules, Rule{name: number, symbols: []Symbol{NTS(digit)}})
rules = append(rules, Rule{name: plusMinus, symbols: []Symbol{TS("+")}})
rules = append(rules, Rule{name: plusMinus, symbols: []Symbol{TS("-")}})
rules = append(rules, Rule{name: mulDiv, symbols: []Symbol{TS("*")}})
rules = append(rules, Rule{name: mulDiv, symbols: []Symbol{TS("/")}})
for i := 0; i < 10; i++ {
rules = append(rules, Rule{name: digit, symbols: []Symbol{TS(strconv.Itoa(i))}})
}
return &Grammar{
rules: rules,
startRule: sum,
}
}
func TestEarleyParser_Recognizes(t *testing.T) {
type fields struct {
grammar *Grammar
input []Symbol
}
tests := []struct {
name string
fields fields
want bool
}{
{name: "Example 1", fields: fields{
grammar: GrammarOfTutorial(),
input: EveryRuneAsTerminalSymbol("1+(2*3+4)"),
}, want: true},
{name: "Example 2", fields: fields{
grammar: GrammarOfTutorial(),
input: EveryRuneAsTerminalSymbol("1+(2*3+"),
}, want: false},
{name: "Example 3", fields: fields{
grammar: GrammarOfTutorial(),
input: EveryRuneAsTerminalSymbol("2*3*4("),
}, want: false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
e := &EarleyParser{
grammar: tt.fields.grammar,
input: tt.fields.input,
}
if got := e.Recognizes(); got != tt.want {
t.Errorf("Recognizes() = %v, want %v", got, tt.want)
}
})
}
}
func TestCountAccepted(t *testing.T) {
type args struct {
lines []string
grammar *Grammar
}
tests := []struct {
name string
args args
want int
}{
{name: "Example Part 1", args: args{
lines: FilterMessages(strings.Split(examplePart1, "\n")),
grammar: ParseGrammar(strings.Split(examplePart1, "\n")),
}, want: 2},
{name: "Example Part 2", args: args{
lines: FilterMessages(strings.Split(examplePart2, "\n")),
grammar: ParseGrammar(UpdateRules(strings.Split(examplePart2, "\n"))),
}, want: 12},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := CountAccepted(tt.args.lines, tt.args.grammar); got != tt.want {
t.Errorf("CountAccepted() = %v, want %v", got, tt.want)
}
})
}
}
const examplePart1 = `0: 4 1 5
1: 2 3 | 3 2
2: 4 4 | 5 5
3: 4 5 | 5 4
4: "a"
5: "b"
ababbb
bababa
abbbab
aaabbb
aaaabbb`
const examplePart2 = `42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1
abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba`
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment