Skip to content

Instantly share code, notes, and snippets.

@pczarn
Last active May 25, 2019 17:33
Show Gist options
  • Save pczarn/cfbb4a88bef2c2c429c731978cafa377 to your computer and use it in GitHub Desktop.
Save pczarn/cfbb4a88bef2c2c429c731978cafa377 to your computer and use it in GitHub Desktop.
Defining grammars / parse forests / BNF eventually for an Earley parser in Agda
open import Data.List
open import Data.Nat
open import Data.Nat.Properties
open import Relation.Binary.PropositionalEquality
-- open import Data.Product
module Earley where
data Terminality : Set where
T : Terminality
NT : Terminality
data StringLevel : Set where
leaf-level : (t : Terminality) → StringLevel
product-level : StringLevel
data Level : Set where
string-level : StringLevel → Level
sum-level : Level
leaf-level' = λ (t : Terminality) → string-level (leaf-level t)
product-level' = string-level product-level
-- -- --
module Grammar
(Symbol : (Terminality → Set))
where
infixr 14 _~∷~_
infixr 12 _|∷|_
-- Ring without an additive inverse.
-- _~∷~_ is multiplication. ε is the multiplicative identity.
-- _|∷|_ is addition. ∅ is the additive identity.
data Rhs : Level → Set
data Rhs where
leaf : {t : Terminality} → Symbol t → Rhs (leaf-level' t)
ε : Rhs product-level'
_~∷~_ : {t : Terminality} → (Rhs (leaf-level' t)) → (xs : Rhs product-level') → Rhs product-level'
∅ : Rhs sum-level
_|∷|_ : (ys : Rhs product-level') → (y : Rhs sum-level) → Rhs sum-level
-- -- --
infixl 13 _~_
infixl 11 _‖_
_~_ : {l0 l1 : StringLevel} → (xx : Rhs (string-level l0)) → (yy : Rhs (string-level l1)) → Rhs product-level'
_~_ {leaf-level _} {leaf-level _} x y = x ~∷~ y ~∷~ ε
_~_ {leaf-level t} {product-level} (leaf {t} x) yy = (leaf x) ~∷~ yy
_~_ {product-level} {leaf-level _} ε val = val ~∷~ ε
_~_ {product-level} {product-level} ε yy = yy
_~_ {product-level} (x ~∷~ xs) yy = x ~∷~ (xs ~ yy)
_‖_ : {a b : Level} → (xx : Rhs a) → (yy : Rhs b) → Rhs sum-level
leaf l ‖ leaf r = leaf l ~∷~ ε |∷| leaf r ~∷~ ε |∷| ∅
leaf l ‖ ε = leaf l ~∷~ ε |∷| ε |∷| ∅
leaf l ‖ y ~∷~ ys = leaf l ~∷~ ε |∷| y ~∷~ ys |∷| ∅
leaf l ‖ ∅ = leaf l ~∷~ ε |∷| ∅
leaf l ‖ y |∷| ys = leaf l ~∷~ ε |∷| y |∷| ys
ε ‖ leaf l = ε |∷| leaf l ~∷~ ε |∷| ∅
ε ‖ ε = ε |∷| ε |∷| ∅
ε ‖ y ~∷~ ys = ε |∷| y ~∷~ ys |∷| ∅
ε ‖ ∅ = ε |∷| ∅
ε ‖ y |∷| ys = ε |∷| y |∷| ys
x ~∷~ xs ‖ leaf l = x ~∷~ xs |∷| leaf l ~∷~ ε |∷| ∅
x ~∷~ xs ‖ ε = x ~∷~ xs |∷| ε |∷| ∅
x ~∷~ xs ‖ y ~∷~ ys = x ~∷~ xs |∷| y ~∷~ ys |∷| ∅
x ~∷~ xs ‖ ∅ = x ~∷~ xs |∷| ∅
x ~∷~ xs ‖ y |∷| ys = x ~∷~ xs |∷| y |∷| ys
∅ ‖ leaf l = leaf l ~∷~ ε |∷| ∅
∅ ‖ ε = ε |∷| ∅
∅ ‖ y ~∷~ ys = y ~∷~ ys |∷| ∅
∅ ‖ ∅ = ∅
∅ ‖ y |∷| ys = y |∷| ys
x |∷| xs ‖ leaf l = leaf l ~∷~ ε |∷| x |∷| xs
x |∷| xs ‖ ε = ε |∷| x |∷| xs
x |∷| xs ‖ y ~∷~ ys = x |∷| (xs ‖ y ~∷~ ys)
x |∷| xs ‖ ∅ = x |∷| xs
x |∷| xs ‖ y |∷| ys = x |∷| y |∷| (xs ‖ ys)
-- -- --
infixr 10 _∷=_
data Rule : Set where
_∷=_ : {l : Level} → (α : Rhs (leaf-level' NT)) → (β : Rhs l) → Rule
Grammar = List Rule
infixr 9 _,_
infix 8 bnf_
end : Grammar
end = []
_,_ : Rule → Grammar → Grammar
r , g = r ∷ g
bnf_ : Grammar → Grammar
bnf g = g
-- -- --
module Bocage
(Symbol : (Terminality → Set))
where
-- (a parse forest is a grammar where nonterminals are parse forests)
data TerminalOrBocage : Terminality → Set
open Grammar Symbol using (Rule)
open Grammar TerminalOrBocage
public
using (leaf; ε; ∅; _~_; _‖_)
renaming (Rhs to Forest; _~∷~_ to _<×>_; _|∷|_ to _|+|_)
data TerminalOrBocage where
terminal : (sym : Symbol T) → TerminalOrBocage T
bocage : (rule : Rule) → (parse-forest : Forest sum-level) → TerminalOrBocage NT
data MySymbol : Terminality → Set
data MySymbol where
MyT : ℕ → MySymbol T
MyNT : ℕ → MySymbol NT
module _ where
open Grammar MySymbol
module Alphabeth {S : Level → Set} (f : MySymbol T → S (leaf-level' T)) where
The = f (MyT 0)
A = f (MyT 1)
Man = f (MyT 2)
Dog = f (MyT 3)
Bites = f (MyT 4)
Pets = f (MyT 5)
open Alphabeth {Rhs} leaf
sentence = leaf (MyNT 0)
subject = leaf (MyNT 1)
predicate = leaf (MyNT 2)
article = leaf (MyNT 3)
noun = leaf (MyNT 4)
verb = leaf (MyNT 5)
direct_object = leaf (MyNT 6)
my-grammar =
bnf
sentence ∷= subject ~ predicate ,
subject ∷= article ~ noun ,
predicate ∷= verb ~ direct_object ,
direct_object ∷= article ~ noun ,
article ∷= The ‖ A ,
noun ∷= Man ‖ Dog ,
verb ∷= Bites ‖ Pets ,
end
String = List (Rhs (leaf-level' T))
my-words = The ∷ Man ∷ Pets ∷ A ∷ Dog ∷ []
module _ where
open Grammar MySymbol using (_∷=_; _~_; _‖_; Rhs) renaming (leaf to leaf')
open Bocage MySymbol hiding (_~_; _‖_)
open Alphabeth {Rhs} leaf'
open Alphabeth {Forest} (λ (sym : MySymbol T) → leaf (terminal sym)) renaming (The to The'; A to A'; Man to Man'; Dog to Dog'; Bites to Bites'; Pets to Pets')
forest =
bocage (sentence ∷= subject ~ predicate) (
leaf (
bocage (subject ∷= article ~ noun) (
leaf (bocage (article ∷= The ‖ A) (The' <×> ε |+| ∅))
<×>
leaf (bocage (noun ∷= Man ‖ Dog) (Man' <×> ε |+| ∅))
<×> ε
|+| ∅
)
)
<×>
leaf (bocage (predicate ∷= verb ~ direct_object) (
leaf (bocage (verb ∷= Bites ‖ Pets) (Bites' <×> ε |+| ∅))
<×>
leaf (bocage (direct_object ∷= article ~ noun) (
leaf (bocage (article ∷= The ‖ A) (A' <×> ε |+| ∅))
<×>
leaf (bocage (noun ∷= Man ‖ Dog) (Dog' <×> ε |+| ∅))
<×> ε
|+| ∅
)
)
<×> ε
|+| ∅
)
)
<×> ε
|+| ∅
)
module ParseForestExamples where
open Grammar MySymbol using (Rhs) renaming (_∷=_ to _∷='_; leaf to leaf'; ε to ε'; ∅ to ∅'; _~_ to _~'_; _‖_ to _‖'_)
open Bocage MySymbol
open Alphabeth {Forest} (λ (sym : MySymbol T) → leaf (terminal sym))
flatten-string : {l : StringLevel} → (forest : Forest (string-level l)) → Rhs product-level'
flatten-sum : (forest : Forest sum-level) → Rhs sum-level
flatten-string {leaf-level T} (leaf (terminal t)) = leaf' t ~' ε'
flatten-string {leaf-level NT} (leaf (bocage (leaf' α ∷=' _) _)) = leaf' α ~' ε'
flatten-string {product-level} ε = ε'
flatten-string {product-level} (x <×> xs) = flatten-string x ~' flatten-string xs
flatten-sum ∅ = ∅'
flatten-sum (x |+| xs) = flatten-string x ‖' flatten-sum xs
_∷=_ : {l : Level} → (α : Rhs (leaf-level' NT)) → (β : Forest l) → Forest (leaf-level' NT)
_∷=_ {string-level (leaf-level _)} α β = leaf (bocage (α ∷=' flatten-string β) (β <×> ε |+| ∅))
_∷=_ {string-level product-level} α β = leaf (bocage (α ∷=' flatten-string β) (β |+| ∅))
_∷=_ {sum-level} α β = leaf (bocage (α ∷=' flatten-sum β) β)
-- my-words = The ∷ Man ∷ Pets ∷ A ∷ Dog ∷ []
forest' =
sentence ∷= (
subject ∷= (
article ∷= The
) ~ (
noun ∷= Man
)
) ~ (
predicate ∷= (
verb ∷= Pets
) ~ (
direct_object ∷= (
article ∷= A
) ~ (
noun ∷= Dog
)
)
)
-- -- --
module Transform
(Symbol : (Terminality → Set))
where
open Grammar Symbol using (Grammar; Rule; Rhs; _~∷~_; ∅) renaming (_∷=_ to _∷=_; leaf to leaf'; ε to ε'; _~_ to _~'_; _‖_ to _‖'_)
open Bocage Symbol using (Forest; _<×>_; _|+|_)
data ⊥ : Set where
-- bottom
IsFlatRhs : {l : Level} → Rhs l → Set
IsFlatRhs {string-level l} _ = Rhs (string-level l)
IsFlatRhs {sum-level} _ = ⊥
-- data IsFlatRhs where
-- leaf : {t : Terminality} → (sym : Symbol t) → IsFlatRhs (leaf' sym)
-- ε : IsFlatRhs ε'
-- _~∷~_ : {t : Terminality} → (x : Rhs (leaf-level' t)) → (xs : Rhs product-level') → IsFlatRhs (x ~∷~ xs)
data IsBinarizedRhs : {l : Level} → Rhs l → Set
data IsBinarizedRhs where
leaf : {t : Terminality} → (sym : Symbol t) → IsBinarizedRhs (leaf' sym)
ε : IsBinarizedRhs ε'
_~ε : {t : Terminality}
→ (x : Rhs (leaf-level' t))
→ IsBinarizedRhs (x ~∷~ ε')
_~_ : {t-l t-r : Terminality}
→ (left : Rhs (leaf-level' t-l))
→ (right : Rhs (leaf-level' t-r))
→ IsBinarizedRhs (left ~∷~ right ~∷~ ε')
-- -- --
module IsX (IsXRhs : ({l : Level} → Rhs l → Set)) where
data IsXRule : Rule → Set
data IsXRule where
_∷=_ : {l : Level} {β' : Rhs l} → (α : IsXRhs Rhs (leaf-level' NT)) → (β : IsXRhs β') → IsXRule (α ∷= β')
data IsXGrammar : Grammar → Set
data IsXGrammar where
[] : IsXGrammar []
_∷_ : {l : Level} {x' : Rhs l} {xs' : Grammar} (x : IsXRule x') → (xs : IsXGrammar xs') → IsXGrammar (x' ∷ xs')
-- -- --
module _ where
open IsX IsFlatRhs renaming (IsXGrammar to IsFlat; IsXRule to IsFlatRule)
open IsX IsBinarizedRhs renaming (IsXGrammar to IsBinarized; IsXRule to IsBinarizedRule)
-- -- --
-- IsSameLanguageRecognizedBy : Grammar → Grammar → Symbol NT → Set
-- -- ∀ {t : Terminality} → ∀ (sym : Symbol NT) → (g0 : Grammar) → (g1 : Grammar) → Set
-- ∀ (sym : Symbol NT) → (g0 : Grammar) → (g1 : Grammar) → Set
data IsSameLanguageRule : Rule → Rule → Set
data IsSameLanguage : Grammar → Grammar → Set
data IsSameLanguage where
[] : IsSameLanguage [] []
α∷=∅_ : {a b : Grammar} {α : Rhs (leaf-level' NT)} → IsSameLanguage a b → IsSameLanguage ((α ∷= ∅) ∷ a) b
_∷_ : {l : Level} {x y : Rhs l} {xs ys : Grammar} (x : IsSameLanguageRule x') → (xs : IsSameLanguage xs') → IsSameLanguage (x' ∷ xs')
_++l_ : {a b c d : Grammar} (l0 : IsSameLanguage a b) → (l1 : IsSameLanguage c d) → IsSameLanguage (a ++ c) (b ++ d)
_++l_ [] l1 = l1
_++l_ (r ∷ rs) l1 = r ∷ (_++l_ rs l1)
_++f_ : {a b : Grammar} (f0 : IsFlat a) → (f1 : IsFlat b) → IsFlat (a ++ b)
_++f_ [] l1 = l1
_++f_ (r ∷ rs) l1 = r ∷ (_++f_ rs l1)
-- -- --
-- generate : Grammar → List String
-- IsSameLanguage : (a : Grammar) → (b : Grammar) → generate a ≡ generate b
-- -- --
record FlatGrammar : Set where
field
grammar : Grammar
isFlat : IsFlat grammar
record FlatteningResult (g : Grammar) : Set where
field
result : FlatGrammar
isSameLanguage : IsSameLanguage g (grammar result)
record RuleFlatteningResult (r : Rule) : Set where
field
grammar : Grammar
isFlat : IsFlat grammar
isSameLanguage : IsSameLanguage (r ∷ []) grammar
record BinarizedGrammar : Set where
field
grammar : Grammar
isBinarized : IsBinarized grammar
record BinarizationResult (g : Grammar) : Set where
field
result : BinarizedGrammar
isSameLanguage : IsSameLanguage g (grammar result)
flatten-rule : (r : Rule) → RuleFlatteningResult r
flatten-rule (α ∷= (β |∷| ɣ)) =
record
{ grammar = (α ∷= β) ∷ grammar (flatten-rule (α ∷= ɣ))
; isFlat = (α ∷= β) ∷ isFlat (flatten-rule (α ∷= ɣ))
; isSameLanguage = (α ∷= β) ∷ isSameLanguage (flatten-rule (α ∷= ɣ))
}
flatten-rule (α ∷= ∅) =
record
{ grammar = []
; isFlat = []
; isSameLanguage = α∷=∅ []
}
flatten : (g : Grammar) → FlatteningResult g
flatten r ∷ rs =
record
{ result = record
{ grammar = rule (flatten-rule r) ++ grammar (result (flatten rs))
; isFlat = isFlat (flatten-rule r) ++f isFlat (result (flatten rs))
}
; isSameLanguage = isSameLanguage (flatten-rule r) ++l isSameLanguage flatten rs
}
-- binarize : (g : Grammar) → BinarizationResult g
-- parse : BinarizedGrammar → String → Forest
-- parse : (a : Forest) → (b : Forest) → Intersection a b
-- Intersection : (a b : Forest (leaf-level' NT)) : Set
data DepVec (A : ℕ → Set) : ℕ → Set where
[] : DepVec A 0
_∷_ : {n : ℕ} → A n -> DepVec A n → DepVec A (suc n)
data SparseDepVec (A : ℕ → Set) : ℕ → Set where
[] : DepVec A 0
_∷_ : {n o : ℕ} → A o → n ≤ o → DepVec A o → DepVec A (suc o)
record Item (d o : ℕ) : Set where
field
dot : Fin d
origin : Fin (suc o)
node : Forest sum-level
record Recognizer (n : ℕ) : Set where
field
grammar : BinarizedGrammar
node : Forest sum-level
medial-items : DepVec (SparseDepVec (maxdot & SparseDepVec Item) n) n
current-medial-items : List (Item n)
parse : BinarizedGrammar → String → Forest
parse-acc : Recognizer → String → Recognizer
scan : {n : ℕ} → Recognizer n → Rhs (leaf-level' T) → Recognizer (suc n)
parse g l = node (parse-acc g l)
parse-acc r [] = r
parse-acc r (x ∷ xs) = parse-acc (scan r x) xs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment