Last active
May 25, 2019 17:33
-
-
Save pczarn/cfbb4a88bef2c2c429c731978cafa377 to your computer and use it in GitHub Desktop.
Defining grammars / parse forests / BNF eventually for an Earley parser in Agda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open import Data.List | |
open import Data.Nat | |
open import Data.Nat.Properties | |
open import Relation.Binary.PropositionalEquality | |
-- open import Data.Product | |
module Earley where | |
data Terminality : Set where | |
T : Terminality | |
NT : Terminality | |
data StringLevel : Set where | |
leaf-level : (t : Terminality) → StringLevel | |
product-level : StringLevel | |
data Level : Set where | |
string-level : StringLevel → Level | |
sum-level : Level | |
leaf-level' = λ (t : Terminality) → string-level (leaf-level t) | |
product-level' = string-level product-level | |
-- -- -- | |
module Grammar | |
(Symbol : (Terminality → Set)) | |
where | |
infixr 14 _~∷~_ | |
infixr 12 _|∷|_ | |
-- Ring without an additive inverse. | |
-- _~∷~_ is multiplication. ε is the multiplicative identity. | |
-- _|∷|_ is addition. ∅ is the additive identity. | |
data Rhs : Level → Set | |
data Rhs where | |
leaf : {t : Terminality} → Symbol t → Rhs (leaf-level' t) | |
ε : Rhs product-level' | |
_~∷~_ : {t : Terminality} → (Rhs (leaf-level' t)) → (xs : Rhs product-level') → Rhs product-level' | |
∅ : Rhs sum-level | |
_|∷|_ : (ys : Rhs product-level') → (y : Rhs sum-level) → Rhs sum-level | |
-- -- -- | |
infixl 13 _~_ | |
infixl 11 _‖_ | |
_~_ : {l0 l1 : StringLevel} → (xx : Rhs (string-level l0)) → (yy : Rhs (string-level l1)) → Rhs product-level' | |
_~_ {leaf-level _} {leaf-level _} x y = x ~∷~ y ~∷~ ε | |
_~_ {leaf-level t} {product-level} (leaf {t} x) yy = (leaf x) ~∷~ yy | |
_~_ {product-level} {leaf-level _} ε val = val ~∷~ ε | |
_~_ {product-level} {product-level} ε yy = yy | |
_~_ {product-level} (x ~∷~ xs) yy = x ~∷~ (xs ~ yy) | |
_‖_ : {a b : Level} → (xx : Rhs a) → (yy : Rhs b) → Rhs sum-level | |
leaf l ‖ leaf r = leaf l ~∷~ ε |∷| leaf r ~∷~ ε |∷| ∅ | |
leaf l ‖ ε = leaf l ~∷~ ε |∷| ε |∷| ∅ | |
leaf l ‖ y ~∷~ ys = leaf l ~∷~ ε |∷| y ~∷~ ys |∷| ∅ | |
leaf l ‖ ∅ = leaf l ~∷~ ε |∷| ∅ | |
leaf l ‖ y |∷| ys = leaf l ~∷~ ε |∷| y |∷| ys | |
ε ‖ leaf l = ε |∷| leaf l ~∷~ ε |∷| ∅ | |
ε ‖ ε = ε |∷| ε |∷| ∅ | |
ε ‖ y ~∷~ ys = ε |∷| y ~∷~ ys |∷| ∅ | |
ε ‖ ∅ = ε |∷| ∅ | |
ε ‖ y |∷| ys = ε |∷| y |∷| ys | |
x ~∷~ xs ‖ leaf l = x ~∷~ xs |∷| leaf l ~∷~ ε |∷| ∅ | |
x ~∷~ xs ‖ ε = x ~∷~ xs |∷| ε |∷| ∅ | |
x ~∷~ xs ‖ y ~∷~ ys = x ~∷~ xs |∷| y ~∷~ ys |∷| ∅ | |
x ~∷~ xs ‖ ∅ = x ~∷~ xs |∷| ∅ | |
x ~∷~ xs ‖ y |∷| ys = x ~∷~ xs |∷| y |∷| ys | |
∅ ‖ leaf l = leaf l ~∷~ ε |∷| ∅ | |
∅ ‖ ε = ε |∷| ∅ | |
∅ ‖ y ~∷~ ys = y ~∷~ ys |∷| ∅ | |
∅ ‖ ∅ = ∅ | |
∅ ‖ y |∷| ys = y |∷| ys | |
x |∷| xs ‖ leaf l = leaf l ~∷~ ε |∷| x |∷| xs | |
x |∷| xs ‖ ε = ε |∷| x |∷| xs | |
x |∷| xs ‖ y ~∷~ ys = x |∷| (xs ‖ y ~∷~ ys) | |
x |∷| xs ‖ ∅ = x |∷| xs | |
x |∷| xs ‖ y |∷| ys = x |∷| y |∷| (xs ‖ ys) | |
-- -- -- | |
infixr 10 _∷=_ | |
data Rule : Set where | |
_∷=_ : {l : Level} → (α : Rhs (leaf-level' NT)) → (β : Rhs l) → Rule | |
Grammar = List Rule | |
infixr 9 _,_ | |
infix 8 bnf_ | |
end : Grammar | |
end = [] | |
_,_ : Rule → Grammar → Grammar | |
r , g = r ∷ g | |
bnf_ : Grammar → Grammar | |
bnf g = g | |
-- -- -- | |
module Bocage | |
(Symbol : (Terminality → Set)) | |
where | |
-- (a parse forest is a grammar where nonterminals are parse forests) | |
data TerminalOrBocage : Terminality → Set | |
open Grammar Symbol using (Rule) | |
open Grammar TerminalOrBocage | |
public | |
using (leaf; ε; ∅; _~_; _‖_) | |
renaming (Rhs to Forest; _~∷~_ to _<×>_; _|∷|_ to _|+|_) | |
data TerminalOrBocage where | |
terminal : (sym : Symbol T) → TerminalOrBocage T | |
bocage : (rule : Rule) → (parse-forest : Forest sum-level) → TerminalOrBocage NT | |
data MySymbol : Terminality → Set | |
data MySymbol where | |
MyT : ℕ → MySymbol T | |
MyNT : ℕ → MySymbol NT | |
module _ where | |
open Grammar MySymbol | |
module Alphabeth {S : Level → Set} (f : MySymbol T → S (leaf-level' T)) where | |
The = f (MyT 0) | |
A = f (MyT 1) | |
Man = f (MyT 2) | |
Dog = f (MyT 3) | |
Bites = f (MyT 4) | |
Pets = f (MyT 5) | |
open Alphabeth {Rhs} leaf | |
sentence = leaf (MyNT 0) | |
subject = leaf (MyNT 1) | |
predicate = leaf (MyNT 2) | |
article = leaf (MyNT 3) | |
noun = leaf (MyNT 4) | |
verb = leaf (MyNT 5) | |
direct_object = leaf (MyNT 6) | |
my-grammar = | |
bnf | |
sentence ∷= subject ~ predicate , | |
subject ∷= article ~ noun , | |
predicate ∷= verb ~ direct_object , | |
direct_object ∷= article ~ noun , | |
article ∷= The ‖ A , | |
noun ∷= Man ‖ Dog , | |
verb ∷= Bites ‖ Pets , | |
end | |
String = List (Rhs (leaf-level' T)) | |
my-words = The ∷ Man ∷ Pets ∷ A ∷ Dog ∷ [] | |
module _ where | |
open Grammar MySymbol using (_∷=_; _~_; _‖_; Rhs) renaming (leaf to leaf') | |
open Bocage MySymbol hiding (_~_; _‖_) | |
open Alphabeth {Rhs} leaf' | |
open Alphabeth {Forest} (λ (sym : MySymbol T) → leaf (terminal sym)) renaming (The to The'; A to A'; Man to Man'; Dog to Dog'; Bites to Bites'; Pets to Pets') | |
forest = | |
bocage (sentence ∷= subject ~ predicate) ( | |
leaf ( | |
bocage (subject ∷= article ~ noun) ( | |
leaf (bocage (article ∷= The ‖ A) (The' <×> ε |+| ∅)) | |
<×> | |
leaf (bocage (noun ∷= Man ‖ Dog) (Man' <×> ε |+| ∅)) | |
<×> ε | |
|+| ∅ | |
) | |
) | |
<×> | |
leaf (bocage (predicate ∷= verb ~ direct_object) ( | |
leaf (bocage (verb ∷= Bites ‖ Pets) (Bites' <×> ε |+| ∅)) | |
<×> | |
leaf (bocage (direct_object ∷= article ~ noun) ( | |
leaf (bocage (article ∷= The ‖ A) (A' <×> ε |+| ∅)) | |
<×> | |
leaf (bocage (noun ∷= Man ‖ Dog) (Dog' <×> ε |+| ∅)) | |
<×> ε | |
|+| ∅ | |
) | |
) | |
<×> ε | |
|+| ∅ | |
) | |
) | |
<×> ε | |
|+| ∅ | |
) | |
module ParseForestExamples where | |
open Grammar MySymbol using (Rhs) renaming (_∷=_ to _∷='_; leaf to leaf'; ε to ε'; ∅ to ∅'; _~_ to _~'_; _‖_ to _‖'_) | |
open Bocage MySymbol | |
open Alphabeth {Forest} (λ (sym : MySymbol T) → leaf (terminal sym)) | |
flatten-string : {l : StringLevel} → (forest : Forest (string-level l)) → Rhs product-level' | |
flatten-sum : (forest : Forest sum-level) → Rhs sum-level | |
flatten-string {leaf-level T} (leaf (terminal t)) = leaf' t ~' ε' | |
flatten-string {leaf-level NT} (leaf (bocage (leaf' α ∷=' _) _)) = leaf' α ~' ε' | |
flatten-string {product-level} ε = ε' | |
flatten-string {product-level} (x <×> xs) = flatten-string x ~' flatten-string xs | |
flatten-sum ∅ = ∅' | |
flatten-sum (x |+| xs) = flatten-string x ‖' flatten-sum xs | |
_∷=_ : {l : Level} → (α : Rhs (leaf-level' NT)) → (β : Forest l) → Forest (leaf-level' NT) | |
_∷=_ {string-level (leaf-level _)} α β = leaf (bocage (α ∷=' flatten-string β) (β <×> ε |+| ∅)) | |
_∷=_ {string-level product-level} α β = leaf (bocage (α ∷=' flatten-string β) (β |+| ∅)) | |
_∷=_ {sum-level} α β = leaf (bocage (α ∷=' flatten-sum β) β) | |
-- my-words = The ∷ Man ∷ Pets ∷ A ∷ Dog ∷ [] | |
forest' = | |
sentence ∷= ( | |
subject ∷= ( | |
article ∷= The | |
) ~ ( | |
noun ∷= Man | |
) | |
) ~ ( | |
predicate ∷= ( | |
verb ∷= Pets | |
) ~ ( | |
direct_object ∷= ( | |
article ∷= A | |
) ~ ( | |
noun ∷= Dog | |
) | |
) | |
) | |
-- -- -- | |
module Transform | |
(Symbol : (Terminality → Set)) | |
where | |
open Grammar Symbol using (Grammar; Rule; Rhs; _~∷~_; ∅) renaming (_∷=_ to _∷=_; leaf to leaf'; ε to ε'; _~_ to _~'_; _‖_ to _‖'_) | |
open Bocage Symbol using (Forest; _<×>_; _|+|_) | |
data ⊥ : Set where | |
-- bottom | |
IsFlatRhs : {l : Level} → Rhs l → Set | |
IsFlatRhs {string-level l} _ = Rhs (string-level l) | |
IsFlatRhs {sum-level} _ = ⊥ | |
-- data IsFlatRhs where | |
-- leaf : {t : Terminality} → (sym : Symbol t) → IsFlatRhs (leaf' sym) | |
-- ε : IsFlatRhs ε' | |
-- _~∷~_ : {t : Terminality} → (x : Rhs (leaf-level' t)) → (xs : Rhs product-level') → IsFlatRhs (x ~∷~ xs) | |
data IsBinarizedRhs : {l : Level} → Rhs l → Set | |
data IsBinarizedRhs where | |
leaf : {t : Terminality} → (sym : Symbol t) → IsBinarizedRhs (leaf' sym) | |
ε : IsBinarizedRhs ε' | |
_~ε : {t : Terminality} | |
→ (x : Rhs (leaf-level' t)) | |
→ IsBinarizedRhs (x ~∷~ ε') | |
_~_ : {t-l t-r : Terminality} | |
→ (left : Rhs (leaf-level' t-l)) | |
→ (right : Rhs (leaf-level' t-r)) | |
→ IsBinarizedRhs (left ~∷~ right ~∷~ ε') | |
-- -- -- | |
module IsX (IsXRhs : ({l : Level} → Rhs l → Set)) where | |
data IsXRule : Rule → Set | |
data IsXRule where | |
_∷=_ : {l : Level} {β' : Rhs l} → (α : IsXRhs Rhs (leaf-level' NT)) → (β : IsXRhs β') → IsXRule (α ∷= β') | |
data IsXGrammar : Grammar → Set | |
data IsXGrammar where | |
[] : IsXGrammar [] | |
_∷_ : {l : Level} {x' : Rhs l} {xs' : Grammar} (x : IsXRule x') → (xs : IsXGrammar xs') → IsXGrammar (x' ∷ xs') | |
-- -- -- | |
module _ where | |
open IsX IsFlatRhs renaming (IsXGrammar to IsFlat; IsXRule to IsFlatRule) | |
open IsX IsBinarizedRhs renaming (IsXGrammar to IsBinarized; IsXRule to IsBinarizedRule) | |
-- -- -- | |
-- IsSameLanguageRecognizedBy : Grammar → Grammar → Symbol NT → Set | |
-- -- ∀ {t : Terminality} → ∀ (sym : Symbol NT) → (g0 : Grammar) → (g1 : Grammar) → Set | |
-- ∀ (sym : Symbol NT) → (g0 : Grammar) → (g1 : Grammar) → Set | |
data IsSameLanguageRule : Rule → Rule → Set | |
data IsSameLanguage : Grammar → Grammar → Set | |
data IsSameLanguage where | |
[] : IsSameLanguage [] [] | |
α∷=∅_ : {a b : Grammar} {α : Rhs (leaf-level' NT)} → IsSameLanguage a b → IsSameLanguage ((α ∷= ∅) ∷ a) b | |
_∷_ : {l : Level} {x y : Rhs l} {xs ys : Grammar} (x : IsSameLanguageRule x') → (xs : IsSameLanguage xs') → IsSameLanguage (x' ∷ xs') | |
_++l_ : {a b c d : Grammar} (l0 : IsSameLanguage a b) → (l1 : IsSameLanguage c d) → IsSameLanguage (a ++ c) (b ++ d) | |
_++l_ [] l1 = l1 | |
_++l_ (r ∷ rs) l1 = r ∷ (_++l_ rs l1) | |
_++f_ : {a b : Grammar} (f0 : IsFlat a) → (f1 : IsFlat b) → IsFlat (a ++ b) | |
_++f_ [] l1 = l1 | |
_++f_ (r ∷ rs) l1 = r ∷ (_++f_ rs l1) | |
-- -- -- | |
-- generate : Grammar → List String | |
-- IsSameLanguage : (a : Grammar) → (b : Grammar) → generate a ≡ generate b | |
-- -- -- | |
record FlatGrammar : Set where | |
field | |
grammar : Grammar | |
isFlat : IsFlat grammar | |
record FlatteningResult (g : Grammar) : Set where | |
field | |
result : FlatGrammar | |
isSameLanguage : IsSameLanguage g (grammar result) | |
record RuleFlatteningResult (r : Rule) : Set where | |
field | |
grammar : Grammar | |
isFlat : IsFlat grammar | |
isSameLanguage : IsSameLanguage (r ∷ []) grammar | |
record BinarizedGrammar : Set where | |
field | |
grammar : Grammar | |
isBinarized : IsBinarized grammar | |
record BinarizationResult (g : Grammar) : Set where | |
field | |
result : BinarizedGrammar | |
isSameLanguage : IsSameLanguage g (grammar result) | |
flatten-rule : (r : Rule) → RuleFlatteningResult r | |
flatten-rule (α ∷= (β |∷| ɣ)) = | |
record | |
{ grammar = (α ∷= β) ∷ grammar (flatten-rule (α ∷= ɣ)) | |
; isFlat = (α ∷= β) ∷ isFlat (flatten-rule (α ∷= ɣ)) | |
; isSameLanguage = (α ∷= β) ∷ isSameLanguage (flatten-rule (α ∷= ɣ)) | |
} | |
flatten-rule (α ∷= ∅) = | |
record | |
{ grammar = [] | |
; isFlat = [] | |
; isSameLanguage = α∷=∅ [] | |
} | |
flatten : (g : Grammar) → FlatteningResult g | |
flatten r ∷ rs = | |
record | |
{ result = record | |
{ grammar = rule (flatten-rule r) ++ grammar (result (flatten rs)) | |
; isFlat = isFlat (flatten-rule r) ++f isFlat (result (flatten rs)) | |
} | |
; isSameLanguage = isSameLanguage (flatten-rule r) ++l isSameLanguage flatten rs | |
} | |
-- binarize : (g : Grammar) → BinarizationResult g | |
-- parse : BinarizedGrammar → String → Forest | |
-- parse : (a : Forest) → (b : Forest) → Intersection a b | |
-- Intersection : (a b : Forest (leaf-level' NT)) : Set | |
data DepVec (A : ℕ → Set) : ℕ → Set where | |
[] : DepVec A 0 | |
_∷_ : {n : ℕ} → A n -> DepVec A n → DepVec A (suc n) | |
data SparseDepVec (A : ℕ → Set) : ℕ → Set where | |
[] : DepVec A 0 | |
_∷_ : {n o : ℕ} → A o → n ≤ o → DepVec A o → DepVec A (suc o) | |
record Item (d o : ℕ) : Set where | |
field | |
dot : Fin d | |
origin : Fin (suc o) | |
node : Forest sum-level | |
record Recognizer (n : ℕ) : Set where | |
field | |
grammar : BinarizedGrammar | |
node : Forest sum-level | |
medial-items : DepVec (SparseDepVec (maxdot & SparseDepVec Item) n) n | |
current-medial-items : List (Item n) | |
parse : BinarizedGrammar → String → Forest | |
parse-acc : Recognizer → String → Recognizer | |
scan : {n : ℕ} → Recognizer n → Rhs (leaf-level' T) → Recognizer (suc n) | |
parse g l = node (parse-acc g l) | |
parse-acc r [] = r | |
parse-acc r (x ∷ xs) = parse-acc (scan r x) xs | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment