Last active
March 23, 2022 14:33
-
-
Save schar/4e0ddc592bcf753f8e2d826cfbf93eb9 to your computer and use it in GitHub Desktop.
Incredibly simple but extremely non-performant CFG parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Tree | |
splitsPlus :: [a] -> [([a], [a])] | |
splitsPlus u = [splitAt i u | i <- [1..length u - 1]] | |
data Rule n x = n :- x | n :< (n, n) | |
deriving (Eq, Show) | |
parse | |
:: (Eq cat, Eq term) => | |
[Rule cat term]-> [term] -> [cat] | |
parse g [x] = [n | n :- y <- g, y==x] | |
parse g xs = [n | (ls, rs) <- splitsPlus xs, | |
nl <- parse g ls, nr <- parse g rs, | |
n :< (l, r) <- g, nl == l, nr == r ] | |
data Cat = S | D | DP | N | NP | VT | VD | VP | P | PP | |
deriving (Eq, Read, Show) | |
eng :: [Rule Cat String] | |
eng = [ S :< (DP, VP) , | |
VP :< (VT, DP) , | |
VT :< (VD, DP) , | |
DP :< (D , NP) , | |
NP :< (NP, PP) , | |
PP :< (P, DP) , | |
VP :< (VP, PP) , | |
DP :- "Mary" , | |
VT :- "saw" , | |
VD :- "gave" , | |
VP :- "left" , | |
D :- "the" , | |
NP :- "binoculars", | |
NP :- "elk" , | |
P :- "with" ] | |
parseToTree | |
:: (Show cat, Eq cat, Read cat) => | |
[Rule cat String] -> [String] -> [Tree String] | |
parseToTree g [x] = [(Node (show n) [Node x []]) | n :- y <- g, y==x] | |
parseToTree g xs = | |
[ Node (show n) [tl, tr] | | |
(ls, rs) <- splitsPlus xs, | |
tl <- parseToTree g ls, tr <- parseToTree g rs, | |
n :< (l, r) <- g, read (rootLabel tl) == l, read (rootLabel tr) == r ] | |
s1 :: [String] | |
s1 = words "Mary saw the elk with the binoculars" | |
s2 :: [String] | |
s2 = words "Mary gave the elk with the binoculars the binoculars" | |
s3 :: [String] | |
s3 = map return "aaabbb" | |
main :: IO () | |
main = do | |
putStrLn (drawForest (parseToTree eng s1)) | |
putStrLn (drawForest (parseToTree eng s2)) | |
putStrLn (drawForest (parseToTree lbal s3)) | |
putStrLn (drawForest (parseToTree rbal s3)) | |
data ANBN = X | A | B | R deriving (Eq, Read, Show) | |
lbal = [ X :< (A, B), | |
X :< (R, B), | |
R :< (A, X), | |
A :- "a" , | |
B :- "b" ] | |
rbal = [ X :< (A, B), | |
X :< (A, R), | |
R :< (X, B), | |
A :- "a" , | |
B :- "b" ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment