Last active
May 8, 2016 15:02
-
-
Save schar/17341b0f0815a6f1732538b9d0a49cf5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-- to add: semantics, binding[?] --} | |
import Data.Char | |
import Data.List | |
data Type = | |
E | T | N | V | -- atomic cats | |
M Type | -- monads | |
FS Type Type | BS Type Type | -- {/ , \} | |
FF Type Type | BB Type Type | -- {//,\\} | |
X -- islands punctuation | |
deriving (Show, Eq) | |
data Tree = Atom String | Bin String TTree TTree | Phase Tree | |
deriving (Show, Eq) | |
type TTree = (Tree, [Type]) | |
type Sentence = [TTree] | |
sentence :: String -> Sentence | |
sentence = map lookup . words . concatMap spaceSymb | |
where lookup w = (Atom w, lexicon w) | |
spaceSymb c = if isAlphaNum c then [c] else " "++[c]++" " | |
-- parse symbols separately, regardless of whitespace | |
-- lexicon | |
-- | |
lexicon :: String -> [Type] | |
lexicon w | |
| w == "dylan" = [E] | |
| w == "boy" = [N] | |
| w == "elk" = [N] | |
| w == "binoculars" = [N] | |
| w == "left" = [BS E T] | |
| w == "saw" = [FS (BS E T) E] | |
| w == "with" = [FS (BS (BS E T) (BS E T)) E, FS (BS N N) E] | |
| w == "thought" = [FS (BS E T) T] | |
| w == "the" = [FS E N] | |
| w == "if" = [FS (FS (M T) (M T)) $ M T] | |
| w == "and" = [FS (BS T T) T] | |
| w == "someone" = [M E] | |
| w == "everyone" = [FF (M T) (BB E $ M T)] | |
| w == "every" = [FF (FF (M T) (BB E $ M T)) (BB (FS T N) $ M T)] | |
| w == "some" = [FF (M E) (BB (FS T N) $ M T)] | |
| w == "[" = [X] | |
| w == "]" = [X] | |
-- parsing | |
-- | |
ttrees :: Sentence -> [TTree] | |
ttrees [] = [] | |
ttrees [t] = [t] | |
ttrees ts = | |
[t | (ls,rs) <- splits ts, | |
l <- ttrees ls, r <- ttrees rs, | |
t <- addLowers $ combine l r] | |
-- break in half multiple ways, | |
-- try to combine the pieces, | |
-- add the lowers, then the joins | |
++ | |
[(Phase (Bin w t1 t2), [s]) | | |
x <- snd (head ts), y <- snd (last ts), x == X, y == X, | |
(ls,rs) <- splits . init . tail $ ts, -- ditch the punctuation | |
l <- ttrees ls, r <- ttrees rs, | |
(Bin w t1 t2, ss) <- addLowers $ combine l r, | |
s <- ss, evaluated s] | |
combine :: TTree -> TTree -> [TTree] | |
combine (a,ts) (b,ss) = | |
[(Bin -- L R | |
"FA" (a, [FS t1 t2]) (b, [s]), [t1]) | | |
FS t1 t2 <- ts, s <- ss, t2 == s] | |
++ | |
[(Bin -- R L | |
"BA" (a, [t]) (b, [BS s1 s2]), [s2]) | | |
t <- ts, BS s1 s2 <- ss, t == s1] | |
++ | |
[(Bin -- L $ unit R | |
"FA$_\\eta$" (a, [FS t1 t2]) (b, [s]), [t1]) | | |
FS t1 t2 <- ts, s <- ss, t2 == M s] | |
++ | |
[(Bin -- R $ unit L | |
"BA$_\\eta$" (a, [t]) (b, [BS s1 s2]), [s2]) | | |
t <- ts, BS s1 s2 <- ss, M t == s1] | |
++ | |
[(Bin -- \k -> L (\x -> k $ combine x R) | |
("SL("++w++")") (a, [FF x (BB y z)]) (b, [u]), [FF x (BB v z)]) | | |
FF x (BB y z) <- ts, u <- ss, | |
(Bin w t1 t2, rs) <- combine (a, [y]) (b, [u]), v <- rs] | |
++ | |
[(Bin -- \k -> R (\x -> k $ combine L x) | |
("SR("++w++")") (a, [u]) (b, [FF x (BB y z)]), [FF x (BB v z)]) | | |
u <- ts, | |
FF x (BB y z) <- ss, | |
(Bin w t1 t2, rs) <- combine (a, [u]) (b, [y]), v <- rs] | |
++ | |
[(Bin -- \k -> L >>= \x -> k $ combine x R | |
("$\\star$L("++w++")") (a, [M t]) (b, [s]), [v]) | | |
M t <- ts, s <- ss, | |
(Bin w t1 t2, rs) <- combine (a, [FF x (BB t x) | x <- returnTypes]) | |
(b, [s]), v <- rs] | |
++ | |
[(Bin -- \k -> R >>= \x -> k $ combine L x | |
("$\\star$R("++w++")") (a, [t]) (b, [M s]), [v]) | | |
t <- ts, M s <- ss, | |
(Bin w t1 t2, rs) <- combine (a, [t]) | |
(b, [FF x (BB s x) | x <- returnTypes]), v <- rs] | |
parse :: String -> [TTree] | |
parse = ttrees . sentence | |
-- helper functions for parsing | |
-- | |
howDeep :: Int | |
howDeep = 5 | |
-- wow: the way things are set up guarantees you don't climb higher | |
-- in the type hierarchy than you need to....? | |
enumTypes :: Int -> [Type] | |
enumTypes 1 = [M T] | |
enumTypes n = enumTypes (n-1) ++ [M . last $ enumTypes (n-1)] | |
returnTypes :: [Type] | |
returnTypes = enumTypes howDeep | |
splits :: [TTree] -> [([TTree], [TTree])] -- return all cleavings of a list | |
splits ts = concat [f ts | f <- map modSplitAt [1..length ts - 1]] | |
modSplitAt :: Int -> [TTree] -> [([TTree], [TTree])] | |
-- using island boundaries to narrow the search space; not necessary | |
modSplitAt n [] = [] | |
modSplitAt n xs = [(ys,zs) | (ys,zs) <- [splitAt n xs], | |
count "[" ys == count "]" ys, | |
count "[" zs == count "]" zs] | |
where count x = length . filter ((== Atom x) . fst) | |
addLowers :: [TTree] -> [TTree] | |
addLowers ttrees = | |
ttrees | |
++ | |
[(Bin -- incorporating all possible lowerings of the ttrees in question | |
("Lower("++w++")") t1 t2, [tl]) | | |
(Bin w t1 t2, ts) <- ttrees, t <- ts, tl <- tail $ closeUnderLower [t]] | |
closeUnder :: Eq a => (a -> a) -> [a] -> [a] | |
closeUnder f ts = let new = union ts (map f ts) in | |
if new == ts then ts else closeUnder f new | |
closeUnderLower :: [Type] -> [Type] | |
closeUnderLower = closeUnder lowerType | |
lowerType :: Type -> Type | |
lowerType t = case t of | |
FF a (BB b c) -> | |
if M b == c || lowerType b == c then a else FF a (BB (lowerType b) c) | |
_ -> t | |
evaluated :: Type -> Bool | |
evaluated t = case t of | |
FF a b -> False | |
FS a b -> evaluated a | |
BS a b -> evaluated b | |
BB a b -> evaluated b | |
M a -> evaluated a | |
_ -> True | |
-- parsing concluded | |
-- here's the pretty printer | |
-- writes a tex file with trees of the sentential parses in your working dir | |
-- | |
prettyCat :: Type -> String -- printing cats | |
prettyCat cat = case cat of | |
FS x y -> "(" ++ prettyCat x ++ " / " ++ prettyCat y ++ ")" | |
BS x y -> "(" ++ prettyCat x ++ " \\backslash " ++ prettyCat y ++ ")" | |
FF x y -> "(" ++ prettyCat x ++ " \\sslash " ++ prettyCat y ++ ")" | |
BB x y -> "(" ++ prettyCat x ++ " \\bbslash " ++ prettyCat y ++ ")" | |
M a -> "\\textsf{M}" ++ prettyCat a | |
_ -> map toLower (show cat) | |
pprettyCat :: Type -> String -- dropping outer parens | |
pprettyCat thing = case xs of | |
[] -> [] | |
_ -> if head xs == '(' && last xs == ')' | |
then drop 1 $ init xs | |
else xs | |
where xs = prettyCat thing | |
pretty :: TTree -> [String] | |
pretty tree = case tree of | |
(Atom x, ts) -> ["[{$"++pprettyCat t++"$} ["++x++"] ]" | t <- ts] | |
(Bin w x y, ts) -> | |
["[{$\\textbf{\\textsf{"++w++"}} \\vdash " ++ | |
pprettyCat t++"$} "++a++" "++b++" ]" | |
| | |
t <- ts, a <- pretty x, b <- pretty y] | |
(Phase t, ts) -> ["[$\\blacksquare$ "++a++" ]" | a <- pretty (t, ts)] | |
toForest :: String -> String | |
toForest = unlines . concatMap | |
(map | |
(\x -> "\\begin{forest}" ++ | |
"for tree={scale=.8}," ++ | |
"where n children=0{tier=word}{}\n" ++ | |
x ++ | |
"\n\\end{forest}\\\\\n") | |
. pretty) . filter (prop . snd) . parse -- LOL | |
prop :: [Type] -> Bool | |
prop ts = case ts of | |
[FF a b] -> False | |
_ -> True | |
toParse :: String | |
toParse = "if [someone saw every elk with the binoculars] [dylan left]" | |
main :: IO () | |
main = do | |
putStr output | |
writeFile "sandbox.tex" $ preamble++output++end | |
where output = toForest toParse | |
preamble = | |
"\\documentclass{article}\n\\synctex=1\n" ++ | |
"\\usepackage[margin=.8in]{geometry}\n" ++ | |
"\\usepackage{forest,mathtools,newtxtext,newtxmath}\n" ++ | |
"\\newcommand\\bs\\backslash{}\n" ++ | |
"\\newcommand\\sslash{\\mathord{/\\mkern-5mu/}}\n" ++ | |
"\\newcommand\\bbslash{\\mathord{\\bs\\mkern-5.2mu\\bs}}\n" ++ | |
"\\begin{document}\n\n" | |
end = "\\end{document}" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment