Created
February 17, 2011 02:25
-
-
Save danlei/830820 to your computer and use it in GitHub Desktop.
Simple dependence parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module DSParser | |
where | |
import Prelude hiding (lex) | |
type Word = String | |
type Pos = Integer | |
data Token = Token { pos :: Pos, lex :: Word } | |
type Tokens = [Token] | |
type Stack = Tokens | |
instance Eq Token where | |
Token n _ == Token n' _ = n == n' | |
instance Ord Token where | |
Token n _ <= Token n' _ = n <= n' | |
instance Show Token where | |
show (Token _ w) = w | |
tokenize :: String -> Tokens | |
tokenize s = zipWith Token [1..] (words s) | |
data Arc = Arc { from :: Token, to :: Token } | |
type Graph = [Arc] | |
instance Show Arc where | |
show (Arc n n') = show n ++ " ⟶ " ++ show n' | |
arcTo :: Graph -> Token -> Bool | |
arcTo a n = n `elem` map to a | |
data Rule = RightHeaded Word Word | |
| LeftHeaded Word Word | |
deriving Eq | |
type Grammar = [Rule] | |
instance Show Rule where | |
show (LeftHeaded w w') = w ++ " ⟵ " ++ w' | |
show (RightHeaded w w') = w ++ " ⟶ " ++ w' | |
(⟶), (⟵) :: Word -> Word -> Rule | |
w ⟵ w' = LeftHeaded w w' | |
w ⟶ w' = RightHeaded w w' | |
data Conf = Conf Stack Tokens Graph | |
instance Show Conf where | |
show (Conf s i a) = | |
"(" ++ show s ++ ", " ++ show i ++ ", " ++ show a ++ ")" | |
parse' :: Grammar -> Conf -> Conf | |
parse' g c | |
| Conf (n:s) (n':i) a <- c, lex n ⟵ lex n' `elem` g, not (arcTo a n) | |
= parse' g (Conf s (n':i) (Arc n' n:a)) -- left arc | |
| Conf (n:s) (n':i) a <- c, lex n ⟶ lex n' `elem` g, not (arcTo a n') | |
= parse' g (Conf (n':n:s) i (Arc n n':a)) -- right arc | |
| Conf (n:s) i a <- c, arcTo a n | |
= parse' g (Conf s i a) -- reduce | |
| Conf s (n:i) a <- c | |
= parse' g (Conf (n:s) i a) -- shift | |
| Conf s [] a <- c | |
= Conf s [] a -- termination | |
parse :: Grammar -> String -> Graph | |
parse g s = graph $ parse' g (Conf [] (tokenize s) []) | |
where graph (Conf _ _ a) = a | |
-- s = "På 60-talet målade han djärva tavlor som retade Nikita Chrusjtjov" | |
-- g = ["På" ⟶ "60-talet", | |
-- "På" ⟵ "målade", | |
-- "målade" ⟶ "han", | |
-- "djärva" ⟵ "tavlor", | |
-- "målade" ⟶ "tavlor", | |
-- "tavlor" ⟶ "som", | |
-- "som" ⟶ "retade", | |
-- "retade" ⟶ "Nikita", | |
-- "retade" ⟶ "Chrusjtjov"] | |
-- parse g s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment