Skip to content

Instantly share code, notes, and snippets.

@danlei
Created February 17, 2011 02:25
Show Gist options
  • Save danlei/830820 to your computer and use it in GitHub Desktop.
Save danlei/830820 to your computer and use it in GitHub Desktop.
Simple dependence parser
module DSParser
where
import Prelude hiding (lex)
type Word = String
type Pos = Integer
data Token = Token { pos :: Pos, lex :: Word }
type Tokens = [Token]
type Stack = Tokens
instance Eq Token where
Token n _ == Token n' _ = n == n'
instance Ord Token where
Token n _ <= Token n' _ = n <= n'
instance Show Token where
show (Token _ w) = w
tokenize :: String -> Tokens
tokenize s = zipWith Token [1..] (words s)
data Arc = Arc { from :: Token, to :: Token }
type Graph = [Arc]
instance Show Arc where
show (Arc n n') = show n ++ " ⟶ " ++ show n'
arcTo :: Graph -> Token -> Bool
arcTo a n = n `elem` map to a
data Rule = RightHeaded Word Word
| LeftHeaded Word Word
deriving Eq
type Grammar = [Rule]
instance Show Rule where
show (LeftHeaded w w') = w ++ " ⟵ " ++ w'
show (RightHeaded w w') = w ++ " ⟶ " ++ w'
(⟶), (⟵) :: Word -> Word -> Rule
w ⟵ w' = LeftHeaded w w'
w ⟶ w' = RightHeaded w w'
data Conf = Conf Stack Tokens Graph
instance Show Conf where
show (Conf s i a) =
"(" ++ show s ++ ", " ++ show i ++ ", " ++ show a ++ ")"
parse' :: Grammar -> Conf -> Conf
parse' g c
| Conf (n:s) (n':i) a <- c, lex n ⟵ lex n' `elem` g, not (arcTo a n)
= parse' g (Conf s (n':i) (Arc n' n:a)) -- left arc
| Conf (n:s) (n':i) a <- c, lex n ⟶ lex n' `elem` g, not (arcTo a n')
= parse' g (Conf (n':n:s) i (Arc n n':a)) -- right arc
| Conf (n:s) i a <- c, arcTo a n
= parse' g (Conf s i a) -- reduce
| Conf s (n:i) a <- c
= parse' g (Conf (n:s) i a) -- shift
| Conf s [] a <- c
= Conf s [] a -- termination
parse :: Grammar -> String -> Graph
parse g s = graph $ parse' g (Conf [] (tokenize s) [])
where graph (Conf _ _ a) = a
-- s = "På 60-talet målade han djärva tavlor som retade Nikita Chrusjtjov"
-- g = ["På" ⟶ "60-talet",
-- "På" ⟵ "målade",
-- "målade" ⟶ "han",
-- "djärva" ⟵ "tavlor",
-- "målade" ⟶ "tavlor",
-- "tavlor" ⟶ "som",
-- "som" ⟶ "retade",
-- "retade" ⟶ "Nikita",
-- "retade" ⟶ "Chrusjtjov"]
-- parse g s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment