Skip to content

Instantly share code, notes, and snippets.

@takaki
Created November 29, 2012 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takaki/4169514 to your computer and use it in GitHub Desktop.
Save takaki/4169514 to your computer and use it in GitHub Desktop.
Parsec: RFC 5322 (RFC 822) email address parser
import Debug.Trace
import Control.Monad.State
import Text.Parsec
import Data.Char
import Control.Monad.Trans
import Control.Applicative hiding((<|>),many,optional)
-- trace s f = f
-- ref. RFC5322 and RFC5234
--debug_putStrLn = putStrLn
debug_putStrLn = return
debug_print s0 = liftIO $ debug_putStrLn $ s0
debug_in s0 = debug_print $ "[IN] " ++ s0
debug_return s0 s1 = do
liftIO $ debug_putStrLn $ "[OUT] " ++ s0 ++ " : [" ++ s1 ++ "]"
return s1
-- address = mailbox / group
address :: ParsecT [Char] u IO [Char]
address = do
debug_in "address"
s0 <- mailbox <|> group
-- debug_return "address" s0
return s0
-- mailbox = name-addr / addr-spec
mailbox = do
debug_in "mailbox"
s0 <- try(name_addr) <|> addr_spec
debug_return "mailbox" s0
-- name-addr = [display-name] angle-addr
name_addr = do
debug_in "name_addr"
s0 <- option "" display_name
s1 <- angle_addr
liftIO $ putStrLn $ " DISPLAY_NAME: " ++ s0
liftIO $ putStrLn $ " ADDDR_SPEC: " ++ s1
debug_return "name_addr" $ s0 ++ s1
-- angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
-- obs-angle-addr
angle_addr = do
debug_in "angle_addr"
s0 <- option "" cfws
s1 <- char '<'
s2 <- addr_spec
s3 <- char '>'
s4 <- option "" cfws
liftIO $ putStrLn $ " ADDDR_SPEC: " ++ s2
debug_return "angle_addr" $ s0 ++ [s1] ++ s2 ++ [s3] ++ s4
<|>
obs_angle_addr
-- group = display-name ":" [group-list] ";" [CFWS]
group = do
debug_in "group"
s0 <- display_name
s1 <- char ':'
s2 <- option "" group_list
s3 <- char ';'
s4 <- option "" cfws
debug_return "group" $ s0 ++ [s1] ++ s2 ++ [s3] ++ s4
-- display-name = phrase
display_name = do
debug_in "display_name"
s0 <- phrase
debug_return "display_name" s0
-- mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
mailbox_list :: ParsecT [Char] u IO [Char]
mailbox_list = do {
s0 <- mailbox ;
s1 <- many (do
s0 <- char ','
s1 <- mailbox
return $ [s0] ++ s1
);
debug_return "mailbox_list" $ s0 ++ (concat s1)
} <|>
obs_mbox_list
-- address-list = (address *("," address)) / obs-addr-list
{-
address_list :: ParsecT [Char] u IO [Char]
address_list = try(do {
address ;
many (char ',' >> address)
} ) <|>
obs_addr_list
-}
-- group-list = mailbox-list / CFWS / obs-group-list
group_list = do
mailbox_list
<|>
cfws
<|>
obs_group_list
-- addr-spec = local-part "@" domain
addr_spec = do
debug_in "addr_spec"
s0 <- local_part
s1 <- char '@'
s2 <- domain
liftIO $ putStrLn $ " LOCAL-PART: " ++ s0
liftIO $ putStrLn $ " DOMAIN: " ++ s2
debug_return "addr_spec" $ s0 ++ [s1] ++ s2
-- local-part = dot-atom / quoted-string / obs-local-part
local_part = do
debug_in "local_part"
s0 <- (try dot_atom) <|> try quoted_string <|> obs_local_part
debug_return "local_part" s0
-- domain = dot-atom / domain-literal / obs-domain
domain = do
debug_in "domain"
s0 <- try dot_atom <|> try domain_literal <|> obs_domain
debug_return "domain" s0
domain_literal = do
s0 <- option "" cfws
s1 <- char '['
s2 <- many (do
s0 <- option "" fws
s1 <- dtext
return $ s0 ++ s1
)
s3 <- option "" fws
s4 <- char ']'
s5 <- option "" cfws
debug_return "domain_literal" $ s0 ++ [s1] ++ (concat s2) ++ s3 ++ [s4] ++ s5
dtext = do
debug_in "dtext"
s0 <- do {
s0 <- oneOf $ map chr ([33..90] ++ [94..126]);
return [s0]
}<|> obs_dtext
debug_return "dtext" s0
-- CFWS = (1*([FWS] comment) [FWS]) / FWS
cfws :: ParsecT [Char] u IO [Char]
cfws = do
debug_in "cfws"
s0 <- try(do {
s0 <- many1 (do
s0 <- option "" fws
s1 <- comment
return $ s0 ++ s1
);
s1 <- option "" fws;
return $ (concat s0) ++ s1
} ) <|> fws
-- debug_return "cfws" s0
debug_return "cfws" ""
-- FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
fws :: ParsecT [Char] u IO [Char]
fws = do
debug_in "fws"
s0 <- try(do {
s0 <- try (option "" (do
s0 <- many wsp
s1 <- crlf
return $ s0 ++ s1
));
s1 <- many1 wsp;
return $ s0 ++ s1
} ) <|> obs_fws
debug_return "fws" s0
wsp = do
s1 <- char ' ' <|> char '\t' <?> "@wsp"
-- debug_print "wsp" $ "'" ++ [s1] ++ "'"
return s1
-- obs-FWS = 1*WSP *(CRLF 1*WSP)
obs_fws = do
s0 <- many1 wsp
s1 <- many (do
s0 <- crlf
s1 <- many1 wsp
return $ s0 ++ s1
)
debug_return "OBS_fws" $ s0 ++ (concat s1)
crlf = do
s1 <- ( do
s0 <- char '\r'
s1 <- char '\n'
return $ [s0, s1]
) <?> "@crlf"
debug_return "crlf" s1
ctext =
try (oneOf $ map chr ([33..39] ++ [42..91] ++ [93..126]))
<|>
obs_ctext
-- ccontent = ctext / quoted-pair / comment
ccontent :: ParsecT [Char] u IO [Char]
ccontent = do
debug_in "ccontent"
s0 <- ((try ctext) >>= return . pure)
<|>
(try quoted_pair)
<|>
comment
debug_return "ccontent" s0
-- comment = "(" *([FWS] ccontent) [FWS] ")"
comment = do
debug_in "comment"
s0 <- char '('
s1 <- many (do
s0 <- option "" fws
s1 <- ccontent
return $ s0 ++ s1
)
s2 <- option "" fws
s3 <- char ')'
debug_return "comment" $ [s0] ++ (concat s1) ++ s2 ++ [s3]
-- obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
obs_angle_addr = do
s0 <- option "" cfws
s1 <- char '<'
s2 <- obs_route
s3 <- addr_spec
s4 <- char '>'
s5 <- option "" cfws
debug_return "OBS_angle_addr" $ s0 ++ [s1] ++ s2 ++ s3 ++ [s4] ++ s5
-- obs-route = obs-domain-list ":"
obs_route = do
s0 <- obs_domain_list
s1 <- char ':'
debug_return "OBS_route" $ s0 ++ [s1]
-- obs-domain-list = *(CFWS / ",") "@" domain
-- *("," [CFWS] ["@" domain])
obs_domain_list = do
s0 <- many (cfws <|> (char ',' >> return ","))
s1 <- char '@'
s2 <- domain
s3 <- many (do
s0 <- char ','
s1 <- option "" cfws
s2 <- option "" (do
s0 <- char '@'
s1 <- domain
return $ [s0] ++ s1)
return $ [s0] ++ s1 ++ s2)
debug_return "OBS_domain_list" $ (concat s0) ++ [s1] ++ s2 ++ (concat s3)
-- obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
obs_mbox_list = do
s0 <- many (do
s0 <- cfws
s1 <- char ','
return $ s0 ++ [s1]
)
s1 <- mailbox
s2 <- many (do
s0 <- char ','
s1 <- option "" (mailbox <|> cfws)
return $ [s0] ++ s1
)
debug_return "OBS_mbox_list" $ (concat s0) ++ s1 ++ (concat s2)
-- obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
{-
obs_addr_list :: ParsecT [Char] u IO [Char]
obs_addr_list = do
s0 <- many (do
s0 <- option "" cfws
s1 <- char ','
return $ s0 ++ [s1]
)
s1 <- address
s2 <- many (do
s0 <- char ','
s1 <- option "" (address <|> cfws)
return $ [s0] ++ s1)
return $ (concat s0) ++ s1 ++ (concat s2)
-}
-- obs-group-list = 1*([CFWS] ",") [CFWS]
obs_group_list = do
s0 <- many1 (do
s0 <- option "" cfws
s1 <- char ','
return $ s0 ++ [s1]
)
s1 <- option "" cfws
return $ (concat s0) ++ s1
-- obs-local-part = word *("." word)
obs_local_part = do
s0 <- word
s1 <- many (do
s0 <- char '.'
s1 <- word
return $ [s0] ++ s1
)
return $ s0 ++ (concat s1)
-- obs-domain = atom *("." atom)
obs_domain = do
s0 <- atom
s1 <- many (char '.' >> atom)
return $ s0 ++ (concat s1)
-- obs-dtext = obs-NO-WS-CTL / quoted-pair
obs_dtext :: ParsecT [Char] u IO [Char]
obs_dtext = do
debug_in "OBS_dtext"
(obs_NO_WS_CTL >>= return . pure)
<|>
quoted_pair
word :: ParsecT [Char] u IO [Char]
word = do
debug_in "word"
s0 <- try atom <|> quoted_string
debug_return "word" s0
phrase = do
debug_in "phrase"
s0 <- (try(many1 word)) <|> (obs_phrase >>= return . pure)
debug_return "phrase" $ concat s0
atext = do
s0 <- (oneOf (['a'..'z'] ++ ['A'..'Z'])
<|>
oneOf ['0'..'9']
<|>
oneOf "!#$%&'*+-/=?^_`{|}~")
debug_print $ "[OUT] atext: " ++ [s0]
return s0
-- atom = [CFWS] 1*atext [CFWS]
atom = do
debug_in "atom"
s0 <- option "" cfws
s1 <- many1 atext
s2 <- option "" cfws
debug_return "atom" $ s0 ++ s1 ++ s2
-- dot-atom = [CFWS] dot-atom-text [CFWS]
dot_atom = do
debug_in "dot_atom"
s0 <- option "" cfws
s1 <- dot_atom_text
s2 <- option "" cfws
--debug_return "dot_atom" $ "[-" ++ s0 ++ "-]" ++ s1 ++ "[-" ++ s2 ++ "-]"
debug_return "dot_atom" $ s0 ++ s1 ++ s2
--debug_return "dot_atom" s1
-- dot-atom-text = 1*atext *("." 1*atext)
dot_atom_text = do
debug_in "dot_atom_text"
s0 <- many1 atext
s1 <- many (do
s0 <- char '.'
s1 <- many1 atext
return $ [s0] ++ s1
)
debug_return "dot_atom_text" $ s0 ++ (concat s1)
qtext =
(try(oneOf $ map chr $ [33] ++ [35..91] ++ [93..126])
<|>
obs_qtext) >>= (debug_return "qtext" . pure)
qcontent :: ParsecT [Char] u IO [Char]
qcontent = qtext <|> quoted_pair
quoted_pair :: ParsecT [Char] u IO [Char]
quoted_pair =
(do
s0 <- char '\\'
s1 <- vchar <|> wsp
return $ [s0,s1]
)
<|> obs_qp
quoted_string :: ParsecT [Char] u IO [Char]
quoted_string = do
debug_in "quoted_string"
s0 <- option "" cfws
s1 <- char '"'
s2 <- many (do
s1 <- option "" fws
s2 <- qcontent
return $ s1 ++ s2
)
s3 <- option "" fws
s4 <- char '"'
s5 <- option "" cfws
debug_return "quoted_string" $ s0 ++ [s1] ++ (concat s2) ++ s3 ++ [s4] ++ s5
vchar = oneOf $ map chr [0x21..0x7e]
obs_NO_WS_CTL = do
s0 <- (oneOf $ map chr ([1..8] ++ [11] ++ [12] ++ [14..31] ++ [127]))
return s0
obs_ctext = obs_NO_WS_CTL
obs_qtext = obs_NO_WS_CTL
obs_qp = do
s0 <- char '\\'
s1 <- (char (chr 0) <|> obs_NO_WS_CTL <|> char '\n' <|> char '\r')
return [s0,s1]
-- obs-phrase = word *(word / "." / CFWS)
obs_phrase :: ParsecT [Char] u IO [Char]
obs_phrase = do
s0 <- word
s1 <- many
(try word
<|>
try (char '.' >> return ".")
<|>
cfws)
return $ s0 ++ (concat s1)
runTest p s = do
putStrLn $ "Parse: " ++ s
parseTest p s
runTest2 p s = do
putStrLn $ "Parse: " ++ s
runParserT p () "" s >>= either print print
main = do
mapM_ (runTest2 address)
[
"takaki@mbox.example.com"
, "(Takaki)takaki@mbox.example.com"
, "takaki(hoge)@mbox.example.com"
, "Jeffy <\"That Tall Guy\"@ora.com (this addres no longer active)>"
, "(Takaki)takaki@mbox.example.com"
, "takaki@[192.168.0.1]abc"
, "takaki@mbox.example.com"
, "<takaki@mbox.example.com>"
, "Takaki<takaki@mbox.example.com>"
, "\"Tak aki\" <takaki@mbox.example.com>"
, " Takaki <takaki@mbox.example.com>"
, "\"000\" A(B\"C(hh)C\"BB)AA <(pre)\"abcdef\"(po\"<>\"st)@(hoge)example.com (abc\"nest\" def)> (a \"de\" bc)"
, "\"!@#..$%^%^&=%\"@example.com"
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment