Created
November 20, 2012 03:46
-
-
Save cheald/4115813 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%{
  # Ragel machine that recognises an RFC 2822 address list. While it scans,
  # the actions below copy the display name, local part and domain of the
  # address being read into the @parts hash of the Ruby host code.
  machine rfc2822_header;

  # Remember the offset at which the token about to be captured starts.
  action mark {
    @pos = p
  }

  # Store the text between the last mark and p as the host, minus spaces.
  action domain {
    # We have to match 'foo . bar' per the tests and transform it into 'foo.bar'
    @parts[:host] = data[@pos..p-1].delete(" ")
  }

  # Store the text between the last mark and p as the local part, unchanged.
  action local {
    @parts[:local] = data[@pos..p-1]
  }

  # Store the text between the last mark and p as the display name.
  action name {
    # Enclosing quotes are stripped to make the tests pass.
    @parts[:name] = data[@pos..p-1].gsub(/^"|"$/, "")
  }

  # Common ABNF rules
  cr = "\r";
  lf = "\n";
  crlf = cr lf;
  sp = " ";
  tab = "\t";
  wsp = (sp | tab);
  obs_fws = wsp+ ( crlf wsp+ )*;
  fws = ( ( wsp* crlf )? wsp+ ) | obs_fws;
  NO_WS_CTL = 0x01..0x08 | "\v" | "\f" | 0x0e..0x1f | 0x7f;

  # Rules to consume comments
  # Comments may nest, so an opening parenthesis inside a comment calls
  # consumeComments again. The first alternative excludes both parentheses so
  # that an opening parenthesis is only ever handled by the recursive branch.
  consumeAny = any - ("(" | ")") | "(" @{fcall consumeComments;};
  # The error action on the closing parenthesis holds the current character
  # and returns to the machine that issued the fcall.
  consumeComments := consumeAny+ ")" $!{fhold; fret; };
  startComment = "(" @{fcall consumeComments;};
  CFWS = ( fws? startComment )* ( ( fws? startComment ) | fws);

  # ctext is not referenced by any rule below; it is kept for reference and
  # follows RFC 2822: printable US-ASCII without parentheses and backslash.
  ctext = NO_WS_CTL | 0x21..0x27 | 0x2a..0x5b | 0x5d..0x7e;

  obs_char = 0x00..0x09 | "\v" | "\f" | 0x0e..0x7f;
  obs_text = "\n"* "\r"* ( obs_char "\n"* "\r"* )*;
  # obs_text is deliberately left out of text:
  # | obs_text; # obs_causes parser ambiguity; track down why.
  text = 0x01..0x09 | "\v" | "\f" | 0x0e..0x7f;
  obs_qp = "\\" 0x00..0x7f;
  quoted_pair = ( "\\" text ) | obs_qp;

  dtext = NO_WS_CTL | 0x21..0x5a | 0x5E..0x7E;
  dcontent = dtext | quoted_pair;

  atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" | "_" | "`" | "{" | "|" | "}" | "~";
  # Looser than the RFC rule: runs of dots and trailing dots are accepted.
  dot_atom_text = atext+ ( "."* atext+ )* "."*;
  dot_atom = CFWS? dot_atom_text CFWS?;

  qtext = NO_WS_CTL | "!" | 0x23..0x5b | 0x5d..0x7e;
  qcontent = qtext | quoted_pair;
  quoted_string = "\"" ( fws? qcontent )* fws? "\"";

  atom = atext+;
  word = atom | quoted_string;
  obs_phrase = word (word | "." | CFWS)+;
  phrase = word+ | obs_phrase;

  obs_domain = atom ( "." atom )*;
  # RFC 2822 allows any number of dcontent items between the brackets, so the
  # bracketed group is repeated with a star.
  domain_literal = CFWS? "[" (fws? dcontent)* fws? "]" CFWS?;
  domain = dot_atom ("."? dot_atom)* | domain_literal | obs_domain;

  obs_local_part = word ( "." word )*;
  obs_domain_list = "@" domain ((CFWS | "," )* CFWS? "@" domain)*;
  obs_route = CFWS? obs_domain_list ":" CFWS?;

  # I modified display_name here from `phrase` to account for unquoted names with commas.
  display_name = word (CFWS? (word | ","))*;

  local_part = dot_atom | quoted_string | obs_local_part;
  # mark runs on entering each part; local and domain run on leaving it.
  addr_spec = (local_part >mark %local) "@" (domain >mark %domain);
  obs_angle_addr = CFWS? "<" obs_route? addr_spec ">" CFWS?;
  angle_addr = CFWS? "<" addr_spec ">" CFWS? | obs_angle_addr;
  name_addr = (display_name >mark %name) angle_addr;
  mailbox = name_addr | addr_spec;

  obs_mbox_list = (mailbox? CFWS? "," CFWS?)+ mailbox?;
  mailbox_list = mailbox ("," mailbox)* | obs_mbox_list;
  group = display_name ":" (mailbox_list | CFWS)? ";" CFWS?;
  address = mailbox | group;
  obs_addr_list = (address? CFWS? "," CFWS?)+ address?;
  address_list = address ("," address)* | obs_addr_list;

  main := address_list;
}%%
# Returns a copy of +host+ with every space character removed, so that a
# host written with spaces around its dots collapses to the plain dotted form.
# Only the space character is removed; tabs and other whitespace are kept.
#
# @param host [String] host text as captured from the input
# @return [String] a new string; +host+ itself is not modified
def process_host(host)
  host.delete(' ')
end
def parse(email) | |
@parts = {} | |
@pos = [] | |
data = email | |
eof = data.length | |
%%write data; | |
%%write init; | |
%%write exec; | |
@parts | |
end | |
require 'minitest/spec'
require 'minitest/autorun'

# Specs for parse: each example feeds one address string to the parser and
# checks the :name, :local and :host entries of the returned hash.
describe "mail parsing" do
  it "can parse a basic email address" do
    address = parse("ryan@example.com")
    address[:local].must_equal "ryan"
    address[:host].must_equal "example.com"
  end

  it "can parse a named address" do
    address = parse("Ryan Bigg <ryan@example.com>")
    address[:name].must_equal "Ryan Bigg"
    address[:local].must_equal "ryan"
    address[:host].must_equal "example.com"
  end

  # Stolen from Mail
  it "should support |Minero Aoki<aamine@0246.loveruby.net>|" do
    address = parse("Minero Aoki<aamine@0246.loveruby.net>")
    address[:name].must_equal "Minero Aoki"
    address[:local].must_equal "aamine"
    address[:host].must_equal "0246.loveruby.net"
  end

  # Stolen from Mail
  it "should support lots of dots" do
    1.upto(10) do |times|
      dots = "." * times
      address = parse("hoge#{dots}test@docomo.ne.jp")
      address[:local].must_equal "hoge#{dots}test"
      address[:host].must_equal "docomo.ne.jp"
    end
  end

  # Stolen from Mail
  it "should handle trailing dots" do
    1.upto(10) do |times|
      dots = "." * times
      address = parse("hoge#{dots}@docomo.ne.jp")
      address[:local].must_equal "hoge#{dots}"
      address[:host].must_equal "docomo.ne.jp"
    end
  end

  # Stolen from Mail (which actually stole it from a Perl test suite)
  it 'should handle "Joe & J. Harvey" <ddd @Org>' do
    address = parse('"Joe & J. Harvey" <ddd @Org>')
    address[:name].must_equal "Joe & J. Harvey"
    # The space before the at sign is kept as part of the local part.
    address[:local].must_equal "ddd "
    address[:host].must_equal "Org"
  end

  it "should handle jrh%cup.portal.com@portal.unix.portal.com" do
    address = parse('jrh%cup.portal.com@portal.unix.portal.com')
    address[:local].must_equal "jrh%cup.portal.com"
    address[:host].must_equal "portal.unix.portal.com"
  end

  it "should handle David Apfelbaum <da0g+@andrew.cmu.edu>" do
    address = parse("David Apfelbaum <da0g+@andrew.cmu.edu>")
    address[:name].must_equal "David Apfelbaum"
    address[:local].must_equal "da0g+"
    address[:host].must_equal "andrew.cmu.edu"
  end

  it "should handle Stephen Burke, Liverpool <BURKE@vxdsya.desy.de>" do
    address = parse("Stephen Burke, Liverpool <BURKE@vxdsya.desy.de>")
    address[:name].must_equal "Stephen Burke, Liverpool"
    address[:local].must_equal "BURKE"
    address[:host].must_equal "vxdsya.desy.de"
  end

  it "should handle jdoe@test . example" do
    address = parse("jdoe@test . example")
    address[:local].must_equal "jdoe"
    # Spaces around the dot are removed from the captured host.
    address[:host].must_equal "test.example"
  end

  it "should handle /G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/@mhs-relay.ac.uk" do
    address = parse("/G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/@mhs-relay.ac.uk")
    address[:local].must_equal "/G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/"
    address[:host].must_equal "mhs-relay.ac.uk"
  end

  it "should handle The Newcastle Info-Server <info-admin@newcastle.ac.uk>" do
    address = parse("The Newcastle Info-Server <info-admin@newcastle.ac.uk>")
    address[:name].must_equal "The Newcastle Info-Server"
    address[:local].must_equal "info-admin"
    address[:host].must_equal "newcastle.ac.uk"
  end

  it "should handle JAMES R. TWINE - THE NERD <TWINE57%SDELVB%SNYDELVA.bitnet@CUNYVM.CUNY.EDU>" do
    address = parse(%Q{"JAMES R. TWINE - THE NERD" <TWINE57%SDELVB%SNYDELVA.bitnet@CUNYVM.CUNY.EDU>})
    address[:name].must_equal "JAMES R. TWINE - THE NERD"
    address[:local].must_equal "TWINE57%SDELVB%SNYDELVA.bitnet"
    address[:host].must_equal "CUNYVM.CUNY.EDU"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment