Skip to content

Instantly share code, notes, and snippets.

@cheald
Created November 20, 2012 03:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cheald/4115813 to your computer and use it in GitHub Desktop.
Save cheald/4115813 to your computer and use it in GitHub Desktop.
%%{
  machine rfc2822_header;

  # Ragel grammar for address headers. The rules follow the RFC 2822 ABNF
  # productions they are named after, with a few relaxations that are noted
  # inline. The embedded actions are Ruby: they read the scanner position p and
  # the input held in data, and they write captured substrings into @parts.

  # Remember the current position as the start of the token being captured.
  action mark {
    @pos = p
  }

  # Store the text from the last mark up to, but not including, p as the host.
  action domain {
    # We have to match 'foo . bar' per the tests and transform it into 'foo.bar'
    @parts[:host] = data[@pos..p-1].delete(" ")
  }

  # Store the text from the last mark up to, but not including, p as the local part.
  action local {
    @parts[:local] = data[@pos..p-1]
  }

  # Store the text from the last mark up to, but not including, p as the display name.
  action name {
    # Enclosing quotes are stripped to make the tests pass.
    @parts[:name] = data[@pos..p-1].gsub(/^"|"$/, "")
  }

  # Common ABNF rules
  cr = "\r";
  lf = "\n";
  crlf = cr lf;
  sp = " ";
  tab = "\t";
  wsp = (sp | tab);

  # Folding white space, in both the current and the obsolete form.
  obs_fws = wsp+ ( crlf wsp+ )*;
  fws = ( ( wsp* crlf )? wsp+ ) | obs_fws;

  # Control characters that are not white space.
  NO_WS_CTL = 0x01..0x08 | "\v" | "\f" | 0x0e..0x1f | 0x7f;

  # Rules to consume comments
  # Comments can nest, so they are handled with the Ragel call stack: an opening
  # parenthesis calls consumeComments, and the error action attached to the
  # closing parenthesis holds the current character and returns to the caller.
  # consumeAny is either a character that is not a parenthesis, or an opening
  # parenthesis that starts a nested comment.
  consumeAny = any - ("(" | ")") | "(" @{fcall consumeComments;};
  consumeComments := consumeAny+ ")" $!{fhold; fret; };
  startComment = "(" @{fcall consumeComments;};
  CFWS = ( fws? startComment )* ( ( fws? startComment ) | fws);

  # ctext is not referenced by any rule in this machine; its ranges follow
  # RFC 2822 section 3.2.3 (printable ASCII except parentheses and backslash).
  ctext = NO_WS_CTL | 0x21..0x27 | 0x2a..0x5b | 0x5d..0x7e;

  # Plain text and quoted pairs.
  obs_char = 0x00..0x09 | "\v" | "\f" | 0x0e..0x7f;
  obs_text = "\n"* "\r"* ( obs_char "\n"* "\r"* )*;
  text = 0x01..0x09 | "\v" | "\f" | 0x0e..0x7f; # | obs_text; # obs_causes parser ambiguity; track down why.
  obs_qp = "\\" 0x00..0x7f;
  quoted_pair = ( "\\" text ) | obs_qp;

  # Content allowed inside a bracketed domain literal.
  dtext = NO_WS_CTL | 0x21..0x5a | 0x5E..0x7E;
  dcontent = dtext | quoted_pair;

  # Atoms. dot_atom_text is relaxed here: runs of dots between atoms and
  # trailing dots are accepted.
  atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" | "_" | "`" | "{" | "|" | "}" | "~";
  dot_atom_text = atext+ ( "."* atext+ )* "."*;
  dot_atom = CFWS? dot_atom_text CFWS?;

  # Quoted strings.
  qtext = NO_WS_CTL | "!" | 0x23..0x5b | 0x5d..0x7e;
  qcontent = qtext | quoted_pair;
  quoted_string = "\"" ( fws? qcontent )* fws? "\"";

  # Words and phrases.
  atom = atext+;
  word = atom | quoted_string;
  obs_phrase = word (word | "." | CFWS)+;
  phrase = word+ | obs_phrase;

  # Domains. A domain literal holds zero or more dcontent items between the
  # brackets, as in RFC 2822 section 3.4.1. The domain rule also accepts an
  # optional dot between dot_atoms, which allows the spaced form mentioned in
  # the domain action.
  obs_domain = atom ( "." atom )*;
  domain_literal = CFWS? "[" (fws? dcontent)* fws? "]" CFWS?;
  domain = dot_atom ("."? dot_atom)* | domain_literal | obs_domain;

  # Obsolete local part and source route forms.
  obs_local_part = word ( "." word )*;
  obs_domain_list = "@" domain ((CFWS | "," )* CFWS? "@" domain)*;
  obs_route = CFWS? obs_domain_list ":" CFWS?;

  # I modified display_name here from `phrase` to account for unquoted names with commas.
  display_name = word (CFWS? (word | ","))*;

  # Address specification. The local part and the domain are each marked on
  # entry and captured on leaving.
  local_part = dot_atom | quoted_string | obs_local_part;
  addr_spec = (local_part >mark %local) "@" (domain >mark %domain);
  obs_angle_addr = CFWS? "<" obs_route? addr_spec ">" CFWS?;
  angle_addr = CFWS? "<" addr_spec ">" CFWS? | obs_angle_addr;

  # A display name followed by an address in angle brackets; the name is
  # marked on entry and captured on leaving.
  name_addr = (display_name >mark %name) angle_addr;

  # Mailboxes, groups and lists of them.
  mailbox = name_addr | addr_spec;
  obs_mbox_list = (mailbox? CFWS? "," CFWS?)+ mailbox?;
  mailbox_list = mailbox ("," mailbox)* | obs_mbox_list;
  group = display_name ":" (mailbox_list | CFWS)? ";" CFWS?;
  address = mailbox | group;
  obs_addr_list = (address? CFWS? "," CFWS?)+ address?;
  address_list = address ("," address)* | obs_addr_list;

  # Entry point: the whole input is one address list.
  main := address_list;
}%%
# Returns a copy of +host+ with every space character removed.
#
# @param host [String] host text that may contain spaces
# @return [String] a new string without spaces; +host+ itself is not modified
def process_host(host)
  host.delete(' ')
end
def parse(email)
@parts = {}
@pos = []
data = email
eof = data.length
%%write data;
%%write init;
%%write exec;
@parts
end
require 'minitest/spec'
require 'minitest/autorun'
# Specs for parse. Each example feeds one address string to the parser and
# checks the :name, :local and :host entries of the returned hash.
describe "mail parsing" do
  it "can parse a basic email address" do
    address = parse("ryan@example.com")
    address[:local].must_equal "ryan"
    address[:host].must_equal "example.com"
  end

  it "can parse a named address" do
    address = parse("Ryan Bigg <ryan@example.com>")
    address[:name].must_equal "Ryan Bigg"
    address[:local].must_equal "ryan"
    address[:host].must_equal "example.com"
  end

  # Stolen from Mail
  it "should support |Minero Aoki<aamine@0246.loveruby.net>|" do
    address = parse("Minero Aoki<aamine@0246.loveruby.net>")
    address[:name].must_equal "Minero Aoki"
    address[:local].must_equal "aamine"
    address[:host].must_equal "0246.loveruby.net"
  end

  # Stolen from Mail
  it "should support lots of dots" do
    1.upto(10) do |times|
      dots = "." * times
      address = parse("hoge#{dots}test@docomo.ne.jp")
      address[:local].must_equal "hoge#{dots}test"
      address[:host].must_equal "docomo.ne.jp"
    end
  end

  # Stolen from Mail
  it "should handle trailing dots" do
    1.upto(10) do |times|
      dots = "." * times
      address = parse("hoge#{dots}@docomo.ne.jp")
      address[:local].must_equal "hoge#{dots}"
      address[:host].must_equal "docomo.ne.jp"
    end
  end

  # Stolen from Mail (which actually stole it from a Perl test suite)
  it 'should handle "Joe & J. Harvey" <ddd @Org>' do
    address = parse('"Joe & J. Harvey" <ddd @Org>')
    address[:name].must_equal "Joe & J. Harvey"
    address[:local].must_equal "ddd "
    address[:host].must_equal "Org"
  end

  it "should handle jrh%cup.portal.com@portal.unix.portal.com" do
    address = parse('jrh%cup.portal.com@portal.unix.portal.com')
    address[:local].must_equal "jrh%cup.portal.com"
    address[:host].must_equal "portal.unix.portal.com"
  end

  it "should handle David Apfelbaum <da0g+@andrew.cmu.edu>" do
    address = parse("David Apfelbaum <da0g+@andrew.cmu.edu>")
    address[:name].must_equal "David Apfelbaum"
    address[:local].must_equal "da0g+"
    address[:host].must_equal "andrew.cmu.edu"
  end

  it "should handle Stephen Burke, Liverpool <BURKE@vxdsya.desy.de>" do
    address = parse("Stephen Burke, Liverpool <BURKE@vxdsya.desy.de>")
    address[:name].must_equal "Stephen Burke, Liverpool"
    address[:local].must_equal "BURKE"
    address[:host].must_equal "vxdsya.desy.de"
  end

  # The spaces around the dot are expected to be dropped from the host.
  it "should handle jdoe@test . example" do
    address = parse("jdoe@test . example")
    address[:local].must_equal "jdoe"
    address[:host].must_equal "test.example"
  end

  it "should handle /G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/@mhs-relay.ac.uk" do
    address = parse("/G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/@mhs-relay.ac.uk")
    address[:local].must_equal "/G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/"
    address[:host].must_equal "mhs-relay.ac.uk"
  end

  it "should handle The Newcastle Info-Server <info-admin@newcastle.ac.uk>" do
    address = parse("The Newcastle Info-Server <info-admin@newcastle.ac.uk>")
    address[:name].must_equal "The Newcastle Info-Server"
    address[:local].must_equal "info-admin"
    address[:host].must_equal "newcastle.ac.uk"
  end

  # The display name is quoted in the input and expected without the quotes.
  it "should handle JAMES R. TWINE - THE NERD <TWINE57%SDELVB%SNYDELVA.bitnet@CUNYVM.CUNY.EDU>" do
    address = parse(%Q{"JAMES R. TWINE - THE NERD" <TWINE57%SDELVB%SNYDELVA.bitnet@CUNYVM.CUNY.EDU>})
    address[:name].must_equal "JAMES R. TWINE - THE NERD"
    address[:local].must_equal "TWINE57%SDELVB%SNYDELVA.bitnet"
    address[:host].must_equal "CUNYVM.CUNY.EDU"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment