Created
December 12, 2011 18:00
-
-
Save nevans/1468355 to your computer and use it in GitHub Desktop.
tree top and IMAP astrings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See http://tools.ietf.org/html/rfc3501#section-9
# INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 - Formal Syntax
#
# Treetop grammar for the IMAP4rev1 "astring" production. Each rule is
# preceded by the ABNF it implements. The compiled grammar is used as
# IMAP::AstringParser.
module IMAP
  grammar Astring
    # astring = 1*ASTRING-CHAR / string
    rule astring
      ASTRING_CHAR+ / string
    end

    # ASTRING-CHAR = ATOM-CHAR / resp-specials
    rule ASTRING_CHAR
      ATOM_CHAR / resp_specials
    end

    # ATOM-CHAR = <any CHAR except atom-specials>
    #
    # The negative lookahead rejects atom-specials without consuming input;
    # CHAR then consumes exactly one 7-bit character.
    rule ATOM_CHAR
      !atom_specials CHAR
    end

    # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
    #                 quoted-specials / resp-specials
    rule atom_specials
      "(" / ")" / "{" / SP / CTL / list_wildcards /
      quoted_specials / resp_specials
    end

    # list-wildcards = "%" / "*"
    rule list_wildcards
      "%" / "*"
    end

    # quoted-specials = DQUOTE / "\"
    rule quoted_specials
      DQUOTE / "\\"
    end

    # resp-specials = "]"
    rule resp_specials
      "]"
    end

    # string = quoted / literal
    rule string
      quoted / literal
    end

    # quoted = DQUOTE *QUOTED-CHAR DQUOTE
    rule quoted
      DQUOTE QUOTED_CHAR* DQUOTE
    end

    # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
    #               "\" quoted-specials
    rule QUOTED_CHAR
      !quoted_specials TEXT_CHAR / "\\" quoted_specials
    end

    # TEXT-CHAR = <any CHAR except CR and LF>
    rule TEXT_CHAR
      !CR !LF CHAR
    end

    # literal = "{" number "}" CRLF *CHAR8
    #           ; Number represents the number of CHAR8s
    #
    # CHAR8* is greedy: it takes every CHAR8 that follows the CRLF. The
    # semantic predicate then accepts the match only when that run is exactly
    # as long as the announced number, so a literal with fewer or more
    # trailing CHAR8s does not match.
    rule literal
      "{" number "}" CRLF CHAR8* &{|seq|
        # seq holds the nodes matched so far: "{", number, "}", CRLF, CHAR8*
        expected_length = seq[1].text_value.to_i
        # NOTE(review): String#length counts characters; that equals the
        # octet count only for single-byte encoded input -- confirm how
        # callers encode the input string.
        actual_length = seq[4].text_value.length
        expected_length == actual_length
      }
    end

    # number = 1*DIGIT
    #          ; Unsigned 32-bit integer
    #          ; (0 <= n < 4,294,967,296)
    # The 32-bit upper bound is not enforced by this rule.
    rule number
      DIGIT+
    end

    # DIGIT
    rule DIGIT
      [0-9]
    end

    # CHAR8 = %x01-ff
    #         ; any OCTET except NUL, %x00
    rule CHAR8
      [\x01-\xff]
    end

    ###########################################################################
    ###########################################################################
    ## The following rules are taken from
    ## http://tools.ietf.org/html/rfc5234#appendix-B.1
    ## Augmented BNF for Syntax Specifications: ABNF - Core Rules
    ###########################################################################
    ###########################################################################

    # CHAR = %x01-7F
    #        ; any 7-bit US-ASCII character, excluding NUL
    rule CHAR
      [\x01-\x7F]
    end

    # CRLF = CR LF
    #        ; Internet standard newline
    rule CRLF
      CR LF
    end

    # CR = %x0D
    #      ; carriage return
    rule CR
      "\x0d"
    end

    # LF = %x0A
    #      ; linefeed
    rule LF
      "\x0a"
    end

    # CTL = %x00-1F / %x7F
    #       ; controls
    rule CTL
      [\x00-\x1F\x7F]
    end

    # DQUOTE = %x22
    #          ; " (Double Quote)
    rule DQUOTE
      [\x22]
    end

    # SP = %x20
    #
    # From RFC 3501 section 9, note (2): In all cases, SP refers to exactly
    # one space. It is NOT permitted to substitute TAB, insert additional
    # spaces, or otherwise treat SP as being equivalent to LWSP.
    rule SP
      " "
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'treetop' | |
require 'imap_astring.treetop' | |
# Runs the generated IMAP::AstringParser over inputs that must parse as an
# astring and inputs that must be rejected, defining one example per input.
describe IMAP::AstringParser do
  # Valid astrings: bare atom characters, quoted strings, and one literal.
  accepted = [
    "a",
    "abcde",
    "contains]resp]specials",
    '"simplequoted"',
    '"quoted with space"',
    '"quoted * with list wildcards%"',
    '"quoted with resp special ]]]"',
    '"quoted \\" with escapes\\\\ "', # an escaped quote, then an escaped backslash
    "{33}\r\nliteral string\nwith\r\ncrazy chars\n", # payload is exactly 33 characters
  ]

  # Invalid astrings: atom-specials in a bare atom, an unterminated quote,
  # and literals with a missing CRLF, a wrong length, or an embedded NUL.
  rejected = [
    "contains space",
    'quote"inthemiddle',
    'contains%list*wildcards',
    '"improperly quoted',
    "{7}no CRLF",
    "{7}\rno CRLF",
    "{7}\nno CRLF",
    "{99}\r\ntoo short\n",
    "{12}\r\nincludes\x00nil",
  ]

  accepted.each do |input|
    it "should match #{input.inspect}" do
      astring_parser = IMAP::AstringParser.new
      result = astring_parser.parse(input)
      # Print the parser's explanation so a failing example is diagnosable.
      $stderr.puts "Parse Error: #{astring_parser.failure_reason}" unless result
      result.should be_true
    end
  end

  rejected.each do |input|
    it "should not match #{input.inspect}" do
      result = IMAP::AstringParser.new.parse(input)
      result.should be_false
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Lines 18 and 74 (`ATOM-CHAR` and `literal`) are the only ugly bits. Is there a better way?