Skip to content

Instantly share code, notes, and snippets.

@postmodern
Created January 15, 2011 04:39
Show Gist options
  • Save postmodern/780702 to your computer and use it in GitHub Desktop.
Save postmodern/780702 to your computer and use it in GitHub Desktop.
A pure Ruby HTTP parser using Parslet.
require 'parslet'
require 'pp'
#
# A Parslet grammar for the head of an HTTP request: the request line
# followed by zero or more headers and the terminating blank line.
#
# Parsing a request yields a Hash with the keys +:method+, +:uri+,
# +:version+ and +:headers+ (an Array of +:name+ / +:value+ pairs). When
# the request URI is an absolute path, +:uri+ is itself a Hash that may
# carry +:path+, +:params+, +:query+ and +:fragment+.
#
# The grammar only describes the request head; because +request+ is the
# root rule, any input left over after the blank line is a parse failure.
#
class HTTPParser < Parslet::Parser
  #
  # Character Classes
  #
  rule(:digit) { match('[0-9]') }
  rule(:digits) { digit.repeat(1) }
  rule(:xdigit) { digit | match('[a-fA-F]') }
  rule(:upper) { match('[A-Z]') }
  rule(:lower) { match('[a-z]') }
  rule(:alpha) { upper | lower }
  rule(:alnum) { alpha | digit }
  rule(:cntrl) { match('[\x00-\x1f]') }
  rule(:ascii) { match('[\x00-\x7f]') }
  rule(:sp) { str(' ') }
  rule(:lws) { sp | str("\t") }
  rule(:crlf) { str("\r\n") }
  # Control characters, including DEL.
  rule(:ctl) { cntrl | str("\x7f") }
  # Linear whitespace, or any ASCII character that is not a control.
  # (`>>` binds tighter than `|`, so the look-ahead guards only `ascii`.)
  rule(:text) { lws | ctl.absnt? >> ascii }
  rule(:safe) { str('$') | str('-') | str('_') | str('.') }
  rule(:extra) {
    str('!') | str('*') | str("'") | str('(') | str(')') | str(',')
  }
  # Every alternative must be joined with `|`. A line that ends without
  # the operator is a complete Ruby expression on its own, and the block
  # would then return only the alternatives written on the last line.
  rule(:reserved) {
    str(';') | str('/') | str('?') | str(':') | str('@') |
      str('&') | str('=') | str('+')
  }
  rule(:sorta_safe) { str('"') | str('<') | str('>') }
  rule(:unsafe) { ctl | sp | str('#') | str('%') | sorta_safe }
  # Any character that belongs to none of the classes listed below.
  rule(:national) {
    (alpha | digit | reserved | extra | safe | unsafe).absnt? >> any
  }
  rule(:unreserved) { alpha | digit | safe | extra | national }
  # A percent escape: "%XX", or "%uXX" with the optional "u" marker.
  # The "%" itself is mandatory; only the "u" may be left out.
  rule(:escape) { str('%') >> str('u').maybe >> xdigit >> xdigit }
  rule(:uchar) { unreserved | escape | sorta_safe }
  rule(:pchar) {
    uchar | str(':') | str('@') | str('&') | str('=') | str('+')
  }
  rule(:separators) {
    str('(') | str(')') | str('<') | str('>') | str('@') | str(',') |
      str(';') | str(':') | str("\\") | str('"') | str('/') | str('[') |
      str(']') | str('?') | str('=') | str('{') | str('}') | sp |
      str("\t")
  }

  #
  # Elements
  #
  # A single token character: ASCII, but neither a control nor a separator.
  rule(:token) { (ctl | separators).absnt? >> ascii }
  rule(:comment_text) { (str('(') | str(')')).absnt? >> text }
  rule(:comment) { str('(') >> comment_text.repeat >> str(')') }
  rule(:quoted_pair) { str("\\") >> ascii }
  # One character inside a quoted string: a backslash escape, or any text
  # character other than the closing double quote.
  rule(:quoted_text) { quoted_pair | str('"').absnt? >> text }
  # The body is repeated so that quoted strings of any length (including
  # the empty string) are accepted, not just a single character.
  rule(:quoted_string) { str('"') >> quoted_text.repeat >> str('"') }

  #
  # URI Elements
  #
  rule(:scheme) {
    (alpha | digit | str('+') | str('-') | str('.')).repeat
  }
  rule(:absolute_uri) { scheme >> str(':') >> (uchar | reserved).repeat }
  rule(:path) { pchar.repeat(1) >> (str('/') >> pchar.repeat).repeat }
  rule(:query_string) { (uchar | reserved).repeat }
  rule(:param) { (pchar | str('/')).repeat }
  rule(:params) { param >> (str(';') >> param).repeat }
  rule(:frag) { (uchar | reserved).repeat }
  # Path with optional ";params", "?query" and "#fragment" parts, each
  # captured under its own key. The delimiters are not part of the capture.
  rule(:relative_path) {
    path.maybe.as(:path) >>
      (str(';') >> params.as(:params)).maybe >>
      (str('?') >> query_string.as(:query)).maybe >>
      (str('#') >> frag.as(:fragment)).maybe
  }
  # The leading slashes are consumed here and so are not part of :path.
  rule(:absolute_path) { str('/').repeat(1) >> relative_path }
  rule(:request_uri) { str('*') | absolute_uri | absolute_path }

  #
  # HTTP Elements
  #
  # Any run of token characters. This covers the standard methods
  # (OPTIONS, GET, HEAD, POST, PUT, DELETE, TRACE, CONNECT) as well as
  # extension methods. Listing the standard names as literal alternatives
  # ahead of the generic token rule would make the ordered choice commit
  # to e.g. "GET" and then reject an extension method that merely starts
  # with that name.
  rule(:request_method) { token.repeat(1) }
  rule(:version_number) { digits >> str('.') >> digits }
  rule(:http_version) { str('HTTP/') >> version_number.as(:version) }
  rule(:request_line) {
    request_method.as(:method) >>
      sp >> request_uri.as(:uri) >>
      sp >> http_version
  }
  rule(:header_name) { (str(':').absnt? >> token).repeat(1) }
  rule(:header_value) { (text | token | separators | quoted_string).repeat(1) }
  # "Name: value\r\n". Whitespace after the colon is optional, so both
  # "Host: example.com" and "Host:example.com" are accepted; it is consumed
  # before the value is captured.
  rule(:header) {
    header_name.as(:name) >> str(':') >> lws.repeat >>
      header_value.as(:value) >> crlf
  }
  # Request line, headers, then the blank line that ends the request head.
  rule(:request) {
    request_line >> crlf >> header.repeat.as(:headers) >> crlf
  }

  root :request
end
# Parse an HTTP request read from standard input and pretty-print the
# resulting tree. On a syntax error, print the error message followed by
# the detailed failure tree to standard error.
parser = HTTPParser.new
begin
  pp parser.parse(STDIN.read)
rescue Parslet::ParseFailed => error
  STDERR.puts error
  # Newer Parslet releases attach the failure tree to the exception itself;
  # early releases exposed it as #error_tree on the root parslet instead.
  # Use whichever is available so the handler does not itself raise.
  if error.respond_to?(:parse_failure_cause)
    STDERR.puts error.parse_failure_cause.ascii_tree
  else
    STDERR.puts parser.root.error_tree
  end
end
@postmodern
Copy link
Author

$ cat http_request.txt | ruby http_parser.rb
{:method=>"GET",
 :uri=>
  {:path=>
    "blog/2011/engine-yard-cloud-out-loud-s01e05-crafting-rails-applications/"},
 :version=>"1.1",
 :headers=>
  [{:name=>"Host", :value=>"www.engineyard.com"},
   {:name=>"User-Agent", :value=>"Mozilla/5.0"},
   {:name=>"Accept",
    :value=>"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
   {:name=>"Accept-Language", :value=>"en-us,en;q=0.5"},
   {:name=>"Accept-Encoding", :value=>"gzip,deflate"},
   {:name=>"Accept-Charset", :value=>"ISO-8859-1,utf-8;q=0.7,*;q=0.7"},
   {:name=>"Keep-Alive", :value=>"115"},
   {:name=>"Connection", :value=>"keep-alive"},
   {:name=>"Cookie", :value=>"PHPSESSID=f395no19c719djvm7ed0kt4dr1"},
   {:name=>"X-Behavioral-Ad-Opt-Out", :value=>"1"},
   {:name=>"X-Do-Not-Track", :value=>"1"},
   {:name=>"Cache-Control", :value=>"max-age=0"}]}

@postmodern
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment