Skip to content

Instantly share code, notes, and snippets.

@ixti
Created January 4, 2015 15:15
Show Gist options
  • Save ixti/074dae7fdf5d6775d44c to your computer and use it in GitHub Desktop.
Save ixti/074dae7fdf5d6775d44c to your computer and use it in GitHub Desktop.
require "benchmark/ips"
require "parslet"
module HTTP
class ContentType
class ParsletParser < ::Parslet::Parser
# Wrapper around a list of strings that can be combined with +/- and that
# converts implicitly to both an Array (to_ary) and a String (to_str).
class CharList # :nodoc:
  # Wraps +list+. When +list+ is nil (or false) the value returned by the
  # given block is wrapped instead.
  def initialize(list = nil)
    @list = list || yield
  end

  # Returns a new CharList holding the wrapped entries minus those in +other+.
  def -(other)
    CharList.new(@list - other.to_a)
  end

  # Returns a new CharList holding the wrapped entries followed by +other+'s.
  def +(other)
    CharList.new(@list + other.to_a)
  end

  # Returns a copy of the wrapped list, so callers cannot mutate @list.
  def to_a
    @list.dup
  end
  # to_ary enables implicit Array conversion (e.g. as the operand of Array#-).
  alias_method :to_ary, :to_a

  # Returns all entries joined into a single string.
  def to_s
    @list.join
  end
  # to_str enables implicit String conversion (e.g. for Regexp.escape).
  alias_method :to_str, :to_s

  # Length of the joined string, not the number of entries.
  def size
    to_s.length
  end
end
# rubocop:disable LineLength
# rubocop:disable Blocks
# rubocop:disable BlockAlignment
# --- Character sets -------------------------------------------------------
# Each constant is a CharList; the rules below turn them into regexp
# character classes via Regexp.escape (which relies on CharList#to_str).
# CHAR: every character with code point 0 through 127.
CHAR = CharList.new { (0..127).to_a.map(&:chr) }
# CTLS: code points 0 through 31, plus 127.
CTLS = CharList.new { (0..31).to_a.map(&:chr) << 127.chr }
# CR / LF / SPACE / HTAB: single-character lists (13, 10, " " and 9).
CR = CharList.new { [13.chr] }
LF = CharList.new { [10.chr] }
SPACE = CharList.new { [" "] }
HTAB = CharList.new { [9.chr] }
# CRLF: one entry that is the two-character string CR followed by LF.
CRLF = CharList.new { [13.chr + 10.chr] }
# SPECIALS: punctuation that is excluded from tokens (through TSPECIALS).
SPECIALS = CharList.new { ["(", ")", "<", ">", "@", ",", ";", ":", "\\", "\"", ".", "[", "]"] }
# TSPECIALS: SPECIALS plus "/", "?" and "=".
# NOTE: SPECIALS + [...] already returns a CharList, so the block hands a
# CharList (not an Array) to CharList.new; this works because CharList
# responds to to_ary wherever an Array operand is expected.
TSPECIALS = CharList.new { SPECIALS + ["/", "?", "="] }
# --- Grammar rules --------------------------------------------------------
# quoted_pair: a backslash followed by one character from CHAR.
rule(:quoted_pair) { str("\\") >> match[Regexp.escape CHAR] }
# linear_ws: one or more groups of (at most one CRLF, then a space or a tab).
# Not referenced by any other rule in this grammar.
rule(:linear_ws) { (str(CRLF).repeat(0, 1) >> (str(SPACE) | str(HTAB))).repeat(1) }
# qtext: one character from CHAR other than '"', "\\" and CR.
rule(:qtext) { match[Regexp.escape CHAR - ['"', "\\"] - CR] }
# quoted_string: qtext / quoted_pair between double quotes. Only the part
# between the quotes is captured as :value; escapes are not decoded here.
rule(:quoted_string) { str('"') >> (qtext | quoted_pair).repeat.as(:value) >> str('"') }
# token: one or more characters from CHAR minus SPACE, CTLS and TSPECIALS.
rule(:token) { match[Regexp.escape CHAR - SPACE - CTLS - TSPECIALS].repeat(1) }
# space: a single " ".
rule(:space) { str(SPACE) }
# x_token: the literal prefix "x-" followed by a token.
rule(:x_token) { str("x-") >> token }
# type: one of the listed literal type names, or an x_token.
# NOTE(review): the literals are lowercase; presumably str() matches
# case-sensitively, so mixed-case types would be rejected -- confirm.
rule(:type) { str("application") | str("audio") | str("image") | str("message") | str("multipart") | str("text") | str("video") | x_token }
# subtype and attribute are plain tokens.
rule(:subtype) { token }
rule(:attribute) { token }
# value: a token (captured as :value) or a quoted_string (which captures its
# own :value).
rule(:value) { token.as(:value) | quoted_string }
# parameter: attribute "=" value, with the attribute captured as :attribute.
rule(:parameter) { attribute.as(:attribute) >> str("=") >> value }
# parameters: ONE ";"-introduced parameter with optional spaces around the
# ";", captured as :parameter. Repetition happens in content_type.
rule(:parameters) { space.repeat >> str(";") >> space.repeat >> parameter.as(:parameter) }
# content_type: type "/" subtype followed by any number of parameters.
rule(:content_type) { type.as(:type) >> str("/") >> subtype.as(:subtype) >> parameters.repeat }
# The grammar's entry point.
root(:content_type)
# Parses the Content-Type header value +str+ with this grammar.
#
# Returns a Hash with :type and :subtype (downcased strings) and :parameters
# (a Hash built from parse_params). Any error raised by the underlying
# parser propagates to the caller.
def self.parse(str)
  tree = new.parse(str)
  # The parser returns either a single node or an Array of nodes;
  # normalise to "first node + remaining parameter nodes".
  nodes = tree.is_a?(Array) ? tree : [tree]
  head, *rest = nodes

  {
    :type => head[:type].to_s.downcase,
    :subtype => head[:subtype].to_s.downcase,
    :parameters => Hash[parse_params(rest)]
  }
end
# Converts parsed parameter nodes into [name, value] pairs.
#
# +list+ is coerced with Array(), so nil yields an empty result. Each node
# is expected to look like { :parameter => { :attribute => ..., :value => ... } }.
# Names are downcased; values are only stringified.
def self.parse_params(list)
  Array(list).map do |node|
    param = node[:parameter]
    [param[:attribute].to_s.downcase, param[:value].to_s]
  end
end
end
# Regexp-based parser for HTTP Content-Type header values such as
#   text/plain; charset=utf-8; test="quoted value"
#
# The character sets below are assembled into regexp source strings (OWS,
# TOKEN, QUOTED_PAIR, QUOTED_STRING), which are then interpolated into
# REGEXP (whole header value) and PARAM_REGEXP (one leading parameter).
module RegexpParser
  # Builds the source of a regexp character class matching any one of +chars+.
  def self.char_class(chars)
    "[" + chars.map { |c| Regexp.escape c }.join("") + "]"
  end
  private_class_method :char_class

  DIGIT = ((0x30)..(0x39)).map(&:chr).freeze
  ALPHA = (((0x41)..(0x5a)).map(&:chr) + ((0x61)..(0x7a)).map(&:chr)).freeze
  VCHAR = ((0x21)..(0x7e)).map(&:chr).freeze
  TCHAR = (%w(! # $ % & ' * + - . ^ _ ` | ~) + DIGIT + ALPHA).freeze
  HTAB = [(0x09).chr].freeze
  SP = [(0x20).chr].freeze
  OBS_TEXT = ((0x80)..(0xff)).map(&:chr).freeze
  # Everything allowed unescaped inside a quoted string: VCHAR without
  # '"' (0x22) and "\\" (0x5c), plus tab, space and OBS_TEXT.
  QDTEXT = (HTAB + SP + [(0x21).chr] + ((0x23)..(0x5b)).map(&:chr) + ((0x5d)..(0x7e)).map(&:chr) + OBS_TEXT).freeze

  OWS = (char_class(HTAB + SP) + "*").freeze
  TOKEN = (char_class(TCHAR) + "+").freeze
  QUOTED_PAIR = (Regexp.escape("\\") + char_class(HTAB + SP + VCHAR + OBS_TEXT)).freeze
  QUOTED_STRING = ("\"(?:" + char_class(QDTEXT) + "|" + QUOTED_PAIR + ")*\"").freeze

  # Anchored with \A / \z rather than ^ / $: the latter match at every line
  # boundary, which let input like "junk\ntext/plain" parse as "text/plain".
  # Captures: 1 = type, 2 = subtype, 3 = the raw parameter section.
  REGEXP = /\A(#{TOKEN})\/(#{TOKEN})((?:#{OWS};#{OWS}#{TOKEN}=(?:#{TOKEN}|#{QUOTED_STRING}))*)\z/
  # Matches one parameter at the very start of the remaining input.
  # Captures: 1 = name, 2 = value (token or quoted string, quotes included).
  PARAM_REGEXP = /\A#{OWS};#{OWS}(#{TOKEN})=(#{TOKEN}|#{QUOTED_STRING})/

  # Parses the header value +s+.
  #
  # Returns a Hash with :type, :subtype (downcased strings) and :parameters
  # (Hash of downcased name => value). When +s+ is nil or does not match the
  # grammar, :type and :subtype are nil and :parameters is empty.
  def self.parse(s)
    o = { :type => nil, :subtype => nil, :parameters => {} }
    # Regexp#match accepts nil (and returns nil), unlike calling s.match.
    m = REGEXP.match(s)
    return o unless m

    o[:type] = m[1].downcase
    o[:subtype] = m[2].downcase
    parse_params(m[3]) { |k, v| o[:parameters][k] = v }
    o
  end

  # Yields (name, value) for every parameter found at the start of +str+,
  # stopping at the first position that does not hold a parameter.
  #
  # Names are downcased and quoted values lose their surrounding quotes, so
  # the result agrees with ParsletParser. Backslash escapes inside a quoted
  # value are kept verbatim, as ParsletParser does.
  # A nil or empty +str+ yields nothing.
  def self.parse_params(str)
    until str.nil? || str.empty?
      m = PARAM_REGEXP.match(str)
      break unless m

      # The match is anchored at the start, so post_match is the remainder.
      str = m.post_match
      yield(m[1].downcase, unquote(m[2]))
    end
  end

  # Strips the surrounding double quotes from a quoted-string value.
  # A token can never start with '"', so a leading quote identifies one.
  def self.unquote(value)
    value.start_with?('"') ? value[1..-2] : value
  end
  private_class_method :unquote
end
end
end
# Sample header value: two parameters, the second one a quoted string that
# contains backslash-escaped double quotes.
TEST = 'text/plain; charset=utf-8; test="foo \"bar\" baz"'

# Runs both parsers over the same input and prints a comparison.
Benchmark.ips do |bench|
  bench.report("parslet") { HTTP::ContentType::ParsletParser.parse(TEST) }
  bench.report("regexp") { HTTP::ContentType::RegexpParser.parse(TEST) }
  bench.compare!
end
@ixti
Copy link
Author

ixti commented Jan 4, 2015

% cat Gemfile.lock
GEM
  remote: https://rubygems.org/
  specs:
    benchmark-ips (2.1.0)
    blankslate (3.1.3)
    parslet (1.6.2)
      blankslate (>= 2.0, <= 4.0)

PLATFORMS
  ruby

DEPENDENCIES
  benchmark-ips
  parslet

% bundle exec ruby test.rb
Calculating -------------------------------------
             parslet    77.000  i/100ms
              regexp     8.898k i/100ms
-------------------------------------------------
             parslet    779.462  (± 2.3%) i/s -      3.927k
              regexp     99.384k (± 0.8%) i/s -    498.288k

Comparison:
              regexp:    99383.8 i/s
             parslet:      779.5 i/s - 127.50x slower

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment