ixti/compare-parslet-with-regexp.rb

## compare-parslet-with-regexp.rb
require "benchmark/ips"
require "parslet"

module HTTP
  class ContentType
    class ParsletParser < ::Parslet::Parser
      # Wrapper around a list of character strings, used to assemble the
      # character sets of the grammar with `+` / `-` arithmetic.
      #
      # Instances respond to `to_ary` and `to_str`, so they can be passed
      # directly to methods that expect an Array or a String.
      class CharList # :nodoc:
        # list - Array of character strings, or any object responding to
        #        `to_a` (including another CharList). When omitted, the
        #        result of the given block is used instead.
        def initialize(list = nil)
          # Normalise to a plain Array. Without this, wrapping the result of
          # `CharList#+` / `CharList#-` stores a CharList inside a CharList,
          # and `to_s` / `size` then fail because CharList has no `join`.
          @list = (list || yield).to_a
        end

        # Returns a new CharList without the characters found in `other`
        # (an Array or a CharList).
        def -(other)
          CharList.new(@list - other.to_a)
        end

        # Returns a new CharList with the characters of `other` appended.
        def +(other)
          CharList.new(@list + other.to_a)
        end

        # Returns a copy of the underlying Array, so callers cannot mutate
        # the list held by this instance.
        def to_a
          @list.dup
        end
        alias_method :to_ary, :to_a

        # Returns all characters concatenated into a single String.
        def to_s
          @list.join
        end
        alias_method :to_str, :to_s

        # Returns the length of the concatenated String (not the number of
        # list entries; the two differ for multi-character entries).
        def size
          to_s.length
        end
      end

      # rubocop:disable LineLength
      # rubocop:disable Blocks
      # rubocop:disable BlockAlignment

      # --- Character sets -------------------------------------------------
      # The names appear to follow the RFC 822 / RFC 2045 grammar
      # (CHAR, CTLs, specials, tspecials) -- NOTE(review): confirm the
      # intended RFC before relying on that.

      # All 128 characters with codes 0..127.
      CHAR      = CharList.new { (0..127).to_a.map(&:chr) }
      # Control characters: codes 0..31 plus 127.
      CTLS      = CharList.new { (0..31).to_a.map(&:chr) << 127.chr }
      CR        = CharList.new { [13.chr] }
      # Not referenced by any rule in the visible code.
      LF        = CharList.new { [10.chr] }
      SPACE     = CharList.new { [" "] }
      HTAB      = CharList.new { [9.chr] }
      # A single two-character entry ("\r\n"), not two separate entries.
      CRLF      = CharList.new { [13.chr + 10.chr] }
      SPECIALS  = CharList.new { ["(", ")", "<", ">", "@", ",", ";", ":", "\\", "\"", ".", "[", "]"] }
      # The block returns the result of CharList#+, i.e. a CharList rather
      # than an Array.
      TSPECIALS = CharList.new { SPECIALS + ["/", "?", "="] }

      # --- Grammar rules --------------------------------------------------
      # `Regexp.escape` is handed CharList instances below; that relies on
      # CharList#to_str for the implicit String conversion.

      # A backslash followed by any CHAR.
      rule(:quoted_pair)    { str("\\") >> match[Regexp.escape CHAR] }
      # Optional CRLF followed by a space or tab, one or more times.
      # Not referenced by any other rule in this grammar.
      rule(:linear_ws)      { (str(CRLF).repeat(0, 1) >> (str(SPACE) | str(HTAB))).repeat(1) }
      # Any CHAR except double quote, backslash and CR.
      rule(:qtext)          { match[Regexp.escape CHAR - ['"', "\\"] - CR] }
      # Double-quoted text; only the part between the quotes is captured as
      # :value, and quoted pairs are captured as written (not unescaped).
      rule(:quoted_string)  { str('"') >> (qtext | quoted_pair).repeat.as(:value) >> str('"') }
      # One or more CHARs that are not SPACE, CTLS or TSPECIALS.
      rule(:token)          { match[Regexp.escape CHAR - SPACE - CTLS - TSPECIALS].repeat(1) }
      rule(:space)          { str(SPACE) }
      rule(:x_token)        { str("x-") >> token }
      # A fixed list of top-level type names, or an "x-" prefixed token.
      # The literals are lower-case; presumably `str` matches them
      # case-sensitively -- verify against the Parslet documentation.
      rule(:type)           { str("application") | str("audio") | str("image") | str("message") | str("multipart") | str("text") | str("video") | x_token }
      rule(:subtype)        { token }
      rule(:attribute)      { token }
      # Both alternatives expose their text under the :value key.
      rule(:value)          { token.as(:value) | quoted_string }
      rule(:parameter)      { attribute.as(:attribute) >> str("=") >> value }
      # One ";"-introduced parameter, with optional SPACE characters (not
      # HTAB) on either side of the semicolon.
      rule(:parameters)     { space.repeat >> str(";") >> space.repeat >> parameter.as(:parameter) }
      # type "/" subtype followed by zero or more parameters.
      rule(:content_type)   { type.as(:type) >> str("/") >> subtype.as(:subtype) >> parameters.repeat }
      root(:content_type)

      # Parses a Content-Type header value with the Parslet grammar.
      #
      # str - the header value to parse.
      #
      # Returns a Hash with :type and :subtype (lower-cased Strings) and
      # :parameters (Hash built from the pairs produced by `parse_params`).
      # Nothing is rescued here, so any error raised by the parser for
      # non-matching input propagates to the caller.
      def self.parse(str)
        tree = new.parse(str)

        # A non-Array result is wrapped so both shapes are handled alike:
        # the first element carries :type / :subtype, the remaining
        # elements are the parameter captures.
        head, *params = tree.is_a?(Array) ? tree : [tree]

        {
          :type       => head[:type].to_s.downcase,
          :subtype    => head[:subtype].to_s.downcase,
          :parameters => Hash[parse_params(params)]
        }
      end

      # Converts parameter captures into [name, value] pairs.
      #
      # list - Array of Hashes shaped like
      #        { :parameter => { :attribute => ..., :value => ... } };
      #        nil is treated as an empty list.
      #
      # Returns an Array of two-element Arrays: the attribute name as a
      # lower-cased String and the value as a String.
      def self.parse_params(list)
        Array(list).each_with_object([]) do |entry, pairs|
          name  = entry[:parameter][:attribute].to_s.downcase
          value = entry[:parameter][:value].to_s
          pairs << [name, value]
        end
      end
    end

    # Regexp-based Content-Type parser.
    #
    # The patterns are assembled from explicit character lists. The names
    # (tchar, OWS, obs-text, qdtext, quoted-pair) appear to follow the
    # RFC 7230 grammar -- NOTE(review): confirm against the RFC.
    #
    # Because OBS_TEXT contributes raw bytes 0x80..0xff, the compiled
    # patterns are binary. Subjects are therefore matched as binary copies
    # and the results are tagged with the caller's encoding again.
    module RegexpParser
      # Escapes every character of `chars` and joins them, producing the
      # inside of a regexp character class.
      def self.char_class(chars)
        chars.map { |c| Regexp.escape(c) }.join("")
      end
      private_class_method :char_class

      DIGIT         = ((0x30)..(0x39)).map(&:chr).freeze
      ALPHA         = (((0x41)..(0x5a)).map(&:chr) + ((0x61)..(0x7a)).map(&:chr)).freeze
      VCHAR         = ((0x21)..(0x7e)).map(&:chr).freeze
      TCHAR         = (%w(! # $ % & ' * + - . ^ _ ` | ~) + DIGIT + ALPHA).freeze
      HTAB          = [(0x09).chr].freeze
      SP            = [(0x20).chr].freeze
      OBS_TEXT      = ((0x80)..(0xff)).map(&:chr).freeze
      # Everything allowed unescaped inside a quoted string: no double
      # quote (0x22) and no backslash (0x5c).
      QDTEXT        = (HTAB + SP + [(0x21).chr] + ((0x23)..(0x5b)).map(&:chr) + ((0x5d)..(0x7e)).map(&:chr) + OBS_TEXT).freeze

      OWS           = ("[" + char_class(HTAB + SP) + "]*").freeze
      TOKEN         = ("[" + char_class(TCHAR) + "]+").freeze
      QUOTED_PAIR   = (Regexp.escape("\\") + "[" + char_class(HTAB + SP + VCHAR + OBS_TEXT) + "]").freeze
      QUOTED_STRING = ("\"(?:[" + char_class(QDTEXT) + "]|" + QUOTED_PAIR + ")*\"").freeze

      # Anchored with \A / \z rather than ^ / $: the latter match at every
      # line boundary, which would accept a valid-looking line embedded in
      # multi-line input.
      REGEXP        = /\A(#{TOKEN})\/(#{TOKEN})((?:#{OWS};#{OWS}#{TOKEN}=(?:#{TOKEN}|#{QUOTED_STRING}))*)\z/
      PARAM_REGEXP  = /\A#{OWS};#{OWS}(#{TOKEN})=(#{TOKEN}|#{QUOTED_STRING})/

      # Parses a Content-Type header value.
      #
      # s - the header value String.
      #
      # Returns a Hash with :type and :subtype (lower-cased Strings, or nil
      # when `s` does not match) and :parameters (Hash of lower-cased
      # attribute name => value).
      def self.parse(s)
        o = { :type => nil, :subtype => nil, :parameters => {} }

        # Match on a binary copy so non-ASCII input does not clash with the
        # binary pattern.
        m = s.dup.force_encoding(Encoding::BINARY).match(REGEXP)
        return o unless m

        enc = s.encoding
        o[:type]    = m[1].downcase.force_encoding(enc)
        o[:subtype] = m[2].downcase.force_encoding(enc)

        parse_params(m[3].force_encoding(enc)) { |k, v| o[:parameters][k] = v }

        o
      end

      # Walks a parameter list such as `; a=b; c="d"` and yields each pair.
      #
      # str - String holding zero or more ";"-introduced parameters; nil is
      #       treated as empty.
      #
      # Yields the attribute name (lower-cased) and the value. A quoted
      # value is yielded without its surrounding quotes, matching what
      # ParsletParser reports; quoted pairs inside it are left as written.
      # Scanning stops at the first position that is not a parameter.
      #
      # Returns nil.
      def self.parse_params(str)
        return if str.nil?

        enc  = str.encoding
        rest = str.dup.force_encoding(Encoding::BINARY)

        until rest.empty?
          m = rest.match(PARAM_REGEXP)
          break unless m

          rest  = rest.byteslice(m[0].bytesize, rest.bytesize)
          name  = m[1].downcase
          value = m[2]
          # A token can never start with a double quote, so a leading quote
          # identifies the quoted-string alternative.
          value = value[1...-1] if value.start_with?('"')

          yield(name.force_encoding(enc), value.force_encoding(enc))
        end
      end
    end
  end
end

# Sample header value fed to both parsers: one token parameter and one
# quoted-string parameter containing quoted pairs.
TEST = 'text/plain; charset=utf-8; test="foo \"bar\" baz"'

# Measures iterations per second for each parser on the same input and
# prints a comparison.
Benchmark.ips do |bench|
  bench.report("parslet") do
    HTTP::ContentType::ParsletParser.parse(TEST)
  end

  bench.report("regexp") do
    HTTP::ContentType::RegexpParser.parse(TEST)
  end

  bench.compare!
end
	require "benchmark/ips"
	require "parslet"

	module HTTP
	class ContentType
	class ParsletParser < ::Parslet::Parser
	# Wrapper around a list of character strings, used to assemble the
	# character sets of the grammar with `+` / `-` arithmetic.
	#
	# Instances respond to `to_ary` and `to_str`, so they can be passed
	# directly to methods that expect an Array or a String.
	class CharList # :nodoc:
	  # list - Array of character strings, or any object responding to
	  #        `to_a` (including another CharList). When omitted, the
	  #        result of the given block is used instead.
	  def initialize(list = nil)
	    # Normalise to a plain Array. Without this, wrapping the result of
	    # `CharList#+` / `CharList#-` stores a CharList inside a CharList,
	    # and `to_s` / `size` then fail because CharList has no `join`.
	    @list = (list || yield).to_a
	  end

	  # Returns a new CharList without the characters found in `other`
	  # (an Array or a CharList).
	  def -(other)
	    CharList.new(@list - other.to_a)
	  end

	  # Returns a new CharList with the characters of `other` appended.
	  def +(other)
	    CharList.new(@list + other.to_a)
	  end

	  # Returns a copy of the underlying Array, so callers cannot mutate
	  # the list held by this instance.
	  def to_a
	    @list.dup
	  end
	  alias_method :to_ary, :to_a

	  # Returns all characters concatenated into a single String.
	  def to_s
	    @list.join
	  end
	  alias_method :to_str, :to_s

	  # Returns the length of the concatenated String (not the number of
	  # list entries; the two differ for multi-character entries).
	  def size
	    to_s.length
	  end
	end

	# rubocop:disable LineLength
	# rubocop:disable Blocks
	# rubocop:disable BlockAlignment

	# --- Character sets ---------------------------------------------------
	# The names appear to follow the RFC 822 / RFC 2045 grammar
	# -- NOTE(review): confirm the intended RFC before relying on that.

	# All 128 characters with codes 0..127.
	CHAR = CharList.new { (0..127).to_a.map(&:chr) }
	# Control characters: codes 0..31 plus 127.
	CTLS = CharList.new { (0..31).to_a.map(&:chr) << 127.chr }
	CR = CharList.new { [13.chr] }
	LF = CharList.new { [10.chr] }
	SPACE = CharList.new { [" "] }
	HTAB = CharList.new { [9.chr] }
	# A single two-character entry ("\r\n"), not two separate entries.
	CRLF = CharList.new { [13.chr + 10.chr] }
	SPECIALS = CharList.new { ["(", ")", "<", ">", "@", ",", ";", ":", "\\", "\"", ".", "[", "]"] }
	TSPECIALS = CharList.new { SPECIALS + ["/", "?", "="] }

	# --- Grammar rules ----------------------------------------------------
	# `Regexp.escape` is handed CharList instances below; that relies on
	# CharList#to_str for the implicit String conversion.

	# A backslash followed by any CHAR.
	rule(:quoted_pair) { str("\\") >> match[Regexp.escape CHAR] }
	# Optional CRLF followed by a space or tab, one or more times.
	rule(:linear_ws) { (str(CRLF).repeat(0, 1) >> (str(SPACE) | str(HTAB))).repeat(1) }
	# Any CHAR except double quote, backslash and CR.
	rule(:qtext) { match[Regexp.escape CHAR - ['"', "\\"] - CR] }
	# Double-quoted text; only the part between the quotes is captured.
	rule(:quoted_string) { str('"') >> (qtext | quoted_pair).repeat.as(:value) >> str('"') }
	# One or more CHARs that are not SPACE, CTLS or TSPECIALS.
	rule(:token) { match[Regexp.escape CHAR - SPACE - CTLS - TSPECIALS].repeat(1) }
	rule(:space) { str(SPACE) }
	rule(:x_token) { str("x-") >> token }
	# A fixed list of top-level type names, or an "x-" prefixed token.
	rule(:type) { str("application") | str("audio") | str("image") | str("message") | str("multipart") | str("text") | str("video") | x_token }
	rule(:subtype) { token }
	rule(:attribute) { token }
	# Both alternatives expose their text under the :value key.
	rule(:value) { token.as(:value) | quoted_string }
	rule(:parameter) { attribute.as(:attribute) >> str("=") >> value }
	# One ";"-introduced parameter with optional SPACE on either side.
	rule(:parameters) { space.repeat >> str(";") >> space.repeat >> parameter.as(:parameter) }
	# type "/" subtype followed by zero or more parameters.
	rule(:content_type) { type.as(:type) >> str("/") >> subtype.as(:subtype) >> parameters.repeat }
	root(:content_type)

	# Parses a Content-Type header value with the Parslet grammar.
	#
	# str - the header value to parse.
	#
	# Returns a Hash with :type and :subtype (lower-cased Strings) and
	# :parameters (Hash built from the pairs produced by `parse_params`).
	# Nothing is rescued here, so any error raised by the parser for
	# non-matching input propagates to the caller.
	def self.parse(str)
	  tree = new.parse(str)

	  # A non-Array result is wrapped so both shapes are handled alike:
	  # the first element carries :type / :subtype, the remaining
	  # elements are the parameter captures.
	  head, *params = tree.is_a?(Array) ? tree : [tree]

	  {
	    :type       => head[:type].to_s.downcase,
	    :subtype    => head[:subtype].to_s.downcase,
	    :parameters => Hash[parse_params(params)]
	  }
	end

	# Converts parameter captures into [name, value] pairs.
	#
	# list - Array of Hashes shaped like
	#        { :parameter => { :attribute => ..., :value => ... } };
	#        nil is treated as an empty list.
	#
	# Returns an Array of two-element Arrays: the attribute name as a
	# lower-cased String and the value as a String.
	def self.parse_params(list)
	  Array(list).map do |hash|
	    [
	      hash[:parameter][:attribute].to_s.downcase,
	      hash[:parameter][:value].to_s
	    ]
	  end
	end
	end

	# Regexp-based Content-Type parser.
	#
	# The patterns are assembled from explicit character lists. The names
	# (tchar, OWS, obs-text, qdtext, quoted-pair) appear to follow the
	# RFC 7230 grammar -- NOTE(review): confirm against the RFC.
	#
	# Because OBS_TEXT contributes raw bytes 0x80..0xff, the compiled
	# patterns are binary. Subjects are therefore matched as binary copies
	# and the results are tagged with the caller's encoding again.
	module RegexpParser
	  # Escapes every character of `chars` and joins them, producing the
	  # inside of a regexp character class.
	  def self.char_class(chars)
	    chars.map { |c| Regexp.escape(c) }.join("")
	  end
	  private_class_method :char_class

	  DIGIT = ((0x30)..(0x39)).map(&:chr).freeze
	  ALPHA = (((0x41)..(0x5a)).map(&:chr) + ((0x61)..(0x7a)).map(&:chr)).freeze
	  VCHAR = ((0x21)..(0x7e)).map(&:chr).freeze
	  TCHAR = (%w(! # $ % & ' * + - . ^ _ ` | ~) + DIGIT + ALPHA).freeze
	  HTAB = [(0x09).chr].freeze
	  SP = [(0x20).chr].freeze
	  OBS_TEXT = ((0x80)..(0xff)).map(&:chr).freeze
	  # Everything allowed unescaped inside a quoted string: no double
	  # quote (0x22) and no backslash (0x5c).
	  QDTEXT = (HTAB + SP + [(0x21).chr] + ((0x23)..(0x5b)).map(&:chr) + ((0x5d)..(0x7e)).map(&:chr) + OBS_TEXT).freeze

	  OWS = ("[" + char_class(HTAB + SP) + "]*").freeze
	  TOKEN = ("[" + char_class(TCHAR) + "]+").freeze
	  QUOTED_PAIR = (Regexp.escape("\\") + "[" + char_class(HTAB + SP + VCHAR + OBS_TEXT) + "]").freeze
	  QUOTED_STRING = ("\"(?:[" + char_class(QDTEXT) + "]|" + QUOTED_PAIR + ")*\"").freeze

	  # Anchored with \A / \z rather than ^ / $: the latter match at every
	  # line boundary, which would accept a valid-looking line embedded in
	  # multi-line input.
	  REGEXP = /\A(#{TOKEN})\/(#{TOKEN})((?:#{OWS};#{OWS}#{TOKEN}=(?:#{TOKEN}|#{QUOTED_STRING}))*)\z/
	  PARAM_REGEXP = /\A#{OWS};#{OWS}(#{TOKEN})=(#{TOKEN}|#{QUOTED_STRING})/

	  # Parses a Content-Type header value.
	  #
	  # s - the header value String.
	  #
	  # Returns a Hash with :type and :subtype (lower-cased Strings, or nil
	  # when `s` does not match) and :parameters (Hash of lower-cased
	  # attribute name => value).
	  def self.parse(s)
	    o = { :type => nil, :subtype => nil, :parameters => {} }

	    # Match on a binary copy so non-ASCII input does not clash with the
	    # binary pattern.
	    m = s.dup.force_encoding(Encoding::BINARY).match(REGEXP)
	    return o unless m

	    enc = s.encoding
	    o[:type] = m[1].downcase.force_encoding(enc)
	    o[:subtype] = m[2].downcase.force_encoding(enc)

	    parse_params(m[3].force_encoding(enc)) { |k, v| o[:parameters][k] = v }

	    o
	  end

	  # Walks a parameter list such as `; a=b; c="d"` and yields each pair.
	  #
	  # str - String holding zero or more ";"-introduced parameters; nil is
	  #       treated as empty.
	  #
	  # Yields the attribute name (lower-cased) and the value. A quoted
	  # value is yielded without its surrounding quotes, matching what
	  # ParsletParser reports; quoted pairs inside it are left as written.
	  # Scanning stops at the first position that is not a parameter.
	  #
	  # Returns nil.
	  def self.parse_params(str)
	    return if str.nil?

	    enc = str.encoding
	    rest = str.dup.force_encoding(Encoding::BINARY)

	    until rest.empty?
	      m = rest.match(PARAM_REGEXP)
	      break unless m

	      rest = rest.byteslice(m[0].bytesize, rest.bytesize)
	      name = m[1].downcase
	      value = m[2]
	      # A token can never start with a double quote, so a leading quote
	      # identifies the quoted-string alternative.
	      value = value[1...-1] if value.start_with?('"')

	      yield(name.force_encoding(enc), value.force_encoding(enc))
	    end
	  end
	end
	end
	end

	# Sample header value fed to both parsers: one token parameter and one
	# quoted-string parameter containing quoted pairs.
	TEST = 'text/plain; charset=utf-8; test="foo \"bar\" baz"'

	# Measures iterations per second for each parser on the same input and
	# prints a comparison.
	Benchmark.ips do |x|
	  x.report("parslet") { HTTP::ContentType::ParsletParser.parse TEST }
	  x.report("regexp") { HTTP::ContentType::RegexpParser.parse TEST }

	  x.compare!
	end