-
-
Save tenderlove/e9bb912648a3d2bce00c4f60bc632a10 to your computer and use it in GitHub Desktop.
require "strscan" | |
# Minimal GraphQL lexer built on StringScanner, written for benchmarking.
#
# #next_token returns one symbol per token (:IDENTIFIER, :INT, :FLOAT,
# keyword symbols such as :QUERY, punctuation symbols such as :LCURLY) and
# nil at end of input.  Token *values* are never materialized — the point
# of this lexer is to measure tokenization speed with zero allocations on
# the hot path.
class Lexer
  IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*\b/

  # Tokens GraphQL treats as insignificant: commas, whitespace, line
  # terminators, and #-comments to end of line.
  # NOTE(review): `\c\r` inside the character class is a control-escape
  # sequence, not the two characters "\c" and "\r" — presumably this was
  # meant to cover CR; confirm it is not a typo.
  IGNORE = %r{
    (?:
      [, \c\r\n\t]+ |
      \#.*$
    )*
  }x

  INT = /[-]?(?:[0]|[1-9][0-9]*)/
  FLOAT_DECIMAL = /[.][0-9]+/
  FLOAT_EXP = /[eE][+-]?[0-9]+/
  # A float is an integer part followed by a fraction, an exponent, or both.
  # The alternation MUST be grouped behind INT: without the parentheses,
  # "1.5" (fraction but no exponent) fails to lex as a single FLOAT, and a
  # bare ".5" or "e5" incorrectly matches with no integer part — both
  # violations of the GraphQL FloatValue grammar.
  FLOAT = /#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/

  KEYWORDS = [ "on", "fragment", "true", "false", "null", "query", "mutation",
    "subscription", "schema", "scalar", "type", "extend", "implements",
    "interface", "union", "enum", "input", "directive", "repeatable"
  ].freeze

  # Sorted so Regexp.union prefers the longest overlapping alternative;
  # the trailing \b stops "type" from matching inside "typeX".
  KW_RE = /#{Regexp.union(KEYWORDS.sort)}\b/

  # Single-character punctuation.  Each constant's *name* doubles as the
  # token symbol returned by next_token.
  module Literals
    LCURLY = '{'
    RCURLY = '}'
    LPAREN = '('
    RPAREN = ')'
    LBRACKET = '['
    RBRACKET = ']'
    COLON = ':'
    VAR_SIGN = '$'
    DIR_SIGN = '@'
    EQUALS = '='
    BANG = '!'
    PIPE = '|'
    AMP = '&'
  end

  ELLIPSIS = '...'

  # Sparse array mapping a punctuation byte to its token symbol, so the hot
  # path classifies punctuation with a single getbyte + array index.
  PUNCTUATION_TABLE = Literals.constants.each_with_object([]) { |n, o|
    o[Literals.const_get(n).ord] = n
  }

  def initialize doc
    @doc = doc
    @scan = StringScanner.new doc
  end

  # Returns the next token symbol, or nil when the input is exhausted.
  # Unrecognized bytes are consumed one at a time as :UNKNOWN_CHAR.
  def next_token
    @scan.skip(IGNORE)
    return if @scan.eos?
    case
    when tok = PUNCTUATION_TABLE[@doc.getbyte(@scan.pos)] then
      # If the byte at the current position is inside our lookup table, push
      # the scanner position forward 1 and return the token
      @scan.pos += 1
      tok
    when len = @scan.skip(KW_RE) then
      # Return early if uniquely identifiable via length
      return :ON if len == 2
      return :SUBSCRIPTION if len == 12
      # Get the position of the start of the keyword
      start = @scan.pos - len
      # Get the 2nd and 3rd byte of the keyword and combine to a 16 bit int
      key = (@doc.getbyte(start + 2) << 8) | @doc.getbyte(start + 1)
      KW_TABLE[_hash(key)]
    when @scan.skip(IDENTIFIER) then :IDENTIFIER
    when @scan.skip(ELLIPSIS) then :ELLIPSIS
    when @scan.skip(FLOAT) then :FLOAT
    when @scan.skip(INT) then :INT
    else
      @scan.getch
      :UNKNOWN_CHAR
    end
  end

  # Perfect-hash table for the keywords not resolved by length alone.
  # _hash maps the 16-bit key built from bytes 1 and 2 of a keyword to a
  # unique slot here.  (next_token references this constant at call time,
  # so defining it after the method is fine.)
  KW_TABLE = [:INTERFACE, :MUTATION, :EXTEND, :FALSE, :ENUM, :TRUE, :NULL,
              nil, nil, nil, nil, nil, nil, nil, :QUERY, nil, nil, :REPEATABLE,
              :IMPLEMENTS, :INPUT, :TYPE, :SCHEMA, nil, nil, nil, :DIRECTIVE,
              :UNION, nil, nil, :SCALAR, nil, :FRAGMENT]

  # Multiply-shift-mask perfect hash: the magic multiplier was searched for
  # offline so that every keyword key lands in a distinct KW_TABLE slot.
  def _hash key
    (key * 18592990) >> 27 & 0x1f
  end
end
require "benchmark/ips" | |
# Run the given block and return the number of Ruby objects it allocated,
# measured via GC.stat(:total_allocated_objects) before and after.
def allocations
  before = GC.stat(:total_allocated_objects)
  yield
  after = GC.stat(:total_allocated_objects)
  after - before
end
# Drive a Lexer over +doc+ until the token stream is exhausted.  Used
# purely for benchmarking — the tokens themselves are discarded.
def go doc
  lexer = Lexer.new doc
  # Pull tokens until next_token returns nil (end of input).  The token is
  # deliberately not bound to a local: the original `while tok = ...` left
  # an unused variable that triggers a warning under `ruby -w`.
  nil while lexer.next_token
end
# Benchmark harness: only runs when this file is executed directly.
if $0 == __FILE__
  # Read the GraphQL document to lex from stdin or the files given in ARGV.
  doc = ARGF.read
  # Throughput via benchmark-ips (gem), then allocation counts.
  Benchmark.ips { |x| x.report { go doc } }
  # Printed twice: the first run may include one-time setup allocations,
  # the second shows the steady-state count.
  p ALLOCATIONS: allocations { go doc }
  p ALLOCATIONS: allocations { go doc }
end
@marktriggs nice! Thanks for the tip. Do you mind trying it on the upstream lexer? Unfortunately I changed it around a bunch, so I'm not sure if the optimization will apply. The changes I've made upstream are pretty similar to this optimization, but it's great to document them here for future internet travelers.
@tenderlove Yep! I think you've got the spiritual equivalent in the upstream version 👍
Hi @tenderlove.
Nice post!
Because all keywords are lexically identifiers, I ran some experiments to check whether processing keywords and identifiers together would provide a performance gain — and I found an approach that gives a 10% improvement in the upstream benchmark.
The idea is to use masks instead of sequential codes for lead characters:
module LeadBytes
INT = 1 << 0
KW = 1 << 1
ELLIPSIS = 1 << 2
STRING = 1 << 3
PUNCT = 1 << 4
IDENT = 1 << 5
end
("A".."Z").each { |chr| LEAD_BYTES[chr.ord] |= LeadBytes::IDENT }
("a".."z").each { |chr| LEAD_BYTES[chr.ord] |= LeadBytes::IDENT }
LEAD_BYTES['_'.ord] |= LeadBytes::IDENT
KEYWORDS.each { |kw| LEAD_BYTES[kw.getbyte(0)] |= LeadBytes::KW }
Keywords are then considered only after an identifier has been captured, with a few tricks to avoid matching against KW_RE
as much as possible:
elsif lead_code & LeadBytes::IDENT != 0
@len = @scan.skip(IDENTIFIER)
if lead_code & LeadBytes::KW != 0 then
if @len >= 4 then
fp = (@string.getbyte(@start+1) * 13 + @string.getbyte(@start+2)) * 13 + @string.getbyte(@start+3)
tk = KW_FP3[fp]
if tk then
@scan.pos = @start
return tk if @scan.skip(KW_RE) == @len
@scan.pos = @start + @len
end
elsif @len == 2 then
return :ON if lead_byte == 111 && @string.getbyte(@start+1) == 110
end
end
:IDENTIFIER
KW_FP3 is a precomputed map of fingerprints built from bytes 1, 2, and 3 of every keyword (except "on"). It provides a fast check that avoids matching against KW_RE most of the time, and it also identifies the keyword token.
Enabling YJIT, performance goes from
tinygql$ rake benchmark
/home/p/.rvm/rubies/ruby-3.3.5/bin/ruby --yjit -I lib bin/bench.rb
ruby 3.3.5 (2024-09-03 revision ef084cc8f4) +YJIT [x86_64-linux]
Warming up --------------------------------------
kitchen-sink 550.000 i/100ms
negotiate 19.000 i/100ms
Calculating -------------------------------------
kitchen-sink 5.745k (± 0.9%) i/s (174.06 μs/i) - 29.150k in 5.074317s
negotiate 193.872 (± 0.5%) i/s (5.16 ms/i) - 988.000 in 5.096450s
############################## ALLOCATIONS ##############################
kitchen-sink 231
negotiate 7365
to
$ rake benchmark
/home/p/.rvm/rubies/ruby-3.3.5/bin/ruby --yjit -I lib bin/bench.rb
ruby 3.3.5 (2024-09-03 revision ef084cc8f4) +YJIT [x86_64-linux]
Warming up --------------------------------------
kitchen-sink 607.000 i/100ms
negotiate 21.000 i/100ms
Calculating -------------------------------------
kitchen-sink 6.298k (± 1.0%) i/s (158.78 μs/i) - 31.564k in 5.012249s
negotiate 215.438 (± 0.9%) i/s (4.64 ms/i) - 1.092k in 5.069007s
############################## ALLOCATIONS ##############################
kitchen-sink 231
negotiate 7365
Regards!
@Palaolitico wow this is great! Thank you!! 😍
Do you mind sending a PR upstream? I'd be happy to merge.
Yes, of course. Here you have it.
Hi @tenderlove,
Great post, thanks! Messing around with this test, I see a respectable performance boost by helping the KW_RE short-circuit in more cases:
(stealth edited: now using a zero-width pattern to make sure we don't accept weird keywords like "frue" and "talse". The old "I made this faster by introducing bugs" :P)