Last active
September 27, 2023 16:00
-
-
Save tenderlove/e9bb912648a3d2bce00c4f60bc632a10 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "strscan" | |
class Lexer | |
IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*\b/ | |
IGNORE = %r{ | |
(?: | |
[, \c\r\n\t]+ | | |
\#.*$ | |
)* | |
}x | |
INT = /[-]?(?:[0]|[1-9][0-9]*)/ | |
FLOAT_DECIMAL = /[.][0-9]+/ | |
FLOAT_EXP = /[eE][+-]?[0-9]+/ | |
FLOAT = /#{INT}#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP}/ | |
KEYWORDS = [ "on", "fragment", "true", "false", "null", "query", "mutation", | |
"subscription", "schema", "scalar", "type", "extend", "implements", | |
"interface", "union", "enum", "input", "directive", "repeatable" | |
].freeze | |
KW_RE = /#{Regexp.union(KEYWORDS.sort)}\b/ | |
module Literals | |
LCURLY = '{' | |
RCURLY = '}' | |
LPAREN = '(' | |
RPAREN = ')' | |
LBRACKET = '[' | |
RBRACKET = ']' | |
COLON = ':' | |
VAR_SIGN = '$' | |
DIR_SIGN = '@' | |
EQUALS = '=' | |
BANG = '!' | |
PIPE = '|' | |
AMP = '&' | |
end | |
ELLIPSIS = '...' | |
PUNCTUATION_TABLE = Literals.constants.each_with_object([]) { |n, o| | |
o[Literals.const_get(n).ord] = n | |
} | |
def initialize doc | |
@doc = doc | |
@scan = StringScanner.new doc | |
end | |
def next_token | |
@scan.skip(IGNORE) | |
return if @scan.eos? | |
case | |
when tok = PUNCTUATION_TABLE[@doc.getbyte(@scan.pos)] then | |
# If the byte at the current position is inside our lookup table, push | |
# the scanner position forward 1 and return the token | |
@scan.pos += 1 | |
tok | |
when len = @scan.skip(KW_RE) then | |
# Return early if uniquely identifiable via length | |
return :ON if len == 2 | |
return :SUBSCRIPTION if len == 12 | |
# Get the position of the start of the keyword | |
start = @scan.pos - len | |
# Get the 2nd and 3rd byte of the keyword and combine to a 16 bit int | |
key = (@doc.getbyte(start + 2) << 8) | @doc.getbyte(start + 1) | |
KW_TABLE[_hash(key)] | |
when @scan.skip(IDENTIFIER) then :IDENTIFIER | |
when @scan.skip(ELLIPSIS) then :ELLIPSIS | |
when @scan.skip(FLOAT) then :FLOAT | |
when @scan.skip(INT) then :INT | |
else | |
@scan.getch | |
:UNKNOWN_CHAR | |
end | |
end | |
KW_TABLE = [:INTERFACE, :MUTATION, :EXTEND, :FALSE, :ENUM, :TRUE, :NULL, | |
nil, nil, nil, nil, nil, nil, nil, :QUERY, nil, nil, :REPEATABLE, | |
:IMPLEMENTS, :INPUT, :TYPE, :SCHEMA, nil, nil, nil, :DIRECTIVE, | |
:UNION, nil, nil, :SCALAR, nil, :FRAGMENT] | |
def _hash key | |
(key * 18592990) >> 27 & 0x1f | |
end | |
end | |
require "benchmark/ips" | |
def allocations | |
x = GC.stat(:total_allocated_objects) | |
yield | |
GC.stat(:total_allocated_objects) - x | |
end | |
def go doc | |
lexer = Lexer.new doc | |
while tok = lexer.next_token | |
# do nothing | |
end | |
end | |
if $0 == __FILE__ | |
doc = ARGF.read | |
Benchmark.ips { |x| x.report { go doc } } | |
p ALLOCATIONS: allocations { go doc } | |
p ALLOCATIONS: allocations { go doc } | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@tenderlove Yep! I think you've got the spiritual equivalent in the upstream version 👍