-
-
Save tenderlove/e9bb912648a3d2bce00c4f60bc632a10 to your computer and use it in GitHub Desktop.
require "strscan" | |
# Minimal GraphQL lexer built on StringScanner, written for benchmarking.
#
# #next_token returns one symbol per token (:IDENTIFIER, :INT, :FLOAT,
# keyword symbols such as :QUERY, punctuation symbols such as :LCURLY) and
# nil at end of input.  Token *values* are never materialized — the point
# of this lexer is to measure tokenization speed with zero allocations on
# the hot path.
class Lexer
  IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*\b/

  # Tokens GraphQL treats as insignificant: commas, whitespace, line
  # terminators, and #-comments to end of line.
  # NOTE(review): `\c\r` inside the character class is a control-escape
  # sequence, not the two characters "\c" and "\r" — presumably this was
  # meant to cover CR; confirm it is not a typo.
  IGNORE = %r{
    (?:
      [, \c\r\n\t]+ |
      \#.*$
    )*
  }x

  INT = /[-]?(?:[0]|[1-9][0-9]*)/
  FLOAT_DECIMAL = /[.][0-9]+/
  FLOAT_EXP = /[eE][+-]?[0-9]+/
  # A float is an integer part followed by a fraction, an exponent, or both.
  # The alternation MUST be grouped behind INT: without the parentheses,
  # "1.5" (fraction but no exponent) fails to lex as a single FLOAT, and a
  # bare ".5" or "e5" incorrectly matches with no integer part — both
  # violations of the GraphQL FloatValue grammar.
  FLOAT = /#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/

  KEYWORDS = [ "on", "fragment", "true", "false", "null", "query", "mutation",
    "subscription", "schema", "scalar", "type", "extend", "implements",
    "interface", "union", "enum", "input", "directive", "repeatable"
  ].freeze

  # Sorted so Regexp.union prefers the longest overlapping alternative;
  # the trailing \b stops "type" from matching inside "typeX".
  KW_RE = /#{Regexp.union(KEYWORDS.sort)}\b/

  # Single-character punctuation.  Each constant's *name* doubles as the
  # token symbol returned by next_token.
  module Literals
    LCURLY = '{'
    RCURLY = '}'
    LPAREN = '('
    RPAREN = ')'
    LBRACKET = '['
    RBRACKET = ']'
    COLON = ':'
    VAR_SIGN = '$'
    DIR_SIGN = '@'
    EQUALS = '='
    BANG = '!'
    PIPE = '|'
    AMP = '&'
  end

  ELLIPSIS = '...'

  # Sparse array mapping a punctuation byte to its token symbol, so the hot
  # path classifies punctuation with a single getbyte + array index.
  PUNCTUATION_TABLE = Literals.constants.each_with_object([]) { |n, o|
    o[Literals.const_get(n).ord] = n
  }

  def initialize doc
    @doc = doc
    @scan = StringScanner.new doc
  end

  # Returns the next token symbol, or nil when the input is exhausted.
  # Unrecognized bytes are consumed one at a time as :UNKNOWN_CHAR.
  def next_token
    @scan.skip(IGNORE)
    return if @scan.eos?
    case
    when tok = PUNCTUATION_TABLE[@doc.getbyte(@scan.pos)] then
      # If the byte at the current position is inside our lookup table, push
      # the scanner position forward 1 and return the token
      @scan.pos += 1
      tok
    when len = @scan.skip(KW_RE) then
      # Return early if uniquely identifiable via length
      return :ON if len == 2
      return :SUBSCRIPTION if len == 12
      # Get the position of the start of the keyword
      start = @scan.pos - len
      # Get the 2nd and 3rd byte of the keyword and combine to a 16 bit int
      key = (@doc.getbyte(start + 2) << 8) | @doc.getbyte(start + 1)
      KW_TABLE[_hash(key)]
    when @scan.skip(IDENTIFIER) then :IDENTIFIER
    when @scan.skip(ELLIPSIS) then :ELLIPSIS
    when @scan.skip(FLOAT) then :FLOAT
    when @scan.skip(INT) then :INT
    else
      @scan.getch
      :UNKNOWN_CHAR
    end
  end

  # Perfect-hash table for the keywords not resolved by length alone.
  # _hash maps the 16-bit key built from bytes 1 and 2 of a keyword to a
  # unique slot here.  (next_token references this constant at call time,
  # so defining it after the method is fine.)
  KW_TABLE = [:INTERFACE, :MUTATION, :EXTEND, :FALSE, :ENUM, :TRUE, :NULL,
              nil, nil, nil, nil, nil, nil, nil, :QUERY, nil, nil, :REPEATABLE,
              :IMPLEMENTS, :INPUT, :TYPE, :SCHEMA, nil, nil, nil, :DIRECTIVE,
              :UNION, nil, nil, :SCALAR, nil, :FRAGMENT]

  # Multiply-shift-mask perfect hash: the magic multiplier was searched for
  # offline so that every keyword key lands in a distinct KW_TABLE slot.
  def _hash key
    (key * 18592990) >> 27 & 0x1f
  end
end
require "benchmark/ips" | |
# Run the given block and return the number of Ruby objects it allocated,
# measured via GC.stat(:total_allocated_objects) before and after.
def allocations
  before = GC.stat(:total_allocated_objects)
  yield
  after = GC.stat(:total_allocated_objects)
  after - before
end
# Drive a Lexer over +doc+ until the token stream is exhausted.  Used
# purely for benchmarking — the tokens themselves are discarded.
def go doc
  lexer = Lexer.new doc
  # Pull tokens until next_token returns nil (end of input).  The token is
  # deliberately not bound to a local: the original `while tok = ...` left
  # an unused variable that triggers a warning under `ruby -w`.
  nil while lexer.next_token
end
# Benchmark harness: only runs when this file is executed directly.
if $0 == __FILE__
  # Read the GraphQL document to lex from stdin or the files given in ARGV.
  doc = ARGF.read
  # Throughput via benchmark-ips (gem), then allocation counts.
  Benchmark.ips { |x| x.report { go doc } }
  # Printed twice: the first run may include one-time setup allocations,
  # the second shows the steady-state count.
  p ALLOCATIONS: allocations { go doc }
  p ALLOCATIONS: allocations { go doc }
end
@marktriggs nice! Thanks for the tip. Do you mind trying it on the upstream lexer? Unfortunately I changed it around a bunch, so I'm not sure if the optimization will apply. The changes I've made upstream are pretty similar to this optimization, but it's great to document them here for future internet travelers.
@tenderlove Yep! I think you've got the spiritual equivalent in the upstream version 👍
Hi @tenderlove.
Nice post!
Because all keywords are lexically identifiers, I ran some experiments to check whether processing keywords and identifiers together would provide a performance gain — and I found an approach that gives a 10% improvement in the upstream benchmark.
The idea is to use masks instead of sequential codes for lead characters:
module LeadBytes
INT = 1 << 0
KW = 1 << 1
ELLIPSIS = 1 << 2
STRING = 1 << 3
PUNCT = 1 << 4
IDENT = 1 << 5
end
("A".."Z").each { |chr| LEAD_BYTES[chr.ord] |= LeadBytes::IDENT }
("a".."z").each { |chr| LEAD_BYTES[chr.ord] |= LeadBytes::IDENT }
LEAD_BYTES['_'.ord] |= LeadBytes::IDENT
KEYWORDS.each { |kw| LEAD_BYTES[kw.getbyte(0)] |= LeadBytes::KW }
Keywords are then considered only after an identifier has been captured, with a few tricks to avoid matching against KW_RE
as much as possible:
elsif lead_code & LeadBytes::IDENT != 0
@len = @scan.skip(IDENTIFIER)
if lead_code & LeadBytes::KW != 0 then
if @len >= 4 then
fp = (@string.getbyte(@start+1) * 13 + @string.getbyte(@start+2)) * 13 + @string.getbyte(@start+3)
tk = KW_FP3[fp]
if tk then
@scan.pos = @start
return tk if @scan.skip(KW_RE) == @len
@scan.pos = @start + @len
end
elsif @len == 2 then
return :ON if lead_byte == 111 && @string.getbyte(@start+1) == 110
end
end
:IDENTIFIER
KW_FP3 is a precomputed map of fingerprints built from bytes 1, 2, and 3 of every keyword (except "on"). It provides a fast check that avoids matching against KW_RE most of the time, and it also identifies the keyword token.
Enabling YJIT, performance goes from
tinygql$ rake benchmark
/home/p/.rvm/rubies/ruby-3.3.5/bin/ruby --yjit -I lib bin/bench.rb
ruby 3.3.5 (2024-09-03 revision ef084cc8f4) +YJIT [x86_64-linux]
Warming up --------------------------------------
kitchen-sink 550.000 i/100ms
negotiate 19.000 i/100ms
Calculating -------------------------------------
kitchen-sink 5.745k (± 0.9%) i/s (174.06 μs/i) - 29.150k in 5.074317s
negotiate 193.872 (± 0.5%) i/s (5.16 ms/i) - 988.000 in 5.096450s
############################## ALLOCATIONS ##############################
kitchen-sink 231
negotiate 7365
to
$ rake benchmark
/home/p/.rvm/rubies/ruby-3.3.5/bin/ruby --yjit -I lib bin/bench.rb
ruby 3.3.5 (2024-09-03 revision ef084cc8f4) +YJIT [x86_64-linux]
Warming up --------------------------------------
kitchen-sink 607.000 i/100ms
negotiate 21.000 i/100ms
Calculating -------------------------------------
kitchen-sink 6.298k (± 1.0%) i/s (158.78 μs/i) - 31.564k in 5.012249s
negotiate 215.438 (± 0.9%) i/s (4.64 ms/i) - 1.092k in 5.069007s
############################## ALLOCATIONS ##############################
kitchen-sink 231
negotiate 7365
Regards!
@Palaolitico wow this is great! Thank you!! 😍
Do you mind sending a PR upstream? I'd be happy to merge.
Yes, of course. Here you have it.
Hi @tenderlove,
Great post, thanks! Messing around with this test, I see a respectable performance boost by helping the KW_RE short-circuit in more cases:
(stealth edited: now using a zero-width pattern to make sure we don't accept weird keywords like "frue" and "talse". The old "I made this faster by introducing bugs" :P)