Fibered Recursive Descent JSON Parser
#!/usr/bin/env ruby | |
require 'fiber' | |
# A resumable, recursive descent JSON parser, using Fibers. | |
# http://www.negativecode.com/posts/2012/03/26/fibers-resumable-recursive-descent-json-parsing/ | |
class Parser | |
Result = Struct.new(:value) | |
def initialize | |
@finished = false | |
@scanner = Scanner.new | |
@fiber = Fiber.new do | |
result = parse.value | |
@finished = true | |
error unless @scanner.empty? | |
Fiber.yield result | |
end | |
end | |
def <<(data) | |
@scanner << data | |
if @finished | |
error unless @scanner.empty? | |
else | |
@fiber.resume | |
end | |
end | |
private | |
def parse | |
whitespace | |
parse_object || | |
parse_array || | |
error | |
end | |
def parse_value | |
whitespace | |
parse_object || | |
parse_array || | |
parse_string || | |
parse_number || | |
parse_true || | |
parse_false || | |
parse_null || | |
error | |
ensure | |
whitespace | |
end | |
def parse_object | |
parse_container('{}', {}) do |object| | |
key = parse_string || error | |
error unless @scanner.scan(':') | |
object[key.value] = parse_value.value | |
end | |
end | |
def parse_array | |
parse_container('[]', []) do |array| | |
array << parse_value.value | |
end | |
end | |
def parse_container(delimiters, result) | |
open, close = delimiters[0], delimiters[1] | |
return unless @scanner.scan(open) | |
whitespace | |
more = false | |
while @scanner.peek != close | |
yield result | |
ch = @scanner.peek | |
error unless ch == ',' || ch == close | |
more = (ch == ',') | |
@scanner.scan(',') if ch == ',' | |
end | |
error if more | |
@scanner.scan(close) | |
Result.new(result) | |
end | |
def parse_true | |
parse_keyword('true', true) | |
end | |
def parse_false | |
parse_keyword('false', false) | |
end | |
def parse_null | |
parse_keyword('null', nil) | |
end | |
def parse_keyword(word, value) | |
match = word.each_char.all? do |ch| | |
@scanner.scan(ch) | |
end | |
match ? Result.new(value) : nil | |
end | |
def parse_number | |
if ch = @scanner.scan(/\d/) | |
ch << @scanner.scan_until(/\D/) | |
Result.new(ch.to_i) | |
end | |
end | |
def parse_string | |
whitespace | |
return unless @scanner.scan('"') | |
value = @scanner.scan_until(/"/) | |
@scanner.scan('"') | |
whitespace | |
Result.new(value) | |
end | |
def whitespace | |
@scanner.skip(/\s/) | |
end | |
def error | |
raise "grammar error at character #{@scanner.position}" | |
end | |
end | |
class Scanner | |
attr_reader :position | |
def initialize | |
@buf = [] | |
@current = nil | |
@position = 0 | |
end | |
def empty? | |
!@buf.any? {|ch| /\S/ =~ ch } | |
end | |
def <<(data) | |
@buf += data.split('') | |
end | |
def peek | |
return @current if @current | |
Fiber.yield if @buf.empty? | |
@buf.shift.tap do |ch| | |
@current = ch | |
@position += 1 | |
end | |
end | |
def skip(re) | |
while scan(re); end | |
end | |
def scan(re) | |
ch = peek | |
if re.is_a?(String) ? ch == re : re.match(ch) | |
@current = nil | |
ch | |
end | |
end | |
def scan_until(re) | |
''.tap do |buf| | |
until (ch = peek) =~ re || ch == nil | |
buf << ch | |
@current = nil | |
end | |
end | |
end | |
end | |
parser = Parser.new | |
json = '[1, 2, 3, [4, 5, 6]]' | |
json.each_char do |ch| | |
if result = parser << ch | |
puts result.inspect | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment