Skip to content

Instantly share code, notes, and snippets.

@iliabylich
Last active October 29, 2020 01:11
Show Gist options
  • Save iliabylich/0b9d8ecfac1df86667025e219a2ed5a9 to your computer and use it in GitHub Desktop.
Save iliabylich/0b9d8ecfac1df86667025e219a2ed5a9 to your computer and use it in GitHub Desktop.
recursive json parser
require 'strscan'
class Result
class Ok < self
def or(&block)
self
end
def and(&block)
chained = block.call(value)
case chained
when Ok
Ok.new(value + chained.value, length + chained.length, @buffer)
when Error
rollback
chained
end
end
def map(&block)
yield @value, @length
end
def map_value(value = nil, &block)
if block_given?
value = block.call(@value)
end
Ok.new(value, @length, @buffer)
end
def as_array
Ok.new([@value], @length, @buffer)
end
end
class Error < self
def or(&block)
rollback
block.call(self)
end
def and(&block)
self
end
def map(&block)
self
end
def map_value(value = nil, &block)
self
end
def as_array
self
end
end
def initialize(value, length, buffer)
@value = value
@length = length
@buffer = buffer
end
def rollback
@buffer.pos -= length
@value = :rollback
end
attr_reader :value, :length
end
class MonadicJsonParser
class Error < StandardError
end
def initialize(input)
@input = input
end
def tokens
@tokens = []
@buffer = StringScanner.new(@input)
result = try_json
result.value
end
def try_json
try_element
end
def try_value
try_object
.or { try_array }
.or { try_string }
.or { try_number }
.or { try_raw('true').map_value(true) }
.or { try_raw('false').map_value(false) }
.or { try_raw('null').map_value(nil) }
end
def try_object
try_raw('{')
.and { try_members.or { try_ws } }
.and { try_raw('}') }
.map_value { |(_lcurly, *pairs, _rcurly)| pairs.reduce(&:merge) }
end
def try_members
try_member.as_array
.and {
(
try_raw(',')
.and { try_members }
.map_value { |(_comma, *pairs)| pairs }
).or { Ok([]) }
}
end
def try_member
try_ws
.and { try_string.as_array }
.and { try_ws }
.and { try_raw(':') }
.and { try_ws }
.and { try_element.as_array }
.map_value { |(_ws, string, _ws, _colon, _ws, element)| { string => element } }
end
def try_array
try_raw('[')
.and { try_elements.or { try_ws } }
.and { try_raw(']') }
.map_value { |(_lbrack, *elements, _rbrack)| elements }
end
def try_elements
try_element
.as_array
.and {
try_raw(',')
.and { try_elements }
.map_value { |(_comma, *elements)| elements }
.or { Ok([]) }
}
end
def try_element
try_ws
.and { try_value.as_array }
.and { try_ws }
.map_value { |(_wd, value, _wd)| value }
end
def try_string
try_raw('"')
.and { try_characters }
.and { try_raw('"') }
.map_value { |(_quote, *chars, _quote)| chars.join }
end
def try_characters
try_character
.and { try_characters }
.or { Ok(['']) }
end
def try_character
try_raw(/[^"\\]+/)
.or {
try_raw("\\")
.map_value('')
.and { try_escape }
.as_array
}
end
def try_escape
try_raw("\"").map_value('"')
.or { try_raw("\\").map_value("\\") }
.or { try_raw("/").map_value("/") }
.or { try_raw("b").map_value("\b") }
.or { try_raw("f").map_value("\f") }
.or { try_raw("n").map_value("\n") }
.or { try_raw("r").map_value("\r") }
.or { try_raw("t").map_value("\t") }
.or {
try_raw('u')
.map_value('')
.and { try_hex }
.and { try_hex }
.and { try_hex }
.and { try_hex }
.map_value { |hex| hex.to_i(16).chr(Encoding::UTF_8) }
}
end
def try_hex
try_raw(/[0-9A-Fa-f]/).map_value { |(value)| value }
end
def try_number
try_integer.as_array
.and { try_fraction.as_array }
.and { try_exponent.as_array }
.map_value { |(integer, fraction, exponent)|
if fraction || exponent
[*integer, *fraction, *exponent].join.to_f
else
[*integer, *fraction].join.to_i
end
}
end
def try_integer
try_raw('-').or { Ok(['']) }
.and {
(try_onenine.and { try_digits })
.or { try_digit }
}
end
def try_digits
try_digit.and { try_digits.or { Ok([]) } }
end
def try_digit
try_raw('0').or { try_onenine }
end
def try_onenine
try_raw(/[1-9]/)
end
def try_fraction
(try_raw('.').and { try_digits })
.or { Ok(nil) }
end
def try_exponent
try_raw(/[eE]/)
.and { try_sign }
.and { try_digits }
.or { Ok(nil) }
end
def try_sign
try_raw('-')
.or { try_raw('+') }
.or { Ok(['']) }
end
def try_ws
try_raw(/\s*/)
end
# ....
def Ok(value)
Result::Ok.new(value, 0, @buffer)
end
def try_raw(raw)
if @buffer.scan(raw)
Result::Ok.new([@buffer.matched], @buffer.matched.length, @buffer)
else
Result::Error.new(
[:unexpected_char, expected: raw, got: @buffer.string[@buffer.pos]],
0,
@buffer
)
end
end
end
ws = " \n\r\t"
input = <<-JSON
{
#{ws}"whitespaces"#{ws}:#{ws}" "#{ws},
"null": null,
"true": true,
"false": false,
"numbers": [
1,
-12,
1.2,
-1.2,
1e3,
-1e3,
1.2e3,
-1.2e3,
1E3,
-1E3,
1.2E3,
-1.2E3,
1e-3,
-1e-3,
1.2e-3,
-1.2e-3,
1E-3,
-1E-3,
1.2E-3,
-1.2E-3
],
"strings": "\\" \\/ \\\\ \\b \\f \\n \\r \\t \\u0123 \\u4567 \\u890A \\uBCDE \\uffff some text",
"array": [#{ws}1#{ws},#{ws}2#{ws}],
"object": {#{ws}"key"#{ws}:#{ws}"value"#{ws}}
}
JSON
expected = {
"whitespaces"=>" ",
"null"=>nil,
"true"=>true,
"false"=>false,
"numbers"=>[
1,
-12,
1.2,
-1.2,
1000.0,
-1000.0,
1200.0,
-1200.0,
1000.0,
-1000.0,
1200.0,
-1200.0,
0.001,
-0.001,
0.0012,
-0.0012,
0.001,
-0.001,
0.0012,
-0.0012
],
"strings"=>"\" / \\ \b \f \n \r \t ģ 䕧 褊 볞 \uFFFF some text",
"array"=>[1, 2],
"object"=>{"key"=>"value"}
}
require 'json'
output = JSON.parse(input)
p output
p output == expected
require_relative './lexer'
output2 = MonadicJsonParser.new(input).tokens
p output2
p output2 == expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment