Skip to content

Instantly share code, notes, and snippets.

@itarato
Last active February 28, 2021 06:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itarato/534df2a9ed888fda128e37d16dabadae to your computer and use it in GitHub Desktop.
Save itarato/534df2a9ed888fda128e37d16dabadae to your computer and use it in GitHub Desktop.
Parser combinator trial
require 'pp'
require "test/unit"
# Convenience hooks on String so parser inputs and literal-token parsers can
# be built straight from string literals.
class String
  # Wraps this string as the not-yet-consumed input of a new ParseResponse
  # (no collected args).
  def parse_response
    ParseResponse.new(self)
  end

  # Builds a parser for this exact string by delegating to for_tokens.
  def token
    for_tokens(self)
  end
end
# Sample hash-like input. This is a top-level local, so it is not visible
# inside any method or class body; no code visible in this file reads it.
raw = <<~JSON
  {
  "foo" => "bar"
  }
JSON
# Success-or-failure wrapper returned by every parser in this file.
#
# A Result is a failure when it was built with a non-nil err_value; otherwise
# it is a success carrying ok_value.
class Result
  attr_reader :err_value, :ok_value

  # Builds a successful Result wrapping +value+.
  def self.ok(value)
    new(ok_value: value)
  end

  # Builds a failed Result wrapping +value+.
  # A nil +value+ is indistinguishable from "no error" and yields a success.
  def self.err(value)
    new(err_value: value)
  end

  # @param ok_value  payload stored when the result is a success
  # @param err_value payload stored when non-nil; marks the result as failed
  def initialize(ok_value: nil, err_value: nil)
    @is_ok = err_value.nil?
    if @is_ok
      @ok_value = ok_value
    else
      @err_value = err_value
    end
  end

  # @return [Boolean] true when this result is a success
  def ok?
    @is_ok
  end

  # @return [Boolean] true when this result is a failure
  def err?
    !ok?
  end

  # Chains another step: yields ok_value and returns the block's value on
  # success; returns self untouched (block not run) on failure.
  def and_then
    return self if err?

    yield @ok_value
  end
end
# Parser state: the input that is still unconsumed (+rest+) plus the values
# collected so far (+args+).
#
# Instances are never modified by #progress or #drop; each call returns a new
# ParseResponse. Results hold references to these objects, so mutating in
# place would let a parser that partially succeeds and then fails corrupt the
# input (and any earlier successful Result) that a caller wants to fall back to.
class ParseResponse
  attr_reader :rest, :args

  # @param rest [String] input not yet consumed
  # @param args [Array] values collected so far
  def initialize(rest, *args)
    @rest = rest
    @args = args
  end

  # Consumes +size+ leading elements of rest and appends +added_tokens+ to args.
  #
  # @param size [Integer] how much of rest to skip
  # @param added_tokens [Array] values to append to args
  # @return [ParseResponse] a new state; the receiver is left unchanged
  def progress(size, *added_tokens)
    self.class.new(@rest[size..-1], *(@args + added_tokens))
  end

  # Discards the last +n+ collected args (fewer if not that many exist).
  #
  # @param n [Integer] number of trailing args to discard
  # @return [ParseResponse] a new state; the receiver is left unchanged
  def drop(n = 1)
    kept = @args.dup
    kept.pop(n) # same trimming rules as Array#pop, applied to a copy
    self.class.new(@rest, *kept)
  end
end
# Builds a parser that matches the literal +token+ at the start of the input.
#
# The returned lambda takes a parse input (responding to rest and progress)
# and returns:
# * Result.err(:token_too_short) when less input remains than the token needs,
# * Result.err([:token_mismatch, token, input]) when the leading input differs,
# * Result.ok(input advanced past the token, with the token appended to args).
def for_tokens(token)
  lambda do |input|
    width = token.size
    if input.rest.size < width
      Result.err(:token_too_short)
    elsif input.rest[0...width] != token
      Result.err([:token_mismatch, token, input])
    else
      Result.ok(input.progress(width, token))
    end
  end
end
# Builds a parser that consumes the longest leading run of characters for
# which the given block returns a truthy value.
#
# @param min_length [Integer] shortest run that counts as a match
# @yield [char] called with each leading character in turn
# @return [Proc] lambda taking a parse input; returns
#   Result.err(:sequence_too_short) when the run is shorter than min_length,
#   otherwise Result.ok with the input advanced past the run and the run
#   (joined into one string) appended to args
def for_sequence(min_length = 1, &block)
  lambda do |input|
    matched = input.rest.each_char.take_while(&block).join('')
    if matched.size < min_length
      Result.err(:sequence_too_short)
    else
      Result.ok(input.progress(matched.size, matched))
    end
  end
end
# Builds a parser that runs +parsers+ one after another.
#
# Starting from Result.ok(input), each parser is chained with and_then, so it
# receives the previous parser's ok value and is skipped once any earlier
# parser has failed. The returned lambda yields the final Result (the first
# failure, or the last parser's success).
def attach(*parsers)
  lambda do |input|
    parsers.reduce(Result.ok(input)) do |outcome, parser|
      outcome.and_then(&parser)
    end
  end
end
# Wraps the parser given as a block so that the last +n+ args collected by a
# successful run are discarded (the input is still consumed).
#
# @param n [Integer] how many trailing args to discard on success
# @yield [input] the wrapped parser; must return a Result
# @return [Proc] lambda taking a parse input; failures of the wrapped parser
#   are passed through unchanged
def drop(n = 1)
  lambda do |input|
    parsed = yield(input)
    parsed.and_then { |response| Result.ok(response.drop(n)) }
  end
end
# Builds a parser that applies the parser given as a block over and over,
# feeding each successful round's value into the next, until a round fails.
#
# Repetition also stops after a successful round that consumed no input.
# Such a round would succeed identically forever, so without this check the
# loop would never end (e.g. iterating a parser that itself allows zero
# matches). That zero-width round is still counted as one iteration.
#
# @param min [Integer] minimum number of successful rounds required
# @yield [input] the parser to repeat; must return a Result
# @return [Proc] lambda taking a parse input; returns
#   Result.err(:iteration_too_short) when fewer than +min+ rounds succeeded,
#   otherwise the Result of the last successful round (Result.ok(input) when
#   no round succeeded and +min+ is 0)
def iterate(min = 1)
  lambda do |input|
    last_ok = Result.ok(input)
    iterations_done = 0

    loop do
      # Measured before the round runs, so the comparison below stays valid
      # even if the round modifies the input object it was handed.
      remaining_before = last_ok.ok_value.rest.size
      current = yield last_ok.ok_value
      break unless current.ok?

      iterations_done += 1
      last_ok = current
      break if current.ok_value.rest.size >= remaining_before
    end

    return Result.err(:iteration_too_short) if iterations_done < min

    last_ok
  end
end
# Test::Unit cases exercising the parser combinators defined in this file.
class TestParser < Test::Unit::TestCase
  # A literal-token parser consumes the token and records it in args.
  def test_for_tokens
    start_tag = for_tokens('<')
    exact = start_tag.call('<'.parse_response)
    prefixed = start_tag.call('<abc'.parse_response)

    assert(exact.ok?)
    assert(prefixed.ok?)
    assert_equal(['<'], exact.ok_value.args)
    assert_equal('', exact.ok_value.rest)
    assert_equal('abc', prefixed.ok_value.rest)
  end

  # A sequence parser consumes the leading run of matching characters.
  def test_for_sequence
    string_seq = for_sequence { |char| /[a-z]{1}/ =~ char }
    outcome = string_seq.call('ab123'.parse_response)

    assert(outcome.ok?)
    assert_equal('123', outcome.ok_value.rest)
    assert_equal(['ab'], outcome.ok_value.args)
  end

  # attach chains parsers and fails as soon as one of them fails.
  def test_attach
    full_tag = attach(
      for_tokens('<'),
      for_sequence { |char| /[a-z]{1}/ =~ char },
      for_tokens('/>')
    )

    res_ok = full_tag.call('<hello/>inner'.parse_response)
    assert(res_ok.ok?)
    assert_equal('inner', res_ok.ok_value.rest)
    assert_equal(['<', 'hello', '/>'], res_ok.ok_value.args)

    assert(full_tag.call('a<hello>else'.parse_response).err?)
    assert(full_tag.call('a<hello1>else'.parse_response).err?)
    assert(full_tag.call('<hello>else'.parse_response).err?)
  end

  # iterate repeats a parser until it stops matching.
  def test_iterate
    foo_word = for_tokens('foo')
    repeated_foo = iterate(&foo_word)

    res_ok = repeated_foo.call('foofoofoofobar'.parse_response)
    assert(res_ok.ok?)
    assert_equal('fobar', res_ok.ok_value.rest)
    assert_equal(['foo', 'foo', 'foo'], res_ok.ok_value.args)
  end

  # Composes the primitives into a parser for a small brace-delimited list of
  # "key" => "value" pairs and checks that the sample text is accepted.
  def test_json
    raw = <<~JSON
      {
      "foo" => "bar",
      "bar" => "zoo"
      }
    JSON

    # Zero or more whitespace runs, none of which are kept in args.
    whitespace_seq = for_sequence { |c| /[\n\t\r ]{1}/ =~ c }
    whitespace_one = drop(&whitespace_seq)
    whitespace = iterate(0, &whitespace_one)

    # A double-quoted lowercase identifier; the quotes are not kept in args.
    quote_seq = '"'.token
    quote = drop(&quote_seq)
    ident = for_sequence { |c| /[a-z]{1}/ =~ c }
    string = attach(quote, ident, quote)

    # One `"key" => "value"` entry followed by any number of commas.
    key_assign = '=>'.token
    comma = ','.token
    zero_or_more_commas = iterate(0, &comma)
    key_value = attach(
      whitespace, string,
      whitespace, key_assign,
      whitespace, string,
      zero_or_more_commas, whitespace
    )
    key_values = iterate(0, &key_value)

    open_brace = '{'.token
    close_brace = '}'.token
    json = attach(
      whitespace,
      open_brace,
      whitespace,
      key_values,
      whitespace,
      close_brace,
      whitespace
    )

    assert(json.call(raw.parse_response).ok?)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment