Last active
August 29, 2015 14:00
-
-
Save mikaelhg/4a82b65619466f4c4b94 to your computer and use it in GitHub Desktop.
A Parslet parser for a random Ruby .inspect dump
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
require 'parslet'
require 'pp'
require 'awesome_print'
# Parslet grammar for a text dump produced by Ruby's #inspect.
#
# The root rule expects "<[", a comma-separated list of "#<...>" objects, and
# "]>", optionally followed by one trailing character. Object members are
# either "key: value" pairs or "@name=value" instance variables; values may be
# nil, booleans, strings, hex numbers, arrays, both hash notations, keywords,
# nested objects, UTC timestamps and integers.
class InspectParser < Parslet::Parser
  # TOKENS
  rule(:space)  { match('\s').repeat(1) }
  rule(:space?) { space.maybe }
  rule(:number) { match['0-9'] }
  # A letter or underscore followed by letters, digits or underscores, so
  # names such as "field1" are accepted as well as purely alphabetic ones.
  rule(:identifier) { match['a-zA-Z_'] >> match['a-zA-Z0-9_'].repeat }
  rule(:s_comma) { space? >> str(',') >> space? }

  # TYPES

  # strings ripped from http://zerowidth.com/2013/02/24/parsing-toml-in-ruby-with-parslet.html
  # Characters that may not appear unescaped inside a string literal.
  rule(:string_special) { match['\0\t\n\r"\\\\'] }
  # A backslash followed by one of the recognised escape characters.
  rule(:escaped_special) { str("\\") >> match['0tnr"\\\\'] }
  rule :t_string do
    str('"') >>
      (escaped_special | string_special.absent? >> any).repeat.as(:string) >>
      str('"')
  end

  # "YYYY-MM-DD HH:MM:SS UTC". repeat(n, n) pins every field to exactly n
  # digits; repeat(n) alone would only set a minimum and accept longer runs.
  rule :t_timestamp do
    number.repeat(4, 4).as(:year) >> str('-') >>
      number.repeat(2, 2).as(:month) >> str('-') >>
      number.repeat(2, 2).as(:day) >>
      str(' ') >>
      number.repeat(2, 2).as(:hours) >> str(':') >>
      number.repeat(2, 2).as(:minutes) >> str(':') >>
      number.repeat(2, 2).as(:seconds) >>
      str(' ') >>
      str('UTC') # too lazy to write out actual TZ rules
  end

  rule(:t_nil)     { str('nil') }
  rule(:t_boolean) { str('true') | str('false') }
  rule(:t_keyword) { str(':') >> identifier }
  rule(:t_integer) { str('-').maybe >> number.repeat(1) }
  rule(:t_hex)     { str('0x') >> match('[0-9a-fA-F]').repeat(1) }

  # Any value. The alternatives are tried in order, so the more specific
  # numeric forms (hex, timestamp) come before the plain integer.
  rule :value do
    t_nil.as(:nil) |
      t_boolean.as(:boolean) |
      t_string |
      t_hex.as(:hex) |
      t_array.as(:array) |
      t_hash.as(:hash) |
      t_ruby_hash.as(:ruby_hash) |
      t_keyword.as(:keyword) |
      t_object.as(:object) |
      t_timestamp.as(:timestamp) |
      t_integer.as(:integer)
  end

  # GENERIC ARRAY
  rule :t_array do
    str('[') >>
      value.maybe >>
      (s_comma >> value).repeat >>
      str(']')
  end

  # "NORMAL" HASH: "key: value" pairs
  rule :t_keyval do
    (identifier | t_keyword).as(:key) >>
      str(':') >> space >> value.as(:value)
  end

  # Zero or more comma-separated "key: value" pairs, without the braces.
  rule :suprise_hash do
    t_keyval.maybe >>
      (s_comma >> t_keyval).repeat
  end

  rule :t_hash do
    str('{') >> suprise_hash >> str('}')
  end

  # RUBY STYLE HASH: "key=>value" pairs, at least one pair required
  rule :t_ruby_kv do
    (t_keyword | identifier | t_integer).as(:key) >>
      str('=>') >>
      value.as(:value)
  end

  rule :t_ruby_hash do
    str('{') >>
      t_ruby_kv >> (s_comma >> t_ruby_kv).repeat(0) >>
      str('}')
  end

  # OBJECT

  # ":0x..." suffix after the type name in "#<Type:0x... ...>", captured as
  # :oid. This rule must not be called :object_id: rule names become instance
  # methods, and that name would redefine Object#object_id on the parser.
  rule(:object_address) { str(':') >> t_hex.as(:oid) }

  rule :object_name do
    identifier.as(:type) >> object_address.maybe
  end

  rule :instance_variable do
    str('@') >> identifier.as(:name) >>
      str('=') >> value.as(:value)
  end

  rule :object_fields do
    instance_variable.as(:instance_variable) >>
      (s_comma >> instance_variable.as(:instance_variable)).repeat
  end

  # Members are a "key: value" list unless the next character is "@", in
  # which case they are a list of instance variables.
  rule :object_content do
    (str('@').absent? >> suprise_hash) | object_fields
  end

  rule :t_object do
    str('#<') >>
      object_name >> space >> object_content.as(:members) >>
      str('>')
  end

  rule :t_object_list do
    t_object.as(:object).maybe >>
      (s_comma >> t_object.as(:object)).repeat
  end

  rule :inspect_root do
    str('<[') >> t_object_list >> str(']>') >> any.maybe
  end

  root :inspect_root
end
# Parslet transform intended to post-process the tree built by InspectParser.
# NOTE(review): the grammar in this file does not appear to emit :array_item,
# and the :instance_variable rule is an empty placeholder - looks unfinished.
class InspectTransform < Parslet::Transform
  # A lone array item is replaced by its string form.
  rule(array_item: simple(:item)) { item.to_s }

  # An :array subtree is replaced by its string form plus a trailing newline.
  rule(array: subtree(:contents)) { "#{contents}\n" }

  # Placeholder rule: the block has no body, so it evaluates to nil.
  rule(:instance_variable) { nil }
end
# Entry point: parse the file named by the first command-line argument and
# pretty-print the resulting tree; on a parse failure print the error tree.
begin
  path = ARGV[0]
  # Without this guard File.read(nil) raises a TypeError.
  abort "usage: #{$PROGRAM_NAME} FILE" unless path

  tree = InspectParser.new.parse(File.read(path))
  ap tree
  # InspectTransform.new.apply(tree)
rescue Parslet::ParseFailed => failure
  # Parslet's current documentation exposes the failure tree as
  # #parse_failure_cause; older releases use #cause. Prefer the new name when
  # present so we never fall through to Ruby's own Exception#cause (nil here).
  cause = failure.respond_to?(:parse_failure_cause) ? failure.parse_failure_cause : failure.cause
  puts cause.ascii_tree
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment