Created
November 19, 2015 04:12
-
-
Save rampion/10e4bb613db13ab9ed1b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'set' | |
def ContextFreeGrammar(&blk) | |
ContextFreeGrammar.new(&blk) | |
end | |
class ContextFreeGrammar | |
attr_reader :rules | |
def initialize(&blk) | |
@rules = instance_eval(&blk) | |
# TODO : make sure a symbol isn't the leftmost element in its expansion | |
end | |
def infinity | |
Float::INFINITY | |
end | |
def literal str | |
Literal.new(self, str) | |
end | |
def regexp re | |
Regexp.new(self, re) | |
end | |
def symbol name | |
Symbol.new(self, name) | |
end | |
def quantify child, range | |
Star.new(self, child, range) | |
end | |
def any_of( *children ) | |
Pipe.new(self, children) | |
end | |
def concatenate( *children ) | |
Plus.new(self, children) | |
end | |
SYMBOL_REGEXP = /\A(?<name>[A-Z]\w*)(?<operator>[?=])?\Z/ | |
# define any | |
def method_missing(method_name, *args, &blk) | |
matched = SYMBOL_REGEXP.match(method_name) | |
super unless matched and blk.nil? and args.length == (matched['operator'] == '=' ? 1 : 0) | |
name = matched['name'].to_sym | |
case matched['operator'] | |
when nil ; symbol(name) | |
when '?' ; quantify(symbol(name), (0..1)) | |
end | |
end | |
# remove any existing methods that look like Symbols | |
(instance_methods + private_instance_methods).each do |method_name| | |
next unless method_name =~ SYMBOL_REGEXP | |
undef_method method_name | |
end | |
module Parsed | |
def + other | |
case other | |
when Plus ; grammar.concatenate(self, *other.children) | |
when Parsed ; grammar.concatenate(self, other) | |
else ; raise ArgumentError.new() | |
end | |
end | |
def | other | |
case other | |
when Pipe ; grammar.any_of(self, *other.children) | |
when Parsed ; grammar.any_of(self, other) | |
else ; raise ArgumentError.new() | |
end | |
end | |
def * arg | |
range = case arg | |
when Range ; arg | |
when Int ; (arg..arg) | |
else ; raise ArgumentError.new() | |
end | |
grammar.quantify( self, range ) | |
end | |
def inspect | |
klass = self.class | |
"#{klass}(#{to_a[1..-1].map(&:inspect).join(',')})" | |
end | |
def to_regexp | |
::Regexp.new(_to_regexp(Set.new)) | |
end | |
end | |
Literal = Struct.new(:grammar, :string) do | |
def _to_regexp(_defined=Set.new) | |
::Regexp.escape(string) | |
end | |
include Parsed | |
end | |
Regexp = Struct.new(:grammar, :regexp) do | |
def _to_regexp(_defined=Set.new) | |
regexp.to_s | |
end | |
include Parsed | |
end | |
Symbol = Struct.new(:grammar, :name) do | |
include Parsed | |
def _to_regexp(defined) | |
if defined.include? name | |
'\g<' + name.to_s + '>' | |
else | |
defined.add name # stateful | |
rule = grammar.rules[name] | |
unless rule | |
raise "undefined symbol #{name}" | |
end | |
'(?<' + name.to_s + '>' + grammar.rules[name]._to_regexp(defined) + ')' | |
end | |
end | |
end | |
Plus = Struct.new(:grammar, :children) do | |
include Parsed | |
def + other | |
case other | |
when Plus ; grammar.concatenate(*self.children, *other.children) | |
when Parsed ; grammar.concatenate(*self.children, other) | |
else ; raise ArgumentError.new() | |
end | |
end | |
def _to_regexp(defined) | |
children.map { |child| child._to_regexp(defined) }.join('') | |
end | |
end | |
Pipe = Struct.new(:grammar, :children) do | |
include Parsed | |
def | other | |
case other | |
when Pipe ; grammar.any_of(*self.children, *other.children) | |
when Parsed ; grammar.any_of(*self.children, other) | |
else ; raise ArgumentError.new() | |
end | |
end | |
def _to_regexp(defined) | |
'(?:' + children.map { |child| child._to_regexp(defined) }.join('|') + ')' | |
end | |
end | |
Star = Struct.new(:grammar, :child, :range) do | |
include Parsed | |
def _to_regexp(defined) | |
'(?:' + child._to_regexp(defined) + '){' + range.low + ',' + (range.high == grammar.infinity ? '' : range.high) + '}' | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require './ContextFreeGrammar' | |
simple_json = ContextFreeGrammar {{ | |
Object: | |
literal('{}') | | |
literal('{') + Members() + literal('}'), | |
Members: | |
Pair() | | |
Pair() + literal(',') + Members(), | |
Pair: | |
String() + literal(':') + Value(), | |
Array: | |
literal('[]') | | |
literal('[') + Elements()+ literal(']'), | |
Elements: | |
Value() | | |
Value() + literal(',') + Elements(), | |
Value: | |
String() | | |
Number() | | |
Object() | | |
Array() | | |
literal("true") | | |
literal("false") | | |
literal("null"), | |
String: | |
literal('""') | | |
literal('"') + Chars() + literal('"'), | |
Chars: | |
Char() | | |
Char() + Chars(), | |
Char: | |
regexp(/[^"\\]|\\(?:["\\\/bfnrt]|[A-Fa-f0-9]{4})/), | |
Number: | |
Int() | | |
Int() + Frac() | | |
Int() + Exp() | | |
Int() + Frac() + Exp(), | |
Int: | |
Digit() | | |
Digit1To9() + Digits() | | |
literal('-') + Digit() | | |
literal('-') + Digit1To9() + Digits(), | |
Frac: | |
literal('.') + Digits(), | |
Exp: | |
E() + Digits(), | |
Digits: | |
Digit() | | |
Digit() + Digits(), | |
E: | |
literal('e') | | |
literal('e+') | | |
literal('e-') | | |
literal('E') | | |
literal('E+') | | |
literal('E-'), | |
Digit: | |
regexp(/[0-9]/), | |
Digit1To9: | |
regexp(/[1-9]/) | |
}} | |
p simple_json.Value.to_regexp | |
exit 0 | |
complex_json = ContextFreeGrammar {{ | |
Object: | |
literal('{') + Members?() + literal('}'), | |
Members: | |
Pair() + (literal(',') + Pair()) * (0..infinity), | |
Pair: | |
String() + literal(':') + Value(), | |
Array: | |
literal('[') + Elements?() + literal(']'), | |
Elements: | |
Value() + (literal(',') + Value()) * (0..infinity), | |
Value: | |
Object() | | |
Array() | | |
Value() | | |
String() | | |
Number() | | |
literal("true") | | |
literal("false") | | |
literal("null") | |
}} | |
p complex_json |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment