Skip to content

Instantly share code, notes, and snippets.

@rampion
Created November 19, 2015 04:12
Show Gist options
  • Save rampion/10e4bb613db13ab9ed1b to your computer and use it in GitHub Desktop.
Save rampion/10e4bb613db13ab9ed1b to your computer and use it in GitHub Desktop.
require 'set'
def ContextFreeGrammar(&blk)
ContextFreeGrammar.new(&blk)
end
class ContextFreeGrammar
attr_reader :rules
def initialize(&blk)
@rules = instance_eval(&blk)
# TODO : make sure a symbol isn't the leftmost element in its expansion
end
def infinity
Float::INFINITY
end
def literal str
Literal.new(self, str)
end
def regexp re
Regexp.new(self, re)
end
def symbol name
Symbol.new(self, name)
end
def quantify child, range
Star.new(self, child, range)
end
def any_of( *children )
Pipe.new(self, children)
end
def concatenate( *children )
Plus.new(self, children)
end
SYMBOL_REGEXP = /\A(?<name>[A-Z]\w*)(?<operator>[?=])?\Z/
# define any
def method_missing(method_name, *args, &blk)
matched = SYMBOL_REGEXP.match(method_name)
super unless matched and blk.nil? and args.length == (matched['operator'] == '=' ? 1 : 0)
name = matched['name'].to_sym
case matched['operator']
when nil ; symbol(name)
when '?' ; quantify(symbol(name), (0..1))
end
end
# remove any existing methods that look like Symbols
(instance_methods + private_instance_methods).each do |method_name|
next unless method_name =~ SYMBOL_REGEXP
undef_method method_name
end
module Parsed
def + other
case other
when Plus ; grammar.concatenate(self, *other.children)
when Parsed ; grammar.concatenate(self, other)
else ; raise ArgumentError.new()
end
end
def | other
case other
when Pipe ; grammar.any_of(self, *other.children)
when Parsed ; grammar.any_of(self, other)
else ; raise ArgumentError.new()
end
end
def * arg
range = case arg
when Range ; arg
when Int ; (arg..arg)
else ; raise ArgumentError.new()
end
grammar.quantify( self, range )
end
def inspect
klass = self.class
"#{klass}(#{to_a[1..-1].map(&:inspect).join(',')})"
end
def to_regexp
::Regexp.new(_to_regexp(Set.new))
end
end
Literal = Struct.new(:grammar, :string) do
def _to_regexp(_defined=Set.new)
::Regexp.escape(string)
end
include Parsed
end
Regexp = Struct.new(:grammar, :regexp) do
def _to_regexp(_defined=Set.new)
regexp.to_s
end
include Parsed
end
Symbol = Struct.new(:grammar, :name) do
include Parsed
def _to_regexp(defined)
if defined.include? name
'\g<' + name.to_s + '>'
else
defined.add name # stateful
rule = grammar.rules[name]
unless rule
raise "undefined symbol #{name}"
end
'(?<' + name.to_s + '>' + grammar.rules[name]._to_regexp(defined) + ')'
end
end
end
Plus = Struct.new(:grammar, :children) do
include Parsed
def + other
case other
when Plus ; grammar.concatenate(*self.children, *other.children)
when Parsed ; grammar.concatenate(*self.children, other)
else ; raise ArgumentError.new()
end
end
def _to_regexp(defined)
children.map { |child| child._to_regexp(defined) }.join('')
end
end
Pipe = Struct.new(:grammar, :children) do
include Parsed
def | other
case other
when Pipe ; grammar.any_of(*self.children, *other.children)
when Parsed ; grammar.any_of(*self.children, other)
else ; raise ArgumentError.new()
end
end
def _to_regexp(defined)
'(?:' + children.map { |child| child._to_regexp(defined) }.join('|') + ')'
end
end
Star = Struct.new(:grammar, :child, :range) do
include Parsed
def _to_regexp(defined)
'(?:' + child._to_regexp(defined) + '){' + range.low + ',' + (range.high == grammar.infinity ? '' : range.high) + '}'
end
end
end
require './ContextFreeGrammar'
simple_json = ContextFreeGrammar {{
Object:
literal('{}') |
literal('{') + Members() + literal('}'),
Members:
Pair() |
Pair() + literal(',') + Members(),
Pair:
String() + literal(':') + Value(),
Array:
literal('[]') |
literal('[') + Elements()+ literal(']'),
Elements:
Value() |
Value() + literal(',') + Elements(),
Value:
String() |
Number() |
Object() |
Array() |
literal("true") |
literal("false") |
literal("null"),
String:
literal('""') |
literal('"') + Chars() + literal('"'),
Chars:
Char() |
Char() + Chars(),
Char:
regexp(/[^"\\]|\\(?:["\\\/bfnrt]|[A-Fa-f0-9]{4})/),
Number:
Int() |
Int() + Frac() |
Int() + Exp() |
Int() + Frac() + Exp(),
Int:
Digit() |
Digit1To9() + Digits() |
literal('-') + Digit() |
literal('-') + Digit1To9() + Digits(),
Frac:
literal('.') + Digits(),
Exp:
E() + Digits(),
Digits:
Digit() |
Digit() + Digits(),
E:
literal('e') |
literal('e+') |
literal('e-') |
literal('E') |
literal('E+') |
literal('E-'),
Digit:
regexp(/[0-9]/),
Digit1To9:
regexp(/[1-9]/)
}}
p simple_json.Value.to_regexp
exit 0
complex_json = ContextFreeGrammar {{
Object:
literal('{') + Members?() + literal('}'),
Members:
Pair() + (literal(',') + Pair()) * (0..infinity),
Pair:
String() + literal(':') + Value(),
Array:
literal('[') + Elements?() + literal(']'),
Elements:
Value() + (literal(',') + Value()) * (0..infinity),
Value:
Object() |
Array() |
Value() |
String() |
Number() |
literal("true") |
literal("false") |
literal("null")
}}
p complex_json
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment