Last active
September 5, 2015 14:30
-
-
Save olegkovalenko/a257a3784f964f1e92bb to your computer and use it in GitHub Desktop.
Monadic parsing in Ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on FUNCTIONAL PEARLS: Monadic Parsing in Haskell | |
# by Graham Hutton | |
# University of Nottingham | |
# Erik Meijer | |
# University of Utrecht | |
# | |
require 'superators' # gem install superators | |
# parser = ->(x) { [a, l]} | |
def item | |
->(cs) { | |
if cs.empty? | |
[] | |
else | |
[cs[0], cs[1..-1]] | |
end | |
} | |
end | |
# ITEM = item | |
# class Monad m where | |
# return :: a -> m a | |
# (>>=) ::ma->(a->mb)->mb | |
# | |
# instance Monad Parser where | |
# return a = Parser (\cs -> [(a,cs)]) | |
def unit(a) ->(cs) {[a, cs]} end | |
# zero = Parser (\cs -> []) | |
def zero; ->(cs) { [] } end | |
# p >>= f = Parser (\cs -> concat [parse (f a) cs’ | (a,cs’) <- parse p cs]) | |
FLAT_MAP = flat_map = ->(p1, &f) { ->(cs) { | |
r = p1.call(cs) | |
puts "+++ r = #{r}" | |
if r.empty? | |
[] | |
else | |
a = r[0] | |
puts "+++ a = #{a}" | |
cs1 = r[1] | |
p2 = f.call(a) | |
p2.call(cs1) | |
end | |
} } | |
FLAT_MAP = flat_map | |
# sat :: (Char -> Bool) -> Parser Char | |
# sat p = do {c <- item; if p c then return c else zero} | |
def sat(&p) | |
item.flat_map {|c| | |
if p.call(c) | |
unit(c) | |
else | |
zero | |
end | |
} | |
# ->(cs) { | |
# r = item.call(cs) | |
# if r.empty? | |
# [] | |
# else | |
# if p.call(r[0]) | |
# r | |
# else | |
# [] | |
# end | |
# end | |
# } | |
end | |
def char(c) sat {|x| x == c } end | |
def string(str) | |
str.split("").map {|c| char(c)}.reduce(unit("")) { |m, p| m.flat_map {|mc| p.map {|c| mc + c }}} | |
end | |
# space :: Parser String | |
# space = many (sat isSpace) | |
def space; sat {|c| c == ' ' or c == "\t" or c == "\n"}.many end | |
# symb :: String -> Parser String | |
# symb cs = token (string cs) | |
def symb(str) string(str).token end | |
class Proc | |
def flat_map(&block) | |
FLAT_MAP.call(self, &block) | |
end | |
def map(&block) | |
->(cs) { | |
r = self.call(cs) | |
if r.empty? | |
[] | |
else | |
[block.call(r[0]), r[1]] | |
end | |
} | |
end | |
# p ++ q = Parser (\cs -> parse p cs ++ parse q cs) | |
def plusplus(q) | |
->(cs) { self.call(cs) + q.call(cs) } | |
end | |
superator "++" do |operand| | |
self.plusplus(operand) | |
end | |
superator "+++" do |operand| | |
plusplusplus(operand) | |
end | |
def plusplusplus(operand) | |
p = self.plusplus(operand) | |
->(cs) { | |
r = p.call(cs) | |
r.empty? ? [] : r[0..1] | |
} | |
end | |
# many :: Parser a -> Parser [a] | |
# many p = many1 p +++ return [] | |
def many | |
many1() +++ unit([]) | |
end | |
# many1 :: Parser a -> Parser [a] | |
# many1 p = do {a <- p; as <- many p; return (a:as)} | |
def many1 | |
flat_map {|a| | |
many.flat_map {|as| | |
unit([a] + as) | |
} | |
} | |
end | |
# sepby :: Parser a -> Parser b -> Parser [a] | |
# p ‘sepby‘ sep = (p ‘sepby1‘ sep) +++ return [] | |
def sepby(sep) | |
sepby1(sep) +++ unit([]) | |
end | |
# sepby1 :: Parser a -> Parser b -> Parser [a] | |
# p ‘sepby1‘ sep = do a <-p | |
# as <- many (do {sep; p}) | |
# return (a:as) | |
def sepby1(sep) | |
flat_map {|a| | |
(sep.flat_map {|_| self}).many.flat_map {|as| | |
unit([a] + as) | |
} | |
} | |
end | |
# chainl :: Parser a -> Parser (a->a->a) -> a -> Parser a | |
# chainl p op a = (p ‘chainl1‘ op) +++ return a | |
def chainl(op, default) | |
self.chainl1(op) +++ unit(default) | |
end | |
# do {a <- p; rest a} | |
# where | |
# rest a = (do f <- op | |
# b <- p | |
# rest (f a b)) | |
# +++ return a | |
def chainl1(op) | |
self.flat_map {|a| | |
puts "=== #{a}" | |
rest(op, a) | |
} | |
end | |
def rest(op, a) | |
op.flat_map {|f| | |
flat_map {|b| | |
puts "=== #{b}" | |
puts "=== rest of #{a} #{b}" | |
rest(op, f.call(a, b)) | |
} | |
# } +++ unit(a) | |
}.plusplusplus(unit(a)) | |
end | |
# token :: Parser a -> Parser a | |
# token p = do {a <- p; space; return a} | |
def token | |
self.flat_map {|a| | |
space.flat_map {|_| unit(a)} | |
} | |
end | |
# apply :: Parser a -> String -> [(a,String)] | |
# apply p = parse (do {space; p}) | |
def apply | |
space.flat_map {|_| self} | |
end | |
def lstrip; apply end | |
def rstrip; flat_map {|a| space.flat_map { unit(a) }} end | |
end | |
puts item.call("123").inspect | |
# puts flat_map.call(item, ->(a) { if a == "2" then item else unit.call("0") end }).call("123").inspect | |
# puts flat_map.call(item) {|a| | |
# if a == "2" then item else unit.call("0") end | |
# }.call("123").inspect | |
# puts item.flat_map {|a| | |
# if a == "2" then item else unit.call("0") end | |
# }.call("123").inspect | |
# | |
p3r2 = item.flat_map {|a1| | |
item.flat_map {|a2| | |
# item.flat_map {|a3| | |
# unit.call([a1, a3].join("-")) | |
# } | |
item.map {|a3| [a1, a3].join("-") } | |
} | |
} | |
puts p3r2.call("123") | |
puts (zero.plusplus(item).call("123") == ["1", "23"]) | |
# puts (item.plusplus(zero).call("123") == ["1", "23"]) | |
puts ((item() ++ zero).call("123") == ["1", "23"]) | |
puts ((item() ++ item).call("123")).inspect | |
puts ((item() +++ item).call("123") == ["1", "23"]) | |
p4 = | |
char("1").flat_map {|a| | |
char("2").flat_map {|b| | |
char("3").map {|c| [a,b,c].join.to_i * 2 } | |
} | |
} | |
puts p4.call("123").inspect | |
puts char("?").call("123").inspect | |
puts string("{}").call("123").inspect | |
puts string("{}").call("{}").inspect | |
puts char("a").many1().call("aaabcd").inspect | |
puts string("aaa").call("aaabcd").inspect | |
puts string("a").many1.call("aaabcd").inspect | |
digit = ('0'..'9').map {|c| char c}.inject(zero) {|m, i| m +++ i} | |
number = digit.many1.flat_map {|h| char(".").flat_map {|_| digit.many1.map {|t| [h,".",t].join.to_f}}} | |
puts number.call("1.023").inspect | |
puts number.sepby(char(",")).call("1.023,0.12,1.25").inspect | |
# op = (char('+') +++ char('-')).flat_map {|c| | |
# case c | |
# when '+' then unit(->(l, r) { l + r } ) | |
# when '-' then unit(->(l, r) { l - r } ) | |
# end | |
# } | |
op = | |
char('+').flat_map {|_| unit ->(l, r) { l + r }} +++ | |
char('-').flat_map {|_| unit ->(l, r) { l - r }} | |
puts number.chainl(op, 0).call("1.023+0.12+1.25").inspect # == 2.393 | |
puts space.call("abc").inspect | |
puts space.call(" abc").inspect | |
puts space.call(" \n \tabc").inspect | |
puts string("def").call("def token").inspect | |
puts string("puts").call("puts 1").inspect | |
puts string("puts").apply.call(" puts 1").inspect | |
puts string("puts").lstrip.call(" puts 1").inspect | |
puts string("puts").rstrip.call("puts 1").inspect | |
puts string("puts").lstrip.rstrip.call("\t puts 1").inspect | |
# standard grammar for arithmetic expressions built up from single digits using | |
# the operators +, -, * and /, together with parentheses (Aho et al., 1986): | |
# | |
# digit = do {x <- token (sat isDigit); return (ord x - ord ’0’)} | |
digit = sat {|c| ('0'..'9').include? c }.token.flat_map {|c| unit(c.to_i)} | |
D = digit | |
# puts digit.call("023").inspect | |
# addop = do {symb "+"; return (+)} +++ do {symb "-"; return (-)} | |
addop = | |
symb("+").flat_map {|_| unit ->(l, r) { l + r }} +++ | |
symb("-").flat_map {|_| unit ->(l, r) { l - r }} | |
A = addop | |
# puts digit.rstrip.lstrip.chainl(addop, 0).call("3 + 2 - 1").inspect # == 4 | |
# mulop = do {symb "*"; return (*)} +++ do {symb "/"; return (div)} | |
mulop = | |
symb("*").flat_map {|_| unit ->(l, r) { l * r }} +++ | |
symb("/").flat_map {|_| unit ->(l, r) { l / r }} | |
M = mulop | |
# puts digit.rstrip.lstrip.chainl(mulop, 1).call("3 * 4 / 2").inspect # == 4 | |
# | |
# expr = term ‘chainl1‘ addop | |
def expr; term.chainl1(A) end | |
# term = factor ‘chainl1‘ mulop | |
def term; factor.chainl1(M) end | |
# factor = digit +++ do {symb "("; n <- expr; symb ")"; return n} | |
def factor; (D) +++ (symb("(").flat_map {|_| expr.flat_map {|n| symb(")").flat_map {|_| unit(n)}}}) end | |
puts expr.call("1 - 2 * 3 + 4").inspect | |
puts expr.call("(1 - 2) * 3 + 4").inspect |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment