Skip to content

Instantly share code, notes, and snippets.

@mebusw
Forked from yuanmai/CpsRegex.fsx
Last active August 31, 2017 10:02
Show Gist options
  • Save mebusw/f3a1e6f5ef570244cc70dd154e6a998f to your computer and use it in GitHub Desktop.
Save mebusw/f3a1e6f5ef570244cc70dd154e6a998f to your computer and use it in GitHub Desktop.
用 CPS 实现字符串匹配。基于讲座 https://youtu.be/cnhb4M8-J5M
class Re(object):
def match(self, str, idx, cont):
raise NotImplementedError()
def matches(self, str):
return self.match(str, 0, lambda i: i == len(str))
class ReChar(Re):
# ReChar(c) represents the character literal c.
def __init__(self, char): self.char = char
def match(self, str, idx, cont=lambda x: True):
# If the character at idx is the one we're looking for, we match!
if len(str) > idx and str[idx] == self.char:
# If we match, we call our continuation, to check that the rest of
# the string matches
return cont(idx+1)
return False # we didn't match
class ReSeq(Re):
# ReSeq(a, b) represents /ab/
def __init__(self, first, second): self.first, self.second = first, second
def match(self, str, idx, cont=lambda x: True):
# first try matching self.first
return self.first.match(str, idx,
# giving a modified continuation that checks
# whether self.second matches
lambda idx: self.second.match(str, idx, cont))
class ReOr(Re):
# ReOr(a,b) represents /a|b/
def __init__(self, first, second): self.first, self.second = first, second
def match(self, str, idx, cont=lambda x: True):
return (self.first.match(str, idx, cont) # try matching self.first
or self.second.match(str, idx, cont) # else try self.second
)
# Note how we might call our continuation twice - backtracking!
class ReStar(Re):
# ReStar(a) represents /a*/
def __init__(self, inner): self.inner = inner
def match(self, str, idx, cont=lambda x: True):
return (
# the one-or-more case, like /aa*/
self.inner.match(str, idx,
lambda idx: self.match(str, idx, cont))
# the empty case, like //
or cont(idx))
def atLeastOne(re):
return ReSeq(re, ReStar(re))
assert ReChar('a').match('a', 0) ==True
assert ReChar('a').match('b', 0) ==False
assert ReOr(ReChar('a'), ReChar('b')).match('b', 0) ==True
assert ReOr(ReChar('a'), ReChar('b')).match('c', 0) ==False
assert ReSeq(ReChar('a'), ReChar('c')).match('ac', 0) ==True
assert ReStar(ReChar('a')).match('aa', 0) ==True
assert ReStar(ReChar('a')).match('aaa', 0) ==True
assert ReStar(ReChar('a')).match('aab', 0) ==True
assert ReSeq(ReStar(ReChar('a')), ReChar('c')).match('aaac', 0) ==True
assert atLeastOne(ReChar('a')).match('', 0) ==False
assert ReSeq(ReSeq(ReChar('a'), ReChar('b')), ReChar('c')).match('abc', 0) ==True
def lit(c):
def re(s, cont=lambda x: True):
if not (True and s): return False
return c == s[0] and cont(s[1:])
return re
def seq(re_a, re_b):
def re(s, cont=lambda x: True):
return re_a(s, cont=lambda rest: re_b(rest, cont))
return re
def either(re_a, re_b):
# def f(s, cont=lambda x: True):
# return x(s, cont) or y(s, cont)
# return f
return lambda s, cont=lambda x:True : re_a(s, cont) or re_b(s, cont)
def star(re):
def f(s, cont=lambda x: True):
return re(s, cont=lambda rest: re(rest, cont))
return f
def at_least_one(re):
return seq(re, star(re))
assert lit('a')('a')
assert lit('a')('b') ==False
assert seq(lit('a'),lit('b'))('ab')
assert either(lit('a'),lit('b'))('b')
assert either(lit('a'),lit('b'))('c')==False
assert star(lit('a'))('aa')
assert star(lit('a'))('') ==False
assert star(lit('a'))('bb') ==False
assert at_least_one(lit('a'))('')==False
type Re =
| Lit of char
| Seq of Re * Re
| Or of Re * Re
| Star of Re
let rec match2 (re: Re) (str: char list) k =
match re with
| Lit(c) -> match str with
| [] -> false
| x::xs -> (x = c) && (k xs)
| Seq(a, b) -> match2 a str (fun rest -> match2 b rest k)
| Or(a, b) -> match2 a str k || match2 b str k
| Star(a) -> match2 a str (fun rest -> match2 (Star a) rest k) || k str
match2 (Lit 'a') ("b" |> Seq.toList) (fun x -> true)
match2 (Star (Lit 'a')) ("aaab" |> Seq.toList) (fun x -> true)
package cps;
import java.util.function.Function;
public interface Re {
boolean match(String s, Function<String, Boolean> kont);
default boolean match(String s) {
return match(s, kont -> true);
}
/**
* 匹配字符
* @param charLiteral
* @return
*/
static Re lit(char charLiteral) {
return (s, kont) -> {
if (s.isEmpty()) {
return false;
}
return charLiteral == s.charAt(0) && kont.apply(s.substring(1));
};
}
/**
* 按顺序匹配a b
* @param a
* @param b
* @return
*/
static Re seq(Re a, Re b) {
return (s, kont) -> a.match(s, rest -> b.match(rest, kont));
}
/**
* 匹配a或b
* @param a
* @param b
* @return
*/
static Re either(Re a, Re b) {
return (s, kont) -> a.match(s, kont) || b.match(s, kont);
}
/**
* 匹配0到N个
* @param re
* @return
*/
static Re star(Re re) {
return (s, kont) -> re.match(s, rest -> star(re).match(rest, kont))
|| kont.apply(s);
}
static Re atLeastOne(Re re) {
return seq(re, star(re));
}
}
package cps;
import org.junit.Test;
import static cps.Re.*;
import static org.junit.Assert.assertTrue;
public class ReTest {
@Test
public void should_match() {
assertTrue(lit('a').match("a"));
assertTrue(!lit('b').match("a"));
assertTrue(either(lit('a'), lit('b')).match("b"));
assertTrue(seq(lit('a'), lit('b')).match("ab"));
assertTrue(star(lit('a')).match("aa"));
assertTrue(!atLeastOne(lit('a')).match(""));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment