Last active
December 10, 2015 08:48
-
-
Save punya/4410544 to your computer and use it in GitHub Desktop.
Java translation of http://sebfisch.github.com/haskell-regexp/regexp-play.pdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.common.base.Function; | |
public abstract class RegW<T> implements Semiring<T> { | |
public abstract class RegType { | |
private RegType() { | |
// only allow instantiation from this file | |
} | |
@Override | |
public final String toString() { | |
StringBuilder builder = new StringBuilder(); | |
appendTo(builder); | |
return builder.toString(); | |
} | |
protected abstract void appendTo(StringBuilder builder); | |
public abstract T accept(String str); | |
} | |
private T fromBoolean(boolean in) { | |
return in ? one() : zero(); | |
} | |
public RegType eps() { | |
class Eps extends RegType { | |
@Override | |
protected void appendTo(StringBuilder builder) { | |
builder.append("ε"); | |
} | |
@Override | |
public T accept(String str) { | |
return fromBoolean(str.isEmpty()); | |
} | |
} | |
return new Eps(); | |
} | |
private final class MatchesChar implements Function<Character, T> { | |
private final char ch; | |
private MatchesChar(char ch) { | |
this.ch = ch; | |
} | |
@Override | |
public T apply(Character in) { | |
return fromBoolean(ch == in); | |
} | |
@Override | |
public String toString() { | |
return String.valueOf(ch); | |
} | |
} | |
public RegType sym(char ch) { | |
return sym(new MatchesChar(ch)); | |
} | |
public RegType sym(final Function<Character, T> fn) { | |
class Sym extends RegType { | |
@Override | |
protected void appendTo(StringBuilder builder) { | |
builder.append(fn); | |
} | |
@Override | |
public T accept(String str) { | |
return str.length() == 1 ? fn.apply(str.charAt(0)) : zero(); | |
} | |
} | |
return new Sym(); | |
} | |
public RegType alt(final RegType left, final RegType right) { | |
class Alt extends RegType { | |
@Override | |
protected void appendTo(StringBuilder builder) { | |
builder.append(left).append("|").append(right); | |
} | |
@Override | |
public T accept(String str) { | |
return plus(left.accept(str), right.accept(str)); | |
} | |
} | |
return new Alt(); | |
} | |
public RegType seq(final RegType left, final RegType right) { | |
class Seq extends RegType { | |
@Override | |
protected void appendTo(StringBuilder builder) { | |
builder.append(left).append(right); | |
} | |
@Override | |
public T accept(String str) { | |
T result = zero(); | |
for (int i = 0; i <= str.length(); ++i) { | |
result = plus(result, times( | |
left.accept(str.substring(0, i)), | |
right.accept(str.substring(i)))); | |
} | |
return result; | |
} | |
} | |
return new Seq(); | |
} | |
public RegType rep(final RegType reg) { | |
class Rep extends RegType { | |
@Override | |
protected void appendTo(StringBuilder builder) { | |
builder.append("(").append(reg).append(")*"); | |
} | |
@Override | |
public T accept(String str) { | |
if (str.isEmpty()) { | |
return one(); | |
} | |
T result = zero(); | |
for (int i = 1; i <= str.length(); ++i) { | |
result = plus(result, times( | |
reg.accept(str.substring(0, i)), | |
accept(str.substring(i)))); | |
} | |
return result; | |
} | |
} | |
return new Rep(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import static org.hamcrest.CoreMatchers.is; | |
import static org.junit.Assert.assertThat; | |
import org.junit.Test; | |
public final class RegWTest { | |
private static final class MatchCounter extends RegW<Integer> { | |
@Override | |
public Integer zero() { | |
return 0; | |
} | |
@Override | |
public Integer one() { | |
return 1; | |
} | |
@Override | |
public Integer plus(Integer left, Integer right) { | |
return left + right; | |
} | |
@Override | |
public Integer times(Integer left, Integer right) { | |
return left * right; | |
} | |
} | |
@Test | |
public void testCount() { | |
MatchCounter m = new MatchCounter(); | |
MatchCounter.RegType as = m.alt(m.sym('a'), m.rep(m.sym('a'))); | |
MatchCounter.RegType bs = m.alt(m.sym('b'), m.rep(m.sym('b'))); | |
assertThat(as.accept("a"), is(2)); | |
assertThat(m.seq(as, bs).accept("ab"), is(4)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public interface Semiring<T> { | |
T zero(); | |
T one(); | |
T plus(T left, T right); | |
T times(T left, T right); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment