Skip to content

Instantly share code, notes, and snippets.

@noel-yap
Created September 24, 2012 21:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save noel-yap/3778691 to your computer and use it in GitHub Desktop.
Save noel-yap/3778691 to your computer and use it in GitHub Desktop.
Regex matchers in Scala
package org.siliconvalleypatterns
/**
* @author noel.yap@gmail.com
*/
class CompiledRegexMatcher(val regex: String) {
def compile(): List[Token] = {
def compile(regex: String): List[Token] = {
if (regex.isEmpty) {
Nil
} else {
val token = regex.head match {
case '\\' => {
if (regex.length < 2) {
throw new Exception("malformed regex")
} else {
new EscapedToken(regex(1))
}
}
case '^' => new BeginToken
case '$' => new EndToken
case '*' => throw new Exception("malformed regex")
case c => {
val underlying = if (c == '.') {
new AnyToken
} else {
new LiteralToken(c)
}
if (regex.length > 1 && regex(1) == '*') {
new ZeroOrMoreToken(underlying)
} else {
underlying
}
}
}
token +: compile(token.munch(regex))
}
}
compile(regex)
}
def ~(text:String): Boolean = {
matches(compile(), text)
}
def matches(regex: List[Token], text: String): Boolean = {
def restOfRegexMatches(regex: List[Token], text: String): (Boolean, Boolean) = {
if (regex.isEmpty) {
(true, false)
} else {
val regexHead = regex.head
val (isMatch, restOfText) = regexHead.matches(text)
(isMatch && restOfRegexMatches(regex.tail, restOfText)._1, regexHead.tryAgain())
}
}
val (isMatch, tryAgain) = restOfRegexMatches(regex, text)
isMatch || tryAgain && !text.isEmpty && matches(regex, text.tail)
}
trait TryAgainTrait {
def tryAgain(): Boolean
}
trait TryAgain extends TryAgainTrait {
override val tryAgain = true
}
trait DoNotTryAgain extends TryAgainTrait {
override val tryAgain = false
}
trait RegexMuncher {
val munchAmount: Int
def munch(regex: String): String = regex.substring(munchAmount)
}
trait OneCharMuncher extends RegexMuncher {
override val munchAmount = 1
}
trait TwoCharMuncher extends RegexMuncher {
override val munchAmount = 2
}
abstract class Token extends RegexMuncher with TryAgainTrait {
def canEqual(that: Any) = that.isInstanceOf[Token]
override def equals(that: Any) = that match {
case t: Token => t canEqual this
case _ => false
}
def matches(text: String): (Boolean, String)
}
abstract class CharToken extends Token with OneCharMuncher with TryAgain {
override def canEqual(that: Any) = that.isInstanceOf[CharToken]
override def matches(text: String) = {
if (text.isEmpty) {
(false, text)
} else {
(matchesChar(text.head), text.tail)
}
}
def matchesChar(char: => Char): Boolean
}
class LiteralToken(val literal: Char) extends CharToken {
override def canEqual(that: Any) = that.isInstanceOf[LiteralToken]
override def equals(that: Any) = that match {
case t: LiteralToken => super.equals(that) && (t canEqual this) && t.literal == this.literal
case _ => false
}
override def hashCode() = 41 + literal.hashCode
override def matchesChar(char: => Char) = literal == char
}
class EscapedToken(literal: Char) extends LiteralToken(literal) with TwoCharMuncher with TryAgain {
override def canEqual(that: Any) = that.isInstanceOf[EscapedToken]
}
class AnyToken extends CharToken {
override def canEqual(that: Any) = that.isInstanceOf[AnyToken]
override def matchesChar(char: => Char) = true
}
class BeginToken extends Token with OneCharMuncher with DoNotTryAgain {
override def canEqual(that: Any) = that.isInstanceOf[BeginToken]
override def matches(text: String) = (true, text)
}
class EndToken extends Token with OneCharMuncher with DoNotTryAgain {
override def canEqual(that: Any) = that.isInstanceOf[EndToken]
override def matches(text: String) = (text.isEmpty, text)
}
class ZeroOrMoreToken(val underlying: CharToken) extends Token with TwoCharMuncher with TryAgain {
override def canEqual(that: Any) = that.isInstanceOf[ZeroOrMoreToken]
override def equals(that: Any) = that match {
case t: ZeroOrMoreToken => super.equals(that) && (t canEqual this) && t.underlying == this.underlying
case _ => false
}
override def hashCode() = 41 + underlying.hashCode
override def matches(text: String) = {
def munch(t: String): String = {
if (underlying.matches(t)._1) {
munch(t.tail)
} else {
t
}
}
(true, munch(text))
}
}
}
package org.siliconvalleypatterns
import org.specs2.mutable.Specification
/**
* @author noel.yap@gmail.com
*/
class CompiledRegexMatcherSpec extends Specification {
"$" should {
val regexMatcher = new CompiledRegexMatcher("$")
"compile to EndToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.EndToken)
}
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"not match non-empty text" in {
regexMatcher ~ "$" mustEqual false
}
}
"^" should {
val regexMatcher = new CompiledRegexMatcher("^")
"compile to BeginToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.BeginToken)
}
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"match non-empty text" in {
regexMatcher ~ "^" mustEqual true
}
}
"." should {
val regexMatcher = new CompiledRegexMatcher(".")
"compile to AnyToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.AnyToken)
}
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
"match non-empty text" in {
regexMatcher ~ "." mustEqual true
}
}
"literal" should {
val regexMatcher = new CompiledRegexMatcher("a")
"compile to LiteralToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.LiteralToken('a'))
}
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
"not match that text" in {
regexMatcher ~ "0" mustEqual false
}
"match itself" in {
regexMatcher ~ "a" mustEqual true
}
}
"literal*" should {
val regexMatcher = new CompiledRegexMatcher("^a*$")
"compile" in {
regexMatcher.compile() mustEqual List(
new regexMatcher.BeginToken,
new regexMatcher.ZeroOrMoreToken(new regexMatcher.LiteralToken('a')),
new regexMatcher.EndToken)
}
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"match one" in {
regexMatcher ~ "a" mustEqual true
}
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
"not match" in {
regexMatcher ~ "0" mustEqual false
}
}
".*" should {
val regexMatcher = new CompiledRegexMatcher("^.*$")
"compile" in {
regexMatcher.compile() mustEqual List(
new regexMatcher.BeginToken,
new regexMatcher.ZeroOrMoreToken(new regexMatcher.AnyToken),
new regexMatcher.EndToken)
}
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"match one" in {
regexMatcher ~ "a" mustEqual true
}
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
}
"*" should {
val regexMatcher = new CompiledRegexMatcher("*")
"not compile" in {
regexMatcher.compile() must throwAn[Exception](message = "malformed regex")
}
"not match empty text" in {
regexMatcher ~ "" must throwAn[Exception](message = "malformed regex")
}
"not match non-empty text" in {
regexMatcher ~ "" must throwAn[Exception](message = "malformed regex")
}
}
"bc*d" should {
val regexMatcher = new CompiledRegexMatcher("bc*d")
"compile" in {
import regexMatcher._
regexMatcher.compile() mustEqual List(
new LiteralToken('b'),
new ZeroOrMoreToken(new LiteralToken('c')),
new LiteralToken('d'))
}
"match abde" in {
regexMatcher ~ "abde" mustEqual true
}
"match abcde" in {
regexMatcher ~ "abcde" mustEqual true
}
"match abccde" in {
regexMatcher ~ "abccde" mustEqual true
}
"not match abfde" in {
regexMatcher ~ "abfde" mustEqual false
}
}
"^a" should {
val regexMatcher = new CompiledRegexMatcher("^a")
"compile" in {
regexMatcher.compile() mustEqual List(
new regexMatcher.BeginToken,
new regexMatcher.LiteralToken('a'))
}
"match a" in {
regexMatcher ~ "a" mustEqual true
}
"match ab" in {
regexMatcher ~ "ab" mustEqual true
}
"not match ba" in {
regexMatcher ~ "ba" mustEqual false
}
}
"""\""" should {
"not compile" in {
val regexMatcher = new CompiledRegexMatcher("\\")
regexMatcher.compile() must throwAn[Exception](message = "malformed regex")
}
"compile" in {
val regexMatcher = new CompiledRegexMatcher( """^\\$""")
import regexMatcher._
regexMatcher.compile() mustEqual List(
new BeginToken,
new EscapedToken('\\'),
new EndToken)
}
"""match \""" in {
val regexMatcher = new CompiledRegexMatcher( """^\\$""")
regexMatcher ~ """\""" mustEqual true
}
"match ^" in {
val regexMatcher = new CompiledRegexMatcher( """^\^$""")
regexMatcher ~ "^" mustEqual true
}
"match ." in {
val regexMatcher = new CompiledRegexMatcher( """^\.$""")
regexMatcher ~ "." mustEqual true
}
"match *" in {
val regexMatcher = new CompiledRegexMatcher( """^\*$""")
regexMatcher ~ "*" mustEqual true
}
}
}
class CoreRegexMatcher(val regex: String) {
def ~(text: String): Boolean = {
def restOfRegexMatches(regex: String, text: String): (Boolean, Boolean) = {
if (regex.isEmpty) {
(true, false)
} else {
regex.head match {
case '\\' => (regex(1) == text.head, true)
case '*' => throw new Exception("malformed regex")
case '^' => {
(restOfRegexMatches(regex.tail, text)._1, false)
}
case '$' => (text.isEmpty, false)
case _ => {
val firstCharMatches = !text.isEmpty && (regex.head == text.head || regex.head == '.')
if (regex.length < 2 || regex(1) != '*') {
(firstCharMatches && restOfRegexMatches(regex.tail, text.tail)._1, true)
} else {
(firstCharMatches || restOfRegexMatches(regex.substring(2), text)._1, true)
}
}
}
}
}
val (isMatch, tryAgain) = restOfRegexMatches(regex, text)
isMatch || tryAgain && !text.isEmpty && this ~ text.tail
}
}
import org.specs2.mutable.Specification
class CoreRegexMatcherSpec extends Specification {
"$" should {
val regexMatcher = new CoreRegexMatcher("$")
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"not match non-empty text" in {
regexMatcher ~ "$" mustEqual false
}
}
"^" should {
val regexMatcher = new CoreRegexMatcher("^")
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"match non-empty text" in {
regexMatcher ~ "^" mustEqual true
}
}
"." should {
val regexMatcher = new CoreRegexMatcher(".")
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
"match non-empty text" in {
regexMatcher ~ "." mustEqual true
}
}
"literal" should {
val regexMatcher = new CoreRegexMatcher("a")
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
"not match that text" in {
regexMatcher ~ "0" mustEqual false
}
"match itself" in {
regexMatcher ~ "a" mustEqual true
}
}
"literal*" should {
val regexMatcher = new CoreRegexMatcher("^a*$")
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"match one" in {
regexMatcher ~ "a" mustEqual true
}
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
"not match" in {
regexMatcher ~ "0" mustEqual false
}
}
".*" should {
val regexMatcher = new CoreRegexMatcher("^.*$")
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
"match one" in {
regexMatcher ~ "a" mustEqual true
}
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
}
"*" should {
val regexMatcher = new CoreRegexMatcher("*")
"not match empty text" in {
regexMatcher ~ "" must throwA[Exception](message="malformed regex")
}
"not match non-empty text" in {
regexMatcher ~ "" must throwA[Exception](message="malformed regex")
}
}
"bc*d" should {
val regexMatcher = new CoreRegexMatcher("bc*d")
"match abde" in {
regexMatcher ~ "abde" mustEqual true
}
"match abcde" in {
regexMatcher ~ "abcde" mustEqual true
}
"match abccde" in {
regexMatcher ~ "abccde" mustEqual true
}
"not match abfde" in {
regexMatcher ~ "abfde" mustEqual false
}
}
"^a" should {
val regexMatcher = new CoreRegexMatcher("^a")
"match a" in {
regexMatcher ~ "a" mustEqual true
}
"match ab" in {
regexMatcher ~ "ab" mustEqual true
}
"not match ba" in {
regexMatcher ~ "ba" mustEqual false
}
}
"""\""" should {
"""match \""" in {
val regexMatcher = new CoreRegexMatcher("""^\\$""")
regexMatcher ~ """\""" mustEqual true
}
"match ^" in {
val regexMatcher = new CoreRegexMatcher("""^\^$""")
regexMatcher ~ "^" mustEqual true
}
"match ." in {
val regexMatcher = new CoreRegexMatcher("""^\.$""")
regexMatcher ~ "." mustEqual true
}
"match *" in {
val regexMatcher = new CoreRegexMatcher("""^\*$""")
regexMatcher ~ "*" mustEqual true
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment