public
Created

Regex matchers in Scala

  • Download Gist
CompiledRegexMatcher.scala
Scala
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
package org.siliconvalleypatterns
 
/**
* @author noel.yap@gmail.com
*/
class CompiledRegexMatcher(val regex: String) {
def compile(): List[Token] = {
def compile(regex: String): List[Token] = {
if (regex.isEmpty) {
Nil
} else {
val token = regex.head match {
case '\\' => {
if (regex.length < 2) {
throw new Exception("malformed regex")
} else {
new EscapedToken(regex(1))
}
}
case '^' => new BeginToken
case '$' => new EndToken
case '*' => throw new Exception("malformed regex")
case c => {
val underlying = if (c == '.') {
new AnyToken
} else {
new LiteralToken(c)
}
 
if (regex.length > 1 && regex(1) == '*') {
new ZeroOrMoreToken(underlying)
} else {
underlying
}
}
}
 
token +: compile(token.munch(regex))
}
}
 
compile(regex)
}
 
def ~(text:String): Boolean = {
matches(compile(), text)
}
 
def matches(regex: List[Token], text: String): Boolean = {
def restOfRegexMatches(regex: List[Token], text: String): (Boolean, Boolean) = {
if (regex.isEmpty) {
(true, false)
} else {
val regexHead = regex.head
val (isMatch, restOfText) = regexHead.matches(text)
 
(isMatch && restOfRegexMatches(regex.tail, restOfText)._1, regexHead.tryAgain())
}
}
 
val (isMatch, tryAgain) = restOfRegexMatches(regex, text)
 
isMatch || tryAgain && !text.isEmpty && matches(regex, text.tail)
}
 
trait TryAgainTrait {
def tryAgain(): Boolean
}
 
trait TryAgain extends TryAgainTrait {
override val tryAgain = true
}
 
trait DoNotTryAgain extends TryAgainTrait {
override val tryAgain = false
}
 
trait RegexMuncher {
val munchAmount: Int
def munch(regex: String): String = regex.substring(munchAmount)
}
 
trait OneCharMuncher extends RegexMuncher {
override val munchAmount = 1
}
 
trait TwoCharMuncher extends RegexMuncher {
override val munchAmount = 2
}
 
abstract class Token extends RegexMuncher with TryAgainTrait {
def canEqual(that: Any) = that.isInstanceOf[Token]
override def equals(that: Any) = that match {
case t: Token => t canEqual this
case _ => false
}
def matches(text: String): (Boolean, String)
}
 
abstract class CharToken extends Token with OneCharMuncher with TryAgain {
override def canEqual(that: Any) = that.isInstanceOf[CharToken]
override def matches(text: String) = {
if (text.isEmpty) {
(false, text)
} else {
(matchesChar(text.head), text.tail)
}
}
def matchesChar(char: => Char): Boolean
}
 
class LiteralToken(val literal: Char) extends CharToken {
override def canEqual(that: Any) = that.isInstanceOf[LiteralToken]
override def equals(that: Any) = that match {
case t: LiteralToken => super.equals(that) && (t canEqual this) && t.literal == this.literal
case _ => false
}
override def hashCode() = 41 + literal.hashCode
override def matchesChar(char: => Char) = literal == char
}
 
class EscapedToken(literal: Char) extends LiteralToken(literal) with TwoCharMuncher with TryAgain {
override def canEqual(that: Any) = that.isInstanceOf[EscapedToken]
}
 
class AnyToken extends CharToken {
override def canEqual(that: Any) = that.isInstanceOf[AnyToken]
override def matchesChar(char: => Char) = true
}
 
class BeginToken extends Token with OneCharMuncher with DoNotTryAgain {
override def canEqual(that: Any) = that.isInstanceOf[BeginToken]
override def matches(text: String) = (true, text)
}
 
class EndToken extends Token with OneCharMuncher with DoNotTryAgain {
override def canEqual(that: Any) = that.isInstanceOf[EndToken]
override def matches(text: String) = (text.isEmpty, text)
}
 
class ZeroOrMoreToken(val underlying: CharToken) extends Token with TwoCharMuncher with TryAgain {
override def canEqual(that: Any) = that.isInstanceOf[ZeroOrMoreToken]
override def equals(that: Any) = that match {
case t: ZeroOrMoreToken => super.equals(that) && (t canEqual this) && t.underlying == this.underlying
case _ => false
}
override def hashCode() = 41 + underlying.hashCode
override def matches(text: String) = {
def munch(t: String): String = {
if (underlying.matches(t)._1) {
munch(t.tail)
} else {
t
}
}
 
(true, munch(text))
}
}
}
CompiledRegexMatcherSpec.scala
Scala
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
package org.siliconvalleypatterns
 
import org.specs2.mutable.Specification
 
/**
* @author noel.yap@gmail.com
*/
class CompiledRegexMatcherSpec extends Specification {
"$" should {
val regexMatcher = new CompiledRegexMatcher("$")
 
"compile to EndToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.EndToken)
}
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"not match non-empty text" in {
regexMatcher ~ "$" mustEqual false
}
}
 
"^" should {
val regexMatcher = new CompiledRegexMatcher("^")
 
"compile to BeginToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.BeginToken)
}
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"match non-empty text" in {
regexMatcher ~ "^" mustEqual true
}
}
 
"." should {
val regexMatcher = new CompiledRegexMatcher(".")
 
"compile to AnyToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.AnyToken)
}
 
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
 
"match non-empty text" in {
regexMatcher ~ "." mustEqual true
}
}
 
"literal" should {
val regexMatcher = new CompiledRegexMatcher("a")
 
"compile to LiteralToken" in {
regexMatcher.compile() mustEqual List(new regexMatcher.LiteralToken('a'))
}
 
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
 
"not match that text" in {
regexMatcher ~ "0" mustEqual false
}
 
"match itself" in {
regexMatcher ~ "a" mustEqual true
}
}
 
"literal*" should {
val regexMatcher = new CompiledRegexMatcher("^a*$")
 
"compile" in {
regexMatcher.compile() mustEqual List(
new regexMatcher.BeginToken,
new regexMatcher.ZeroOrMoreToken(new regexMatcher.LiteralToken('a')),
new regexMatcher.EndToken)
}
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"match one" in {
regexMatcher ~ "a" mustEqual true
}
 
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
 
"not match" in {
regexMatcher ~ "0" mustEqual false
}
}
 
".*" should {
val regexMatcher = new CompiledRegexMatcher("^.*$")
 
"compile" in {
regexMatcher.compile() mustEqual List(
new regexMatcher.BeginToken,
new regexMatcher.ZeroOrMoreToken(new regexMatcher.AnyToken),
new regexMatcher.EndToken)
}
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"match one" in {
regexMatcher ~ "a" mustEqual true
}
 
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
}
 
"*" should {
val regexMatcher = new CompiledRegexMatcher("*")
 
"not compile" in {
regexMatcher.compile() must throwAn[Exception](message = "malformed regex")
}
 
"not match empty text" in {
regexMatcher ~ "" must throwAn[Exception](message = "malformed regex")
}
 
"not match non-empty text" in {
regexMatcher ~ "" must throwAn[Exception](message = "malformed regex")
}
}
 
"bc*d" should {
val regexMatcher = new CompiledRegexMatcher("bc*d")
 
"compile" in {
import regexMatcher._
 
regexMatcher.compile() mustEqual List(
new LiteralToken('b'),
new ZeroOrMoreToken(new LiteralToken('c')),
new LiteralToken('d'))
}
 
"match abde" in {
regexMatcher ~ "abde" mustEqual true
}
 
"match abcde" in {
regexMatcher ~ "abcde" mustEqual true
}
 
"match abccde" in {
regexMatcher ~ "abccde" mustEqual true
}
 
"not match abfde" in {
regexMatcher ~ "abfde" mustEqual false
}
}
 
"^a" should {
val regexMatcher = new CompiledRegexMatcher("^a")
 
"compile" in {
regexMatcher.compile() mustEqual List(
new regexMatcher.BeginToken,
new regexMatcher.LiteralToken('a'))
}
 
"match a" in {
regexMatcher ~ "a" mustEqual true
}
 
"match ab" in {
regexMatcher ~ "ab" mustEqual true
}
 
"not match ba" in {
regexMatcher ~ "ba" mustEqual false
}
}
 
"""\""" should {
"not compile" in {
val regexMatcher = new CompiledRegexMatcher("\\")
 
regexMatcher.compile() must throwAn[Exception](message = "malformed regex")
}
 
"compile" in {
val regexMatcher = new CompiledRegexMatcher( """^\\$""")
 
import regexMatcher._
 
regexMatcher.compile() mustEqual List(
new BeginToken,
new EscapedToken('\\'),
new EndToken)
}
 
"""match \""" in {
val regexMatcher = new CompiledRegexMatcher( """^\\$""")
 
regexMatcher ~ """\""" mustEqual true
}
 
"match ^" in {
val regexMatcher = new CompiledRegexMatcher( """^\^$""")
 
regexMatcher ~ "^" mustEqual true
}
 
"match ." in {
val regexMatcher = new CompiledRegexMatcher( """^\.$""")
 
regexMatcher ~ "." mustEqual true
}
 
"match *" in {
val regexMatcher = new CompiledRegexMatcher( """^\*$""")
 
regexMatcher ~ "*" mustEqual true
}
}
}
CoreRegexMatcher.scala
Scala
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
class CoreRegexMatcher(val regex: String) {
def ~(text: String): Boolean = {
def restOfRegexMatches(regex: String, text: String): (Boolean, Boolean) = {
if (regex.isEmpty) {
(true, false)
} else {
regex.head match {
case '\\' => (regex(1) == text.head, true)
case '*' => throw new Exception("malformed regex")
case '^' => {
(restOfRegexMatches(regex.tail, text)._1, false)
}
case '$' => (text.isEmpty, false)
case _ => {
val firstCharMatches = !text.isEmpty && (regex.head == text.head || regex.head == '.')
 
if (regex.length < 2 || regex(1) != '*') {
(firstCharMatches && restOfRegexMatches(regex.tail, text.tail)._1, true)
} else {
(firstCharMatches || restOfRegexMatches(regex.substring(2), text)._1, true)
}
}
}
}
}
 
val (isMatch, tryAgain) = restOfRegexMatches(regex, text)
 
 
isMatch || tryAgain && !text.isEmpty && this ~ text.tail
}
}
CoreRegexMatcherSpec.scala
Scala
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
import org.specs2.mutable.Specification
 
class CoreRegexMatcherSpec extends Specification {
"$" should {
val regexMatcher = new CoreRegexMatcher("$")
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"not match non-empty text" in {
regexMatcher ~ "$" mustEqual false
}
}
 
"^" should {
val regexMatcher = new CoreRegexMatcher("^")
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"match non-empty text" in {
regexMatcher ~ "^" mustEqual true
}
}
 
"." should {
val regexMatcher = new CoreRegexMatcher(".")
 
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
 
"match non-empty text" in {
regexMatcher ~ "." mustEqual true
}
}
 
"literal" should {
val regexMatcher = new CoreRegexMatcher("a")
 
"not match empty text" in {
regexMatcher ~ "" mustEqual false
}
 
"not match that text" in {
regexMatcher ~ "0" mustEqual false
}
 
"match itself" in {
regexMatcher ~ "a" mustEqual true
}
}
 
"literal*" should {
val regexMatcher = new CoreRegexMatcher("^a*$")
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"match one" in {
regexMatcher ~ "a" mustEqual true
}
 
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
 
"not match" in {
regexMatcher ~ "0" mustEqual false
}
}
 
".*" should {
val regexMatcher = new CoreRegexMatcher("^.*$")
 
"match empty text" in {
regexMatcher ~ "" mustEqual true
}
 
"match one" in {
regexMatcher ~ "a" mustEqual true
}
 
"match multiple" in {
regexMatcher ~ "aa" mustEqual true
}
}
 
"*" should {
val regexMatcher = new CoreRegexMatcher("*")
 
"not match empty text" in {
regexMatcher ~ "" must throwA[Exception](message="malformed regex")
}
 
"not match non-empty text" in {
regexMatcher ~ "" must throwA[Exception](message="malformed regex")
}
}
 
"bc*d" should {
val regexMatcher = new CoreRegexMatcher("bc*d")
 
"match abde" in {
regexMatcher ~ "abde" mustEqual true
}
 
"match abcde" in {
regexMatcher ~ "abcde" mustEqual true
}
 
"match abccde" in {
regexMatcher ~ "abccde" mustEqual true
}
 
"not match abfde" in {
regexMatcher ~ "abfde" mustEqual false
}
}
 
"^a" should {
val regexMatcher = new CoreRegexMatcher("^a")
 
"match a" in {
regexMatcher ~ "a" mustEqual true
}
 
"match ab" in {
regexMatcher ~ "ab" mustEqual true
}
 
"not match ba" in {
regexMatcher ~ "ba" mustEqual false
}
}
 
"""\""" should {
"""match \""" in {
val regexMatcher = new CoreRegexMatcher("""^\\$""")
 
regexMatcher ~ """\""" mustEqual true
}
 
"match ^" in {
val regexMatcher = new CoreRegexMatcher("""^\^$""")
 
regexMatcher ~ "^" mustEqual true
}
 
"match ." in {
val regexMatcher = new CoreRegexMatcher("""^\.$""")
 
regexMatcher ~ "." mustEqual true
}
 
"match *" in {
val regexMatcher = new CoreRegexMatcher("""^\*$""")
 
regexMatcher ~ "*" mustEqual true
}
}
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.