Skip to content

Instantly share code, notes, and snippets.

@yellowflash
Last active September 1, 2018 01:06
Show Gist options
  • Star 12 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save yellowflash/826004277874cadabbc502e6d406b39e to your computer and use it in GitHub Desktop.
Save yellowflash/826004277874cadabbc502e6d406b39e to your computer and use it in GitHub Desktop.
Regex Engine which runs in `O(mn)` Glushkov automaton
sealed trait RegExp {
def isFinal:Boolean
def acceptsEmpty:Boolean
def shift(prev:Boolean, character:Char):RegExp
def matches(str:String) =
if(str.isEmpty) this.acceptsEmpty
else str.foldLeft(this.shift(true, str.head))(_.shift(false,_)).isFinal
}
object Epsilon extends RegExp {
override val isFinal = false
override val acceptsEmpty = true
override def shift(prev:Boolean, character: Char) = this
}
case class Symbol(sym:Char, override val isFinal:Boolean = false) extends RegExp {
override val acceptsEmpty = false
override def shift(prev: Boolean, character: Char) =
if(prev && sym == character) Symbol(sym, isFinal = true)
else Symbol(sym, false)
}
case class Alternation(r1:RegExp, r2:RegExp) extends RegExp {
override val isFinal = r1.isFinal || r2.isFinal
override val acceptsEmpty = r1.acceptsEmpty || r2.acceptsEmpty
override def shift(prev:Boolean, character: Char) =
Alternation(r1.shift(prev, character), r2.shift(prev,character))
}
case class Sequence(r1:RegExp, r2:RegExp) extends RegExp {
override val isFinal = r2.isFinal || (r2.acceptsEmpty && r1.isFinal)
override val acceptsEmpty = r1.acceptsEmpty && r2.acceptsEmpty
override def shift(prev:Boolean, character:Char) =
Sequence(r1.shift(prev, character), r2.shift((prev && r1.acceptsEmpty) || r1.isFinal, character))
}
case class Repetition(r:RegExp) extends RegExp {
override val isFinal = r.isFinal
override val acceptsEmpty = true
override def shift(prev: Boolean, character: Char) =
Repetition(r.shift(r.isFinal || prev, character))
}
object RegExpOps {
class RegExpOps(regex:RegExp) {
def ~(other: RegExp) = Sequence(regex, other)
def |(other: RegExp) = Alternation(regex, other)
def * = Repetition(regex)
}
implicit def toRegexOps(regex:RegExp) = new RegExpOps(regex)
def s(character:Character) = Symbol(character)
}
object Main {
import RegExpOps._
def main (args: Array[String]): Unit = {
val regex:RegExp = ((((s('a') | s('b'))*) ~ (s('c') ~ ((s('a') | s('b'))*)) ~ s('c'))*) ~ ((s('a')|s('b'))*)
List("ab", "abc", "abcc", "abcca", "abccac", "abccacac").foreach { str =>
println(str + " matches = " + regex.matches(str))
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment