Last active
September 1, 2018 01:06
-
-
Save yellowflash/826004277874cadabbc502e6d406b39e to your computer and use it in GitHub Desktop.
Regex Engine which runs in `O(mn)` Glushkov automaton
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sealed trait RegExp { | |
def isFinal:Boolean | |
def acceptsEmpty:Boolean | |
def shift(prev:Boolean, character:Char):RegExp | |
def matches(str:String) = | |
if(str.isEmpty) this.acceptsEmpty | |
else str.foldLeft(this.shift(true, str.head))(_.shift(false,_)).isFinal | |
} | |
object Epsilon extends RegExp { | |
override val isFinal = false | |
override val acceptsEmpty = true | |
override def shift(prev:Boolean, character: Char) = this | |
} | |
case class Symbol(sym:Char, override val isFinal:Boolean = false) extends RegExp { | |
override val acceptsEmpty = false | |
override def shift(prev: Boolean, character: Char) = | |
if(prev && sym == character) Symbol(sym, isFinal = true) | |
else Symbol(sym, false) | |
} | |
case class Alternation(r1:RegExp, r2:RegExp) extends RegExp { | |
override val isFinal = r1.isFinal || r2.isFinal | |
override val acceptsEmpty = r1.acceptsEmpty || r2.acceptsEmpty | |
override def shift(prev:Boolean, character: Char) = | |
Alternation(r1.shift(prev, character), r2.shift(prev,character)) | |
} | |
case class Sequence(r1:RegExp, r2:RegExp) extends RegExp { | |
override val isFinal = r2.isFinal || (r2.acceptsEmpty && r1.isFinal) | |
override val acceptsEmpty = r1.acceptsEmpty && r2.acceptsEmpty | |
override def shift(prev:Boolean, character:Char) = | |
Sequence(r1.shift(prev, character), r2.shift((prev && r1.acceptsEmpty) || r1.isFinal, character)) | |
} | |
case class Repetition(r:RegExp) extends RegExp { | |
override val isFinal = r.isFinal | |
override val acceptsEmpty = true | |
override def shift(prev: Boolean, character: Char) = | |
Repetition(r.shift(r.isFinal || prev, character)) | |
} | |
object RegExpOps { | |
class RegExpOps(regex:RegExp) { | |
def ~(other: RegExp) = Sequence(regex, other) | |
def |(other: RegExp) = Alternation(regex, other) | |
def * = Repetition(regex) | |
} | |
implicit def toRegexOps(regex:RegExp) = new RegExpOps(regex) | |
def s(character:Character) = Symbol(character) | |
} | |
object Main { | |
import RegExpOps._ | |
def main (args: Array[String]): Unit = { | |
val regex:RegExp = ((((s('a') | s('b'))*) ~ (s('c') ~ ((s('a') | s('b'))*)) ~ s('c'))*) ~ ((s('a')|s('b'))*) | |
List("ab", "abc", "abcc", "abcca", "abccac", "abccacac").foreach { str => | |
println(str + " matches = " + regex.matches(str)) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment