Skip to content

Instantly share code, notes, and snippets.

@lostinplace
Last active October 20, 2016 21:53
Show Gist options
  • Save lostinplace/ab9bba97613e774b7d88059cfd6df89b to your computer and use it in GitHub Desktop.
Save lostinplace/ab9bba97613e774b7d88059cfd6df89b to your computer and use it in GitHub Desktop.
package wikiparser
import fastparse.all._
import fastparse.core.{Mutable}
import scala.util.matching.Regex
/** A fastparse parser that matches a regular expression at the current parse index.
  *
  * The input is inspected through a window that starts at the parse index and is
  * widened by `leapDistance` characters per attempt. Widening stops once the window
  * is at least `maxLength` characters long or covers the rest of the input.
  *
  * A match is accepted only when it is non-empty and either ends before the end of
  * the window or the window already covers all remaining input; a match that touches
  * the end of a partial window could still grow once more input becomes visible.
  *
  * @param pattern      the regular expression to match; it is anchored to the parse index
  * @param leapDistance number of characters the window grows per attempt
  *                     (values below 1 are treated as 1 so the search always terminates)
  * @param maxLength    window length at which widening stops
  */
class RegexParser(pattern: Regex, leapDistance: Int = 10, maxLength: Int = 100) extends Parser[Regex.Match] {
  import fastparse.core.{ParseCtx}

  private type FailureSet = Set[fastparse.core.Parser[_, Char, String]]

  private val _innerPattern = _ensurePatternEvaluatesFromStart(pattern)

  override def toString: String = s"Regex(${_innerPattern.toString})"

  // Must be initialised after _innerPattern because toString reads it.
  private val zeroLengthMessage = s"$toString match with length > 0"

  /** Returns `aPattern` unchanged when it already starts with `^`, otherwise a copy with `^` prepended. */
  private def _ensurePatternEvaluatesFromStart(aPattern: Regex): Regex = {
    val source = aPattern.toString
    // A plain prefix test: extracting through a group-less regex never matches a one-variable pattern.
    if (source.startsWith("^")) aPattern else ("^" + source).r
  }

  /** Returns a copy of this parser whose window stops growing at `newMax` characters. */
  def maxChars(newMax: Int): RegexParser = new RegexParser(_innerPattern, leapDistance, newMax)

  /** Returns a copy of this parser whose window grows by `newLeap` characters per attempt. */
  def leap(newLeap: Int): RegexParser = new RegexParser(_innerPattern, newLeap, maxLength)

  /** Tries to match the pattern at `index`, widening the inspected window step by step.
    *
    * @param cfg   parse context providing the input and the reusable success/failure holders
    * @param index position in the input at which the match must start
    * @return a success carrying the `Regex.Match` and the index just past it, or a failure at `index`
    */
  override def parseRec(cfg: ParseCtx[Char, String], index: Int): Mutable[Regex.Match, Char, String] = {
    val remainingLength = cfg.input.length - index
    // A non-positive leap would never widen the window and the search would not terminate.
    val step = math.max(1, leapDistance)

    @scala.annotation.tailrec
    def attempt(previousLength: Int, failures: FailureSet): Mutable[Regex.Match, Char, String] = {
      val window = cfg.input.slice(index, index + previousLength + step)
      val coversRemainder = window.length == remainingLength
      // findPrefixMatchOf only accepts matches starting at offset 0, so alternations such as
      // `a|b` cannot match in the middle of the window the way a prepended `^` alone allows.
      _innerPattern.findPrefixMatchOf(window) match {
        case Some(found) if found.end > 0 && (found.end < window.length || coversRemainder) =>
          success(cfg.success, found, index + found.end, Set(), false)
        case candidate =>
          // Empty matches are recorded as a failure reason; the search continues with a wider window.
          val updated: FailureSet =
            if (candidate.exists(_.end == 0)) failures + this.opaque(zeroLengthMessage) else failures
          if (window.length < maxLength && window.length < remainingLength) attempt(window.length, updated)
          else fail(cfg.failure, index, updated, false)
      }
    }

    attempt(0, Set[fastparse.core.Parser[_, Char, String]]())
  }
}
/** Factory methods and the implicit `Regex` conversion for `RegexParser`. */
object RegexParser {
  // Enables the implicit conversion below without a compiler feature warning.
  import scala.language.implicitConversions

  /** Lets a `Regex` be used wherever a `RegexParser` is expected, with the default leap and length. */
  implicit def regexToRegexParser(pattern: Regex): RegexParser = R(pattern)

  /** Short alias for `apply`.
    *
    * @param pattern      the regular expression to match
    * @param leapDistance number of characters the inspected window grows per attempt
    * @param maxLength    window length at which widening stops
    */
  def R(pattern: Regex, leapDistance: Int = 10, maxLength: Int = 100): RegexParser =
    apply(pattern, leapDistance, maxLength)

  /** Builds a `RegexParser` for `pattern` with the given window settings. */
  def apply(pattern: Regex, leapDistance: Int = 10, maxLength: Int = 100): RegexParser =
    new RegexParser(pattern, leapDistance, maxLength)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment