Last active
October 20, 2016 21:53
-
-
Save lostinplace/ab9bba97613e774b7d88059cfd6df89b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package wikiparser | |
import fastparse.all._ | |
import fastparse.core.{Mutable} | |
import scala.util.matching.Regex | |
/**
 * A fastparse parser that matches a regular expression at the current parse position
 * and yields the resulting [[scala.util.matching.Regex.Match]].
 *
 * The input is examined through a window that starts at the parse index. The window
 * grows by `leapDistance` characters per attempt until a usable match is found, the
 * window covers all remaining input, or the window length reaches `maxLength`.
 * Because the window grows in whole leaps, its final length may exceed `maxLength`
 * by up to `leapDistance - 1` characters.
 *
 * @param pattern      regular expression to match at the parse position
 * @param leapDistance number of characters the window grows per attempt; must be positive
 * @param maxLength    window length at which the parser stops growing the window and fails
 * @throws IllegalArgumentException if `leapDistance` is not positive
 */
class RegexParser(pattern: Regex, leapDistance: Int = 10, maxLength: Int = 100) extends Parser[Regex.Match] {
  import fastparse.core.{ParseCtx}

  // A non-positive leap never grows the window, which would make parseRec loop forever.
  require(leapDistance > 0, s"leapDistance must be positive, got $leapDistance")

  private val _innerPattern = _ensurePatternEvaluatesFromStart(pattern)

  override def toString: String = s"Regex(${_innerPattern.toString})"

  private val zeroLengthMessage = s"$toString match with length > 0"

  /**
   * Returns the pattern with a leading `^`, adding one only when it is missing.
   *
   * The prefix is kept so that `toString` and failure messages show an anchored
   * pattern. Actual anchoring is enforced in `parseRec` via `findPrefixMatchOf`,
   * because a textual `^` prefix only anchors the first alternative of `a|b`.
   */
  private def _ensurePatternEvaluatesFromStart(aPattern: Regex): Regex = {
    val source = aPattern.toString
    // Plain string check: a group-less regex extractor with one binder never matches,
    // which previously caused "^" to be prepended again on every copy.
    if (source.startsWith("^")) aPattern else ("^" + source).r
  }

  /** Returns a copy of this parser with `maxLength` replaced by `newMax`. */
  def maxChars(newMax: Int): RegexParser = RegexParser(_innerPattern, leapDistance, newMax)

  /** Returns a copy of this parser with `leapDistance` replaced by `newLeap`. */
  def leap(newLeap: Int): RegexParser = RegexParser(_innerPattern, newLeap, maxLength)

  /**
   * Tries to match the pattern at `index`, growing the inspected window step by step.
   *
   * A match is accepted when it is non-empty and either ends before the end of the
   * window or the window already covers all remaining input. A match that reaches the
   * window edge is retried with a larger window (presumably because more input could
   * extend it). Zero-length matches are recorded as an opaque failure and the window
   * keeps growing.
   *
   * @param cfg   parse context supplying the input and the reusable success/failure objects
   * @param index position in the input at which the match must start
   * @return a success positioned just after the match, or a failure at `index`
   */
  override def parseRec(cfg: ParseCtx[Char, String], index: Int): Mutable[Regex.Match, Char, String] = {
    val remainingLength = cfg.input.length - index

    @scala.annotation.tailrec
    def attempt(
        previousLength: Int,
        failures: Set[fastparse.core.Parser[_, Char, String]]
    ): Mutable[Regex.Match, Char, String] = {
      val window: String = cfg.input.slice(index, index + previousLength + leapDistance)
      // findPrefixMatchOf only accepts matches that begin at offset 0 of the window,
      // so the result always starts exactly at `index`.
      _innerPattern.findPrefixMatchOf(window) match {
        case Some(found) if found.end > 0 &&
            (found.end < window.length || window.length == remainingLength) =>
          success(cfg.success, found, index + found.end, Set(), false)
        case candidate =>
          val updatedFailures =
            if (candidate.exists(_.end == 0)) failures + this.opaque(zeroLengthMessage)
            else failures
          if (window.length < maxLength && window.length < remainingLength)
            attempt(window.length, updatedFailures)
          else
            fail(cfg.failure, index, updatedFailures, false)
      }
    }

    attempt(0, Set.empty)
  }
}
/** Factory methods and the implicit `Regex` conversion for [[RegexParser]]. */
object RegexParser {

  /** Builds a parser from a pattern, a window growth step and a window length limit. */
  def apply(pattern: Regex, leapDistance: Int = 10, maxLength: Int = 100): RegexParser =
    new RegexParser(pattern, leapDistance, maxLength)

  /** Short alias for `apply`, taking the same arguments and defaults. */
  def R(pattern: Regex, leapDistance: Int = 10, maxLength: Int = 100): RegexParser =
    new RegexParser(pattern, leapDistance, maxLength)

  /** Lets a bare `Regex` be used where a parser is expected, with the default settings. */
  implicit def regexToRegexParser(pattern: Regex): RegexParser =
    apply(pattern)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment