Last active
August 29, 2015 14:08
-
-
Save ghidalgo3/159f5dffe3319e148cbd to your computer and use it in GitHub Desktop.
crazy replacement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Applies several regex-driven replacements to `s` in a single pass, never
  * letting two replacements touch the same character.
  *
  * Every regex in `zomRegex` is matched against the ORIGINAL string `s` (not
  * against partially rewritten output). Rules are considered in the order they
  * appear in `zomRegex`: a match is accepted only if its `[start, end)` region
  * does not intersect a region that has already been accepted, so earlier
  * rules take priority over later ones. Accepted matches are then substituted
  * left to right.
  *
  * @param s        the input text
  * @param zomRegex zero or more (pattern, replacement function) pairs, in
  *                 priority order; the function receives the accepted match
  *                 and returns the text to put in its place
  * @return `s` with every accepted match replaced; `s` itself (by value) when
  *         nothing matches or `zomRegex` is empty
  */
def findRegexReplaceMatch(s: String, zomRegex: collection.immutable.Seq[(Regex, Match => String)]) : String = {
  // Two half-open regions [start, end) intersect exactly when each one starts
  // before the other ends. Unlike an "is an endpoint strictly inside?" test,
  // this also catches identical regions and a region that fully contains
  // another one.
  def overlaps(a: Match, b: Match): Boolean =
    a.start < b.end && b.start < a.end

  // Pass 1: decide which matches win. Walk the rules in priority order and
  // keep a match only if it does not intersect anything already kept.
  val accepted = zomRegex.foldLeft(Vector.empty[(Match, Match => String)]) {
    case (chosen, (regex, replacer)) =>
      regex.findAllMatchIn(s).foldLeft(chosen) { (kept, candidate) =>
        if (kept.exists { case (taken, _) => overlaps(taken, candidate) }) kept
        else kept :+ ((candidate, replacer))
      }
  }

  // Pass 2: rebuild the string left to right. The accepted matches are in
  // rule order, not position order, so sort them by position first. Sorting
  // on (start, end) places a zero-length match before a non-empty match that
  // begins at the same index, which keeps the cursor from ever moving
  // backwards; the sort is stable, so ties keep rule priority order.
  val ordered = accepted.sortBy { case (m, _) => (m.start, m.end) }

  val out = new StringBuilder(s.length)
  // Index in `s` of the first character not yet copied to `out`.
  var cursor = 0
  ordered.foreach { case (m, replacer) =>
    out.append(s.substring(cursor, m.start)) // untouched text before the match
    out.append(replacer(m))                  // replacement, computed exactly once
    cursor = m.end
  }
  // Whatever follows the last accepted match is copied through unchanged.
  out.append(s.substring(cursor))
  out.toString
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment