Skip to content

Instantly share code, notes, and snippets.

@ghidalgo3
Last active August 29, 2015 14:08
Show Gist options
  • Save ghidalgo3/159f5dffe3319e148cbd to your computer and use it in GitHub Desktop.
Save ghidalgo3/159f5dffe3319e148cbd to your computer and use it in GitHub Desktop.
crazy replacement
/**
 * Applies several regex-driven replacements to `s`, all computed against the ORIGINAL text.
 *
 * The rules in `zomRegex` are tried in order, so earlier rules have priority. A match is
 * accepted only if its character range does not overlap any previously accepted match;
 * no character of `s` is ever rewritten by more than one rule. Accepted matches are then
 * substituted from left to right with the string produced by their rule's function.
 *
 * @param s        the text to rewrite
 * @param zomRegex zero or more (pattern, replacement function) pairs in priority order; the
 *                 function receives the accepted match and returns its replacement text
 * @return `s` with every accepted match replaced; `s` unchanged when nothing matches
 */
def findRegexReplaceMatch(s: String, zomRegex: collection.immutable.Seq[(Regex, Match => String)]) : String = {
  // Match ranges are half-open [start, end). Two such ranges share a character exactly when
  // each one starts before the other ends. This also catches identical ranges and a range
  // that fully contains another, which an "is an endpoint strictly inside" test misses.
  def overlaps(a: Match, b: Match): Boolean = a.start < b.end && b.start < a.end

  // Pass 1: pick the winning matches in priority order (rule order, then match order within
  // a rule), rejecting any candidate that collides with an already accepted match.
  val accepted = zomRegex.foldLeft(Vector.empty[(Match, Match => String)]) {
    case (chosen, (regex, replacer)) =>
      regex.findAllMatchIn(s).foldLeft(chosen) { (acc, candidate) =>
        if (acc.exists { case (taken, _) => overlaps(taken, candidate) }) acc
        else acc :+ ((candidate, replacer))
      }
  }

  // Pass 2: splice left to right. Acceptance order follows rule priority, not position, so
  // the matches must be sorted before walking the string. Sorting by (start, end) also puts
  // an empty match ahead of a non-empty one starting at the same index, which keeps `cursor`
  // monotonic. The sort is stable, so equal ranges (empty matches) keep priority order.
  val out = new StringBuilder(s.length)
  var cursor = 0
  accepted.sortBy { case (m, _) => (m.start, m.end) }.foreach { case (m, replacer) =>
    out.append(s.substring(cursor, m.start)) // untouched text before this match
    out.append(replacer(m))                  // replacement is computed exactly once
    cursor = m.end
  }
  out.append(s.substring(cursor)) // untouched tail after the last match
  out.toString
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment