Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Learning Scala : Boyer–Moore search
//http://blog.aunndroid.com/2011/11/learning-scala-boyermoore-search-1.html
object boyer_more {
def main(args : Array[String]) = {
if (args.length == 2 && args(0).length >= args(1).length) {
val result = boyer_more_search(args(0), args(1))
println("Match found : " + result.length +
" Pos : " + result.mkString(" "))
} else {
println
println("Usage: boyer_more string_body search_string")
val searchString = "EXAMPLE"
//val searchString="ANPANMAN"
val searchBody = "HERE IS A SIMPLE EXAMPLE"
println("e.g. : boyer_more \"" + searchBody +
"\" \"" + searchString + "\"")
val result = boyer_more_search(searchBody, searchString)
println("Match found : " + result.length +
" Pos : " + result.mkString(" "))
}
}
def boyer_more_search(searchBody : String, searchString : String) : List[Int] = {
val table2 = createTable2(searchString)
println(table2)
val table1 = createTable1(searchString)
println(table1)
startSearch(searchBody, searchString, 0, table1, table2)
}
def startSearch(searchBody : String, searchString : String,
pos : Int, table1 : Map[Int, Int],
table2 : Map[Char, Int]) : List[Int] = {
def search(pos : Int, inc : Int = 0) : List[Int] = {
if (pos >= searchBody.length - 1) return List[Int]()
println(" " * pos + searchString)
println(searchBody)
println(" " * (pos + (searchString.length - 1 - inc)) + "-")
val jump = matchChar(pos + searchString.length - 1, inc)
if (jump > 0) return search(pos + jump, 0)
/* found a match character, check the next character */
if (jump == 0) return search(pos, inc + 1)
/* found a search string, jump the whole search string's length */
if (jump == -1) return pos :: search(pos + searchString.length, 0)
Nil
}
def matchChar(pos : Int, inc : Int) : Int = {
if (!searchBody.isDefinedAt(pos - inc)) return -2
/* invalid char at the end of String, return search string length */
if (!table2.isDefinedAt(searchBody(pos))) return searchString.length
if (searchBody(pos - inc) == searchString(searchString.length - 1 - inc)) {
if (searchString.length - 1 - (inc + 1) == -1) return -1
return 0
}
val result1 = table1.get(inc).get
/* invalid char inside the String, return table1 jump value */
if (!table2.isDefinedAt(searchBody(pos - inc))) return result1
val result2 = table2.get(searchBody(pos - inc)).get
if (result2 > result1) return result2
result1
}
search(pos)
}
import scala.collection.immutable._
def createTable2(str : String) : Map[Char, Int] = {
/* reverse: to use foldLeft,
tail: the last(now head) charcter, which is not neede, is dropped */
val result = str.reverse.tail.foldLeft(Map[Char,Int](),1) {
case ((mMap,pos),ch) if ! mMap.isDefinedAt(ch) => (mMap ++ Map(ch -> pos), pos+1)
case ((mMap,pos),ch) => (mMap,pos +1)
}
return result._1
}
def createTable1(str : String) : Map[Int, Int] = {
val strOps = str.reverse
/* create a list of valid characters from the str */
val strList = strOps.foldLeft(List[Char]()) {
case (l, ch) if !l.contains(ch) => ch :: l
case (l, ch) => l
}
val result = strOps.foldLeft(Map[Int, Int](), 0, "") {
case ((mMap, pos, subStr), ch) if pos != 0 => (mMap ++ Map(pos -> calculate(pos, subStr, `str`,
/* a valid characters list is reconstructed again without its actual character */
`strList`.filter {
case x if x == ch => false
case _ => true
})), pos + 1, ch + subStr)
case ((mMap, pos, subStr), ch) => (Map(0 -> 1), pos + 1, ch + subStr)
}
result._1
}
def calculate(pos : Int, subStr : String,
str : String, strList : List[Char]=Nil) : Int = {
/* strList will be empty when the recursive call to this function
for backtracking subStr is invoked*/
if (strList.isEmpty) {
val jListMin = subSetJump(subStr, str)
/* the current subStr is not a valid sub string,
backtrack 1 and calculate to find again*/
if (jListMin < 0)
return calculate(pos - 1, subStr.tail,str)
/* the current subStr is a valid sub String
but the jump is 0 and no moving. backtrack 1 and calculate */
if (str.length - 1 - pos - jListMin == 0)
return calculate(pos - 1, subStr.tail,str)
/* difference of current pos(counting from front)
with the position first of the first valid subset */
return str.length - 1 - pos - jListMin
}
/* get the positions of all valid sub strings
constructed with valid characters */
val jList = strList.foldLeft(List[Int]()) {
(j, ch) => subSetJump(ch + `subStr`, `str`) :: j
}
/* jList.max is less than 0, the rest will be the same */
if (jList.max < 0) {
if (subStr.length == 1) return str.length
return calculate(pos - 1, subStr,str)
}
/* we will use the minimum positive value including 0*/
val jListmin=(jList.filter{
case x if x >= 0 => true
case x => false}).min
if (str.length - 1 - pos - jListmin == 0)
return calculate(pos - 1, subStr.tail,str)
return str.length - 1 - pos - jListmin
}
def subSetJump(subStr : String, str : String) : Int = {
val list = (for (i <- 0 until str.length)
yield str.slice(i, i + subStr.length)).toList
list.indexOf(subStr)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment