Learning Scala : Boyer–Moore search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//http://blog.aunndroid.com/2011/11/learning-scala-boyermoore-search-1.html | |
object boyer_more { | |
def main(args : Array[String]) = { | |
if (args.length == 2 && args(0).length >= args(1).length) { | |
val result = boyer_more_search(args(0), args(1)) | |
println("Match found : " + result.length + | |
" Pos : " + result.mkString(" ")) | |
} else { | |
println | |
println("Usage: boyer_more string_body search_string") | |
val searchString = "EXAMPLE" | |
//val searchString="ANPANMAN" | |
val searchBody = "HERE IS A SIMPLE EXAMPLE" | |
println("e.g. : boyer_more \"" + searchBody + | |
"\" \"" + searchString + "\"") | |
val result = boyer_more_search(searchBody, searchString) | |
println("Match found : " + result.length + | |
" Pos : " + result.mkString(" ")) | |
} | |
} | |
def boyer_more_search(searchBody : String, searchString : String) : List[Int] = { | |
val table2 = createTable2(searchString) | |
println(table2) | |
val table1 = createTable1(searchString) | |
println(table1) | |
startSearch(searchBody, searchString, 0, table1, table2) | |
} | |
def startSearch(searchBody : String, searchString : String, | |
pos : Int, table1 : Map[Int, Int], | |
table2 : Map[Char, Int]) : List[Int] = { | |
def search(pos : Int, inc : Int = 0) : List[Int] = { | |
if (pos >= searchBody.length - 1) return List[Int]() | |
println(" " * pos + searchString) | |
println(searchBody) | |
println(" " * (pos + (searchString.length - 1 - inc)) + "-") | |
val jump = matchChar(pos + searchString.length - 1, inc) | |
if (jump > 0) return search(pos + jump, 0) | |
/* found a match character, check the next character */ | |
if (jump == 0) return search(pos, inc + 1) | |
/* found a search string, jump the whole search string's length */ | |
if (jump == -1) return pos :: search(pos + searchString.length, 0) | |
Nil | |
} | |
def matchChar(pos : Int, inc : Int) : Int = { | |
if (!searchBody.isDefinedAt(pos - inc)) return -2 | |
/* invalid char at the end of String, return search string length */ | |
if (!table2.isDefinedAt(searchBody(pos))) return searchString.length | |
if (searchBody(pos - inc) == searchString(searchString.length - 1 - inc)) { | |
if (searchString.length - 1 - (inc + 1) == -1) return -1 | |
return 0 | |
} | |
val result1 = table1.get(inc).get | |
/* invalid char inside the String, return table1 jump value */ | |
if (!table2.isDefinedAt(searchBody(pos - inc))) return result1 | |
val result2 = table2.get(searchBody(pos - inc)).get | |
if (result2 > result1) return result2 | |
result1 | |
} | |
search(pos) | |
} | |
import scala.collection.immutable._ | |
def createTable2(str : String) : Map[Char, Int] = { | |
/* reverse: to use foldLeft, | |
tail: the last(now head) charcter, which is not neede, is dropped */ | |
val result = str.reverse.tail.foldLeft(Map[Char,Int](),1) { | |
case ((mMap,pos),ch) if ! mMap.isDefinedAt(ch) => (mMap ++ Map(ch -> pos), pos+1) | |
case ((mMap,pos),ch) => (mMap,pos +1) | |
} | |
return result._1 | |
} | |
def createTable1(str : String) : Map[Int, Int] = { | |
val strOps = str.reverse | |
/* create a list of valid characters from the str */ | |
val strList = strOps.foldLeft(List[Char]()) { | |
case (l, ch) if !l.contains(ch) => ch :: l | |
case (l, ch) => l | |
} | |
val result = strOps.foldLeft(Map[Int, Int](), 0, "") { | |
case ((mMap, pos, subStr), ch) if pos != 0 => (mMap ++ Map(pos -> calculate(pos, subStr, `str`, | |
/* a valid characters list is reconstructed again without its actual character */ | |
`strList`.filter { | |
case x if x == ch => false | |
case _ => true | |
})), pos + 1, ch + subStr) | |
case ((mMap, pos, subStr), ch) => (Map(0 -> 1), pos + 1, ch + subStr) | |
} | |
result._1 | |
} | |
def calculate(pos : Int, subStr : String, | |
str : String, strList : List[Char]=Nil) : Int = { | |
/* strList will be empty when the recursive call to this function | |
for backtracking subStr is invoked*/ | |
if (strList.isEmpty) { | |
val jListMin = subSetJump(subStr, str) | |
/* the current subStr is not a valid sub string, | |
backtrack 1 and calculate to find again*/ | |
if (jListMin < 0) | |
return calculate(pos - 1, subStr.tail,str) | |
/* the current subStr is a valid sub String | |
but the jump is 0 and no moving. backtrack 1 and calculate */ | |
if (str.length - 1 - pos - jListMin == 0) | |
return calculate(pos - 1, subStr.tail,str) | |
/* difference of current pos(counting from front) | |
with the position first of the first valid subset */ | |
return str.length - 1 - pos - jListMin | |
} | |
/* get the positions of all valid sub strings | |
constructed with valid characters */ | |
val jList = strList.foldLeft(List[Int]()) { | |
(j, ch) => subSetJump(ch + `subStr`, `str`) :: j | |
} | |
/* jList.max is less than 0, the rest will be the same */ | |
if (jList.max < 0) { | |
if (subStr.length == 1) return str.length | |
return calculate(pos - 1, subStr,str) | |
} | |
/* we will use the minimum positive value including 0*/ | |
val jListmin=(jList.filter{ | |
case x if x >= 0 => true | |
case x => false}).min | |
if (str.length - 1 - pos - jListmin == 0) | |
return calculate(pos - 1, subStr.tail,str) | |
return str.length - 1 - pos - jListmin | |
} | |
def subSetJump(subStr : String, str : String) : Int = { | |
val list = (for (i <- 0 until str.length) | |
yield str.slice(i, i + subStr.length)).toList | |
list.indexOf(subStr) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment