Last active
January 4, 2016 16:19
-
-
Save vivekn/8646332 to your computer and use it in GitHub Desktop.
BKTree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.vivekn.bktree | |
import scala.collection.mutable.ArrayBuffer | |
import scala.collection.mutable.Map | |
class BKTreeNode(str: String) { | |
var children: Map[Int, BKTreeNode] = Map[Int, BKTreeNode]() | |
def add(word: String) : Unit = { | |
val dist = WordUtils.levenshtein(str, word) | |
if (children.contains(dist)) children(dist) add (word) | |
else children(dist) = new BKTreeNode(word) | |
} | |
def find(word: String, tolerance: Int, results: ArrayBuffer[String] = ArrayBuffer[String]()) : ArrayBuffer[String] = { | |
val dist = WordUtils.levenshtein(word, str) | |
if (dist <= tolerance) results += str | |
for(i <- (dist-tolerance) to (dist+tolerance) if children.contains(i)) | |
children(i).find(word, tolerance, results) | |
results | |
} | |
val += : (String) => Unit = add | |
} | |
object WordUtils { | |
def levenshtein(word1: String, word2: String): Int = { | |
var arr = Array.fill(word1.size+1, word2.size+1)(0) | |
for(i <- 0 to word2.size) arr(0)(i) = i | |
for(i <- 0 to word1.size) arr(i)(0) = i | |
for(i <- 1 to word1.size; j <- 1 to word2.size) | |
arr(i)(j) = Array(arr(i-1)(j) + 1, arr(i)(j-1) + 1, arr(i-1)(j-1) + | |
(if (word1(i-1) == word2(j-1)) 0 else 1)).min | |
arr(word1.size)(word2.size) | |
} | |
def buildTree(array: Traversable[String]): BKTreeNode = { | |
var root = new BKTreeNode(array head) | |
for(str <- array tail) root += str | |
root | |
} | |
def main(args: Array[String]): Unit = { | |
var tree = buildTree(Array("book", "books", "boo", "cake", "cart", "cape")) | |
println(tree.find("caqe", 3)) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment