Skip to content

Instantly share code, notes, and snippets.

@mukulrawat1986
Created September 30, 2015 11:20
Show Gist options
  • Save mukulrawat1986/d667f09b681befbed193 to your computer and use it in GitHub Desktop.
Save mukulrawat1986/d667f09b681befbed193 to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"fmt"
"log"
"os"
)
// Define a type to implement a bktree node
// The bktreeNode holds two values:
// str of type string to hold a word from the dictionary
// child is a map of levenshtein distance to a node
type bktreeNode struct {
str string
child map[int]*bktreeNode
}
// Define a type to implement a Bktree
// The Bktree structure holds a root which is a pointer to
// the bktreeNode type
type Bktree struct {
root *bktreeNode
}
// The function find Levenshtein distance between two strings
// It takes as argument two strings and returns an integer value
// which is the levenshtein distance between the two strings
func levenshtein(a, b string) int {
// Find the length of the strings
la, lb := len(a), len(b)
// Create array to store distance
col := make([]int, la+1)
var lastdiag, olddiag, i, j int
var temp1, temp2, temp3 int
for i = 1; i <= la; i++ {
col[i] = i
}
for i = 1; i <= lb; i++ {
col[0] = i
for j, lastdiag = 1, i-1; j <= la; j++ {
olddiag = col[j]
temp1 = col[j] + 1
temp2 = col[j-1] + 1
if a[j-1] == b[i-1] {
temp3 = lastdiag + 0
} else {
temp3 = lastdiag + 1
}
col[j] = min(temp1, temp2, temp3)
lastdiag = olddiag
}
}
return col[la]
}
// A function to find the minimum of three integers
// Returns the minimum of three integers
func min(a, b, c int) int {
if a < b && a < c {
return a
} else if b < a && b < c {
return b
} else {
return c
}
}
// Function call to create a new Bktree
// It returns a pointer to a new Bktree structure
func NewTree() *Bktree {
return &Bktree{
root: nil,
}
}
// Function call to create a new bktreeNode
// It returns a pointer to a new bktreeNode struct
func newbktreeNode(s string) *bktreeNode {
return &bktreeNode{
str: s,
child: make(map[int]*bktreeNode),
}
}
// Given a string and a node, insert the string
func (b *Bktree) insert(root *bktreeNode, s string) {
d := levenshtein(root.str, s)
if d == 0 {
return
}
for distance, node := range root.child {
if d == distance {
b.insert(node, s)
return
}
}
// if distance is new
ch := newbktreeNode(s)
root.child[d] = ch
}
// Insert a node in a Bktree
// It inserts the string at the right place in Bktree struct
func (b *Bktree) Insert(s string) {
if b.root == nil {
b.root = newbktreeNode(s)
return
} else {
b.insert(b.root, s)
return
}
}
// Internal function to traverse the tree
func traverse(node *bktreeNode) {
if node == nil {
fmt.Printf("Empty \n")
return
}
fmt.Printf("Parent: %s\n", node.str)
for distance, node := range node.child {
fmt.Printf("%d %s\n", distance, node.str)
traverse(node)
}
fmt.Printf("Returning %s\n", node.str)
}
// Internal function to traverse the tree, to use it
// change name of function to Traverse()
func (b *Bktree) traverse1() {
traverse(b.root)
}
// Private function to find a string
func find(node *bktreeNode, s string, l int) (res []string) {
d := levenshtein(node.str, s)
min_d := d - l
max_d := d + l
if d <= l {
res = append(res, node.str)
}
for dis, n := range node.child {
if dis >= min_d && dis <= max_d {
res = append(res, find(n, s, l)...)
}
}
return
}
// Function to find all strings within the tolerance levenshtein distance
func (b *Bktree) Find(s string, l int) []string {
return find(b.root, s, l)
}
// Our main function where we load a dictionary in our bktree and then for a
// given word and a particular tolerance, it will create a list of words which
// are close to it in spelling. Bktree are used to create spell checkers.
func main() {
// Create a new bktree
bk := NewTree()
// open the dictionary file
f, err := os.Open("/usr/share/dict/words")
if err != nil {
log.Println("Problem opening /usr/share/dict/words")
os.Exit(1)
}
defer f.Close()
fmt.Printf("Starting scanning\n")
scan := bufio.NewScanner(f)
for scan.Scan() {
l := scan.Text()
fmt.Printf("%s\n", l)
if l != "" {
bk.Insert(l)
}
}
log.Println("Finished loading in our bktree")
res := bk.Find("caqe", 1)
fmt.Printf("Possible correct words for the misspelled words are : %v\n", res)
res = bk.Find("cops", 2)
fmt.Printf("Possible correct words for the misspelled words are : %v\n", res)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment