Created
September 30, 2015 11:20
-
-
Save mukulrawat1986/d667f09b681befbed193 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"log" | |
"os" | |
) | |
// Define a type to implement a bktree node | |
// The bktreeNode holds two values: | |
// str of type string to hold a word from the dictionary | |
// child is a map of levenshtein distance to a node | |
type bktreeNode struct { | |
str string | |
child map[int]*bktreeNode | |
} | |
// Define a type to implement a Bktree | |
// The Bktree structure holds a root which is a pointer to | |
// the bktreeNode type | |
type Bktree struct { | |
root *bktreeNode | |
} | |
// The function find Levenshtein distance between two strings | |
// It takes as argument two strings and returns an integer value | |
// which is the levenshtein distance between the two strings | |
func levenshtein(a, b string) int { | |
// Find the length of the strings | |
la, lb := len(a), len(b) | |
// Create array to store distance | |
col := make([]int, la+1) | |
var lastdiag, olddiag, i, j int | |
var temp1, temp2, temp3 int | |
for i = 1; i <= la; i++ { | |
col[i] = i | |
} | |
for i = 1; i <= lb; i++ { | |
col[0] = i | |
for j, lastdiag = 1, i-1; j <= la; j++ { | |
olddiag = col[j] | |
temp1 = col[j] + 1 | |
temp2 = col[j-1] + 1 | |
if a[j-1] == b[i-1] { | |
temp3 = lastdiag + 0 | |
} else { | |
temp3 = lastdiag + 1 | |
} | |
col[j] = min(temp1, temp2, temp3) | |
lastdiag = olddiag | |
} | |
} | |
return col[la] | |
} | |
// A function to find the minimum of three integers | |
// Returns the minimum of three integers | |
func min(a, b, c int) int { | |
if a < b && a < c { | |
return a | |
} else if b < a && b < c { | |
return b | |
} else { | |
return c | |
} | |
} | |
// Function call to create a new Bktree | |
// It returns a pointer to a new Bktree structure | |
func NewTree() *Bktree { | |
return &Bktree{ | |
root: nil, | |
} | |
} | |
// Function call to create a new bktreeNode | |
// It returns a pointer to a new bktreeNode struct | |
func newbktreeNode(s string) *bktreeNode { | |
return &bktreeNode{ | |
str: s, | |
child: make(map[int]*bktreeNode), | |
} | |
} | |
// Given a string and a node, insert the string | |
func (b *Bktree) insert(root *bktreeNode, s string) { | |
d := levenshtein(root.str, s) | |
if d == 0 { | |
return | |
} | |
for distance, node := range root.child { | |
if d == distance { | |
b.insert(node, s) | |
return | |
} | |
} | |
// if distance is new | |
ch := newbktreeNode(s) | |
root.child[d] = ch | |
} | |
// Insert a node in a Bktree | |
// It inserts the string at the right place in Bktree struct | |
func (b *Bktree) Insert(s string) { | |
if b.root == nil { | |
b.root = newbktreeNode(s) | |
return | |
} else { | |
b.insert(b.root, s) | |
return | |
} | |
} | |
// Internal function to traverse the tree | |
func traverse(node *bktreeNode) { | |
if node == nil { | |
fmt.Printf("Empty \n") | |
return | |
} | |
fmt.Printf("Parent: %s\n", node.str) | |
for distance, node := range node.child { | |
fmt.Printf("%d %s\n", distance, node.str) | |
traverse(node) | |
} | |
fmt.Printf("Returning %s\n", node.str) | |
} | |
// Internal function to traverse the tree, to use it | |
// change name of function to Traverse() | |
func (b *Bktree) traverse1() { | |
traverse(b.root) | |
} | |
// Private function to find a string | |
func find(node *bktreeNode, s string, l int) (res []string) { | |
d := levenshtein(node.str, s) | |
min_d := d - l | |
max_d := d + l | |
if d <= l { | |
res = append(res, node.str) | |
} | |
for dis, n := range node.child { | |
if dis >= min_d && dis <= max_d { | |
res = append(res, find(n, s, l)...) | |
} | |
} | |
return | |
} | |
// Function to find all strings within the tolerance levenshtein distance | |
func (b *Bktree) Find(s string, l int) []string { | |
return find(b.root, s, l) | |
} | |
// Our main function where we load a dictionary in our bktree and then for a | |
// given word and a particular tolerance, it will create a list of words which | |
// are close to it in spelling. Bktree are used to create spell checkers. | |
func main() { | |
// Create a new bktree | |
bk := NewTree() | |
// open the dictionary file | |
f, err := os.Open("/usr/share/dict/words") | |
if err != nil { | |
log.Println("Problem opening /usr/share/dict/words") | |
os.Exit(1) | |
} | |
defer f.Close() | |
fmt.Printf("Starting scanning\n") | |
scan := bufio.NewScanner(f) | |
for scan.Scan() { | |
l := scan.Text() | |
fmt.Printf("%s\n", l) | |
if l != "" { | |
bk.Insert(l) | |
} | |
} | |
log.Println("Finished loading in our bktree") | |
res := bk.Find("caqe", 1) | |
fmt.Printf("Possible correct words for the misspelled words are : %v\n", res) | |
res = bk.Find("cops", 2) | |
fmt.Printf("Possible correct words for the misspelled words are : %v\n", res) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment