Skip to content

Instantly share code, notes, and snippets.

@rsirres
Created November 23, 2018 21:31
Show Gist options
  • Save rsirres/7ce41ff2aa926935f2844279b759cbf6 to your computer and use it in GitHub Desktop.
Save rsirres/7ce41ff2aa926935f2844279b759cbf6 to your computer and use it in GitHub Desktop.
Implementation of levensthein automata in nimlang
# http://julesjacobs.github.io/2015/06/17/disqus-levenshtein-simple-and-fast.html
import sugar
import sequtils
import tables
import strutils
import hashes
type
SparseLevenshteinAutomaton = object
max_edits: int
s: string
State = tuple[indices: seq[int], values: seq[int]]
proc hash(x: State): Hash =
## Piggyback on the already available string hash proc.
##
## Without this proc nothing works!
result = x.indices.hash !& x.values.hash
result = !$result
proc start(self: SparseLevenshteinAutomaton):State =
return (newSeq[int](self.max_edits), newSeq[int](self.max_edits))
proc step(self: SparseLevenshteinAutomaton, n: State, c:char): State =
var new_indices: seq[int]
var new_values: seq[int]
if len(n.indices) != 0 and n.indices[0] == 0 and n.values[0] < self.max_edits:
new_indices = @[0]
new_values = @[n.values[0] + 1]
else:
new_indices = @[]
new_values = @[]
for j, i in n.indices:
if i == len(self.s): break
var cost = if self.s[i] == c: 0 else: 1
var val = n.values[j] + cost
if len(new_indices) != 0 and new_indices[^1] == i:
val = min(val, new_values[^1] + 1)
if j+1 < len(n.indices) and n.indices[j+1] == i+1:
val = min(val, n.values[j+1] + 1)
if val <= self.max_edits:
new_indices.add(i+1)
new_values.add(val)
return (new_indices, new_values)
proc is_match(self: SparseLevenshteinAutomaton, n: State): bool =
return len(n.indices) != 0 and n.indices[^1] == len(self.s)
proc can_match(self: SparseLevenshteinAutomaton, n: tuple[indices: seq[int], values: seq[int]]):bool =
return len(n.indices) != 0
proc transitions(self: SparseLevenshteinAutomaton, n: tuple[indices: seq[int], values: seq[int]]): seq[char] =
return deduplicate( lc[self.s[i] | (i <- n.indices, i < len(self.s)), char] )
var counter = @[0] # list is a hack for mutable lexical scoping
var states = initTable[State, int]()
var trans:seq[(int, int, char)] = @[]
var matching: seq[int] = @[]
let lev = SparseLevenshteinAutomaton(s:"woof", max_edits:2)
# proc explore(state: State) : int =
# let key = state # lists can't be hashed in Python so convert to a tuple
# if key in states: return states[key]
# var i = counter[0]
# counter[0] += 1
# states[key] = i
# if lev.is_match(state): matching.add(i)
# for c in concat(lev.transitions(state), @['*']):
# var newstate = lev.step(state, c)
# var j = explore(newstate)
# trans.add((i, j, c))
# return i
# var i = explore(lev.start())
var state = lev.start()
let s = "woofies"
for c in s:
state = lev.step(state, c)
echo lev.can_match(state) # True, "w" can match "bannana" with distance 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment