Skip to content

Instantly share code, notes, and snippets.

@DrustZ
Last active July 3, 2018 02:49
Show Gist options
  • Save DrustZ/dfd0b4a189913ac2c6464fead64af962 to your computer and use it in GitHub Desktop.
Save DrustZ/dfd0b4a189913ac2c6464fead64af962 to your computer and use it in GitHub Desktop.
Wagner-Fischer Levenshtein distance, now with a means to generate all possible optimal alignments. (js version)
//A library adopted from Python version Wagner & Fischer algorithm:
//url: https://gist.github.com/kylebgorman/8034009
//for more information, please go to the original library
function INSERTION(A, cost=1){
return cost;
}
function DELETION(A, cost=1){
return cost;
}
function SUBSTITUTION(A, B, cost=1) {
return cost;
}
function makeTrace(costv, opsv) {
return {cost: costv,
ops: opsv}
}
class WagnerFischer {
constructor(A, B, insertion=INSERTION, deletion=DELETION, substitution=SUBSTITUTION){
// Stores cost functions in a dictionary for programmatic access.
this.costs = {"I": insertion, "D": deletion, "S": substitution}
this.asz = A.length
this.bsz = B.length
this.create2DArray()
// Fills in edges.
this.table[0][0] = makeTrace(0, ["O"]) // Start cell.
for (let i = 1; i < this.asz+1; ++i){
this.table[i][0] = makeTrace(this.table[i - 1][0].cost + this.costs["D"](A[i - 1]),
["D"])
}
for (let j = 1; j < this.bsz+1; ++j){
this.table[0][j] = makeTrace(this.table[0][j - 1].cost + this.costs["I"](B[j - 1]),
["I"])
}
// Fills in rest.
for (let i = 0; i < this.asz; ++i){
for (let j = 0; j < this.bsz; ++j){
// Cleans it up in case there are more than one check for match
// first, as it is always the cheapest option.
if (A[i] == B[j]){
this.table[i + 1][j + 1] = makeTrace(this.table[i][j].cost, ["M"])
}
// Checks for other types.
else{
let costD = this.table[i][j + 1].cost + this.costs["D"](A[i])
let costI = this.table[i + 1][j].cost + this.costs["I"](B[j])
let costS = this.table[i][j].cost + this.costs["S"](A[i], B[j])
let min_val = Math.min(costI, costD, costS)
let trace = makeTrace(min_val, [])
// Adds _all_ operations matching minimum value.
if (costD == min_val)
trace.ops.push("D")
if (costI == min_val)
trace.ops.push("I")
if (costS == min_val)
trace.ops.push("S")
this.table[i + 1][j + 1] = trace
}
}
}
// Stores optimum cost as a property.
this.cost = this.table[this.asz][this.bsz].cost
//cell operation iterator
this.stepback = function(i, j, trace, path_back){
/*
Given a cell location (i, j) and a Trace object trace, generate
all traces they point back to in the table
*/
let res = []
for (let op of trace.ops){
if (op === "M")
res.push([i - 1, j - 1, this.table[i - 1][j - 1], path_back.concat(["M"])])
else if (op === "I")
res.push([i, j - 1, this.table[i][j - 1], path_back.concat(["I"])])
else if (op === "D")
res.push([i - 1, j, this.table[i - 1][j], path_back.concat(["D"])])
else if (op === "S")
res.push([i - 1, j - 1, this.table[i - 1][j - 1], path_back.concat(["S"])])
else if (op === "O") // finished!
res.push([])
}
return res
}
//alignment iterator
this.alignments = function*(){
/*
Generate all alignments with optimal-cost via breadth-first
traversal of the graph of all optimal-cost (reverse) paths
implicit in the dynamic programming table
Each cell of the queue is a tuple of (i, j, trace, path_back)
where i, j is the current index, trace is the trace object at
this cell, and path_back is a reversed list of edit operations
which is initialized as an empty list.
*/
let queue = this.stepback(this.asz, this.bsz, this.table[this.asz][this.bsz], [])
while (queue.length > 0){
let step = queue.shift();
if (step.length < 1)
continue
let i = step[0];
let j = step[1];
let trace = step[2];
let path_back = step[3];
if (trace.ops[0] === "O"){
// We have reached the origin, the end of a reverse path, so
// yield the list of edit operations in reverse.
yield path_back.reverse()
continue
}
queue.push(...this.stepback(i, j, trace, path_back))
}
}
}
IDS() {
/*
Estimates insertions, deletions, and substitution _count_ (not
costs). Non-integer values arise when there are multiple possible
alignments with the same cost.
*/
let npaths = 0
let opcounts = {M:0,I:0,D:0,S:0}
let alignments = this.alignments()
for (let alignment of alignments){
// Counts edit types for this path, ignoring "M" (which is free).
opcounts.M += alignment.filter(function(val) { return val === "M";}).length;
opcounts.I += alignment.filter(function(val) { return val === "I";}).length;
opcounts.D += alignment.filter(function(val) { return val === "D";}).length;
opcounts.S += alignment.filter(function(val) { return val === "S";}).length;
npaths += 1
}
for (let key in opcounts){
opcounts[key] /= npaths
}
// Averages over all paths.
return opcounts;
}
create2DArray() {
this.table = []
for (let i = 0; i < this.asz+1; ++i){
this.table.push(new Array(this.bsz+1))
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment