Created
June 22, 2011 20:00
-
-
Save leegao/1040989 to your computer and use it in GitHub Desktop.
NW Align
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Tweak Me for a better fit. | |
penalty = {match = 1, nomatch = -1, gap = -1 } | |
hrz = 1; vrt = 2; mch = 0 | |
function match(a,b) return a==b and penalty.match or penalty.nomatch end | |
function select(o, top, left) | |
if o>=top and o>=left then | |
return {mch, o} | |
elseif top >= left then | |
return {vrt, top } | |
else | |
return {hrz, left} | |
end | |
end | |
function nw_matrix(A, B) | |
local a,b = #A, #B | |
local matrix = {} | |
for i=1,a+1 do | |
matrix[i] = {} | |
for j=1,b+1 do | |
local cur = {} | |
if i == 1 then | |
cur = {hrz, (j-1)*penalty.nomatch} | |
elseif j == 1 then | |
cur = {vrt, (i-1)*penalty.nomatch} | |
else | |
cur = {0, 0} | |
end | |
matrix[i][j] = cur | |
end | |
end | |
for i=2,a+1 do | |
for j=2,b+1 do | |
local m, v, h = matrix[i-1][j-1][2] + match(A:byte(i-1), B:byte(j-1)), | |
matrix[i-1][j][2] + penalty.gap, | |
matrix[i][j-1][2] + penalty.gap | |
matrix[i][j] = select(m,v,h) | |
end | |
end | |
return matrix | |
end | |
function numtrace(matrix, A, B) | |
local i, j, n = #A, #B, 0 | |
while i > 1 and j > 1 do | |
local dir = matrix[i][j][1] | |
n = n+1 | |
if dir == 0 then | |
i=i-1;j=j-1 | |
elseif dir == 1 then | |
j = j-1 | |
else | |
i = i-1 | |
end | |
end | |
return n + (i>j and i or j) | |
end | |
function trace_align(matrix, A, B) | |
local sequences, n = {{}, {}}, numtrace(matrix, A, B) | |
local i, j, mutations = #A+1, #B+1, 0 | |
while i > 1 or j > 1 do | |
local dir = matrix[i][j][1] | |
if dir == 0 then | |
sequences[1][n] = A:byte(i-1) | |
sequences[2][n] = B:byte(j-1) | |
if A[i] ~= B[j] then mutations = mutations + 1 end | |
i = i-1; j = j-1; | |
elseif dir == 1 then | |
sequences[1][n] = 45 | |
sequences[2][n] = B:byte(j-1) | |
j = j-1 | |
mutations = mutations + 1 | |
else | |
sequences[1][n] = A:byte(i-1) | |
sequences[2][n] = 45 | |
i = i-1 | |
mutations = mutations + 1 | |
end | |
n = n - 1; | |
end | |
local function reduce(chars) | |
local s = "" | |
for _,c in ipairs(chars) do | |
s = s .. string.char(c) | |
end | |
return s | |
end | |
return {reduce(sequences[1]), reduce(sequences[2])} | |
end | |
function align(A, B) | |
local mat = nw_matrix(A,B) | |
return unpack(trace_align(mat, A, B)) | |
end | |
A, B = align("I am the very model of a modern Major-General,", "I am the very model of a cartoon individual,") | |
print(A) | |
print(B) | |
A, B = align("I've information vegetable, animal, and mineral,", "My animation's comical, unusual, and whimsical,") | |
print(A) | |
print(B) | |
--[[ Output | |
I am the very model of a -modern Major-General, | |
I am the very model of a cartoon ---individual, | |
I've information-- vegetable, an-imal, and -mi-neral, | |
--My an--imation's comica-l-, unusual, and whimsical, | |
]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment