|
#!/usr/bin/env python |
|
|
|
import json |
|
import os.path |
|
import optparse |
|
|
|
GAP = 1 |
|
MATCH = 2 |
|
MISMATCH = 3 |
|
|
|
START = 1 |
|
TOP = 2 |
|
LEFT = 3 |
|
TOP_LEFT = 4 |
|
|
|
parser = optparse.OptionParser() |
|
parser.add_option("-a", "--alpha", dest="alpha", type="string", help="the first string, e.g. -a ATA") |
|
parser.add_option("-b", "--beta", dest="beta", type="string", help="the second string, e.g. -b GATC") |
|
parser.add_option("-M", "--match", dest="match", type="int", default=1, help="the match score, e.g. -M 1") |
|
parser.add_option("-m", "--mismatch", dest="mismatch", type="int", default=-2, help="the mismatch score, e.g. -m 2") |
|
parser.add_option("-G", "--gapExtension", dest="gapExtension", type="int", default=-1, help="the gap extension score, e.g. -g -1") |
|
parser.add_option("-g", "--gapInitiation", dest="gapInitiation", type="int", default=-3, help="the gap initiation score, e.g. -g -3") |
|
parser.add_option("-d", "--debug", dest="debug", action="store_true", default=False, help="enable debug prints and disable json result") |
|
parser.add_option("-x", "--substitutionMatrix", dest="substitutionMatrix", help="the json with a matrix of substitutions scores", metavar="FILE") |
|
(options, args) = parser.parse_args() |
|
|
|
if options.alpha is None or options.beta is None or len(options.alpha) < 1 or len(options.beta) < 1: |
|
parser.error("You need to informe the alpha and beta strings") |
|
|
|
alphaLen = len(options.alpha) + 1 |
|
betaLen = len(options.beta) + 1 |
|
table = [[0 for b in range(betaLen)] for a in range(alphaLen)] |
|
path = [[0 for b in range(betaLen)] for a in range(alphaLen)] |
|
|
|
substitutionMatrix = {} |
|
if options.substitutionMatrix is not None: |
|
if os.path.isfile(options.substitutionMatrix): |
|
substitutionMatrix = json.loads("".join([line for line in open(options.substitutionMatrix,"r")]).replace("\n","").replace(" ","")) |
|
else: |
|
parser.error('Substitution Matrix not found') |
|
|
|
for a in range(alphaLen): |
|
top = None |
|
left = None |
|
topLeft = None |
|
hasLeft = None |
|
hasTop = a > 0 |
|
alpha = options.alpha[a - 1] |
|
for b in range(betaLen): |
|
hasLeft = b > 0 |
|
if hasTop and hasLeft: |
|
topLeft = table[a - 1][b - 1] |
|
if (options.alpha[a - 1] in substitutionMatrix and options.beta[b - 1] in substitutionMatrix): |
|
topLeft = topLeft + substitutionMatrix[options.alpha[a - 1]][options.beta[b - 1]] |
|
if options.alpha[a - 1] == options.beta[b - 1]: |
|
topLeft = topLeft + options.match |
|
else: |
|
topLeft = topLeft + options.mismatch |
|
else: |
|
topLeft = None |
|
if hasTop: |
|
top = table[a - 1][b] |
|
if (hasLeft and path[a - 1][b] == TOP_LEFT) or (a == 1): |
|
top = top + options.gapInitiation |
|
else: |
|
top = top + options.gapExtension |
|
else: |
|
top = None |
|
if hasLeft: |
|
left = table[a][b - 1] |
|
if (hasTop and path[a][b - 1] == TOP_LEFT) or (b == 1): |
|
left = left + options.gapInitiation |
|
else: |
|
left = left + options.gapExtension |
|
else: |
|
left = None |
|
if (hasLeft and hasTop and topLeft >= top and topLeft >= left): |
|
table[a][b] = topLeft |
|
path[a][b] = TOP_LEFT |
|
elif (hasTop and top >= topLeft and top >= left): |
|
table[a][b] = top |
|
path[a][b] = TOP |
|
elif (hasLeft and left >= topLeft and left >= top): |
|
table[a][b] = left |
|
path[a][b] = LEFT |
|
else: |
|
table[a][b] = 0 |
|
path[a][b] = START |
|
|
|
a = alphaLen - 1 |
|
b = betaLen - 1 |
|
result = [] |
|
merge = [] |
|
while path[a][b] != START: |
|
if path[a][b] == TOP_LEFT: |
|
result.append([options.alpha[a - 1], options.beta[b - 1]]) |
|
a = a - 1; |
|
b = b - 1; |
|
merge.append(MATCH if options.alpha[a] == options.beta[b] else MISMATCH) |
|
elif path[a][b] == TOP: |
|
result.append([options.alpha[a - 1], '_']) |
|
a = a - 1; |
|
merge.append(GAP) |
|
elif path[a][b] == LEFT: |
|
result.append(['_', options.beta[b - 1]]) |
|
b = b - 1; |
|
merge.append(GAP) |
|
|
|
alphaResult = "" |
|
betaResult = "" |
|
result.reverse() |
|
merge.reverse() |
|
for r in result: |
|
alphaResult = alphaResult + str(r[0]) |
|
betaResult = betaResult + str(r[1]) |
|
|
|
score = table[alphaLen - 1][betaLen - 1] |
|
if options.debug: |
|
print ("") |
|
print ("Options:") |
|
print (options) |
|
|
|
print ("") |
|
print ("Substitution Matrix:") |
|
print (substitutionMatrix) |
|
|
|
print ("") |
|
print ("Result table") |
|
biggestDigits = 0 |
|
for i in range(len(table)): |
|
for j in range(len(table[i])): |
|
biggestDigits = max(biggestDigits, len(str(table[i][j]))) |
|
def printSeparator(): |
|
print ("+---" + (("+----" + ("-" * biggestDigits)) * len(table[0])) + "+") |
|
printSeparator() |
|
line = "| |" |
|
base = " " * (biggestDigits + 2) |
|
for b in " " + options.beta: |
|
line = line + base + b + " |" |
|
print (line) |
|
printSeparator() |
|
def charForPath(item): |
|
if item == TOP_LEFT: |
|
return '\xe2\x86\x96' |
|
elif item == TOP: |
|
return '\xe2\x86\x91' |
|
elif item == LEFT: |
|
return '\xe2\x86\x90' |
|
else: |
|
return '\xe2\x96\xb6' |
|
base = " {:<1} {:>" + str(biggestDigits) + "} |" |
|
for i in range(len(table)): |
|
line = "| " + (" " if i == 0 else options.alpha[i - 1]) + " |" |
|
for j in range(len(table[i])): |
|
line = line + base.format(charForPath(path[i][j]), table[i][j]) |
|
print line |
|
printSeparator() |
|
|
|
print ("") |
|
print ("Strings comparations") |
|
print (alphaResult) |
|
print (betaResult) |
|
mergeString = "" |
|
for m in range(len(merge)): |
|
if merge[m] == GAP: |
|
mergeString = mergeString + '_' |
|
elif merge[m] == MISMATCH: |
|
mergeString = mergeString + '*' |
|
else: |
|
mergeString = mergeString + result[m][0] |
|
print (mergeString) |
|
|
|
print ("") |
|
print ("Score: " + str(score)) |
|
print ("") |
|
else: |
|
result = { \ |
|
"alphaResult" : alphaResult,\ |
|
"betaResult" : betaResult,\ |
|
"path" : path,\ |
|
"table" : table,\ |
|
"merge" : merge,\ |
|
"score" : score } |
|
print json.dumps(result, separators=(',',':')) |