Skip to content

Instantly share code, notes, and snippets.

@sinnamone
Last active June 7, 2017 12:50
Show Gist options
  • Save sinnamone/caa5e3d60ec776999df65dc025f5484e to your computer and use it in GitHub Desktop.
Save sinnamone/caa5e3d60ec776999df65dc025f5484e to your computer and use it in GitHub Desktop.
get_best_match
#!/usr/bin/python
"""Sort BLAST-style tabular matches and optionally keep only the best per query.

Rows are ordered by query (ascending), score (descending), mismatches
(ascending) and gaps (ascending).  With '-best', only the rows that share
the top score of each query are written.  Output goes to standard output.

Note: numeric columns are parsed and written back with ``str()``, so their
textual form may be normalised (e.g. ``100.00`` is written as ``100.0``).
"""
import sys
from operator import itemgetter

USAGE = """
Usage: %s output_type input_type [file.blast8_]
'output_type':
'-sort': do only the sort
'-best': do the sort and get only the best
'input_type':
'-0': query, subject, similarity, ...
'-1': query, query_len, subject, similarity, ...
'-2': query, subject, subject_len, similarity, ...
'-3': query, query_len, subject, subject_len, similarity, ...
Sort by query (ASC), score (DESC), MM (ASC), gap (ASC)
Output in STDOUT
"""

# Widest layout (input_type '-3'): column name and the converter applied to it.
# Any further fields on a line are carried through untouched as text.
_ALL_COLUMNS = (
    ("query", str),         # sort key
    ("query_len", int),
    ("subject", str),
    ("subject_len", int),
    ("similarity", float),
    ("match_len", int),
    ("MM", int),            # sort key
    ("gap", int),           # sort key
    ("q_start", int),
    ("q_end", int),
    ("s_start", int),
    ("s_end", int),
    ("e_value", float),
    ("score", float),       # sort key
)

# Columns of the widest layout that are absent from each input_type.
_ABSENT_COLUMNS = {
    "-0": ("query_len", "subject_len"),
    "-1": ("subject_len",),
    "-2": ("query_len",),
    "-3": (),
}


class _FormatError(Exception):
    """A data line could not be converted; carries the 1-based line number."""

    def __init__(self, line_no):
        Exception.__init__(self, line_no)
        self.line_no = line_no


def _layout(input_type):
    """Return the (name, converter) columns expected for *input_type*."""
    absent = _ABSENT_COLUMNS[input_type]
    return [col for col in _ALL_COLUMNS if col[0] not in absent]


def _parse_rows(lines, converters):
    """Split tab-separated *lines* and cast the standard columns.

    Blank lines are skipped.  If at least one line has more fields than
    *converters*, every row gets one extra trailing text field holding its
    additional fields (tab-joined; empty when the row has none), so that
    annotations survive the round trip.

    Raises _FormatError when a line has too few fields or a field cannot
    be converted.
    """
    n_standard = len(converters)
    raw = []
    for line_no, line in enumerate(lines, 1):
        text = line.replace("\n", "")
        if not text.strip():
            continue
        raw.append((line_no, text.split("\t")))

    # Strictly more than the standard columns: a row with exactly the
    # standard columns carries no extra data and must not gain a field.
    has_extra = any(len(fields) > n_standard for _, fields in raw)

    rows = []
    for line_no, fields in raw:
        try:
            row = [cast(fields[k]) for k, cast in enumerate(converters)]
        except (ValueError, IndexError):
            raise _FormatError(line_no)
        if has_extra:
            row.append("\t".join(fields[n_standard:]))
        rows.append(row)
    return rows


def _sort_rows(rows, query_pos, score_pos, mm_pos, gap_pos):
    """Sort *rows* in place: query ASC, score DESC, MM ASC, gap ASC.

    list.sort is stable, so the keys are applied from least to most
    significant.
    """
    rows.sort(key=itemgetter(mm_pos, gap_pos))
    rows.sort(key=itemgetter(score_pos), reverse=True)
    rows.sort(key=itemgetter(query_pos))


def _best_rows(sorted_rows, query_pos, score_pos):
    """Return, for each query, the rows sharing that query's first score.

    *sorted_rows* must already be grouped by query with the best score first.
    """
    best = []
    current_query = None
    top_score = None
    for row in sorted_rows:
        if row[query_pos] != current_query:
            # First row of a new query: always kept, and sets the top score.
            current_query = row[query_pos]
            top_score = row[score_pos]
            best.append(row)
        elif row[score_pos] == top_score:
            best.append(row)
    return best


def main(argv=None):
    """Run the command-line tool.

    argv -- full argument vector (program name first); defaults to sys.argv.

    Exits through sys.exit with a message on usage or format errors.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2 + 1:
        sys.exit(USAGE % argv[0])

    work_to_do = argv[1]
    if work_to_do not in ('-sort', '-best'):
        sys.exit("ERROR: Wrong output_type: %s." % work_to_do)

    input_type = argv[2]
    if input_type not in _ABSENT_COLUMNS:
        sys.exit("ERROR: Wrong input_type: %s." % input_type)

    columns = _layout(input_type)
    names = [name for name, _ in columns]
    converters = [cast for _, cast in columns]
    query_pos = names.index("query")
    score_pos = names.index("score")

    if len(argv) == 3:  # read from STDIN
        lines = sys.stdin.readlines()
    else:
        with open(argv[3]) as handle:
            lines = handle.readlines()

    try:
        matches = _parse_rows(lines, converters)
    except _FormatError as err:
        sys.exit("ERROR: Format problem at line %d: is the '%s' option correct??"
                 % (err.line_no, input_type))

    _sort_rows(matches, query_pos, score_pos,
               names.index("MM"), names.index("gap"))

    if work_to_do == '-best':
        matches = _best_rows(matches, query_pos, score_pos)

    write = sys.stdout.write
    for row in matches:
        write("\t".join(map(str, row)) + "\n")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment