Created
February 3, 2016 01:12
-
-
Save ksahlin/76a16f0b6fa19988b1db to your computer and use it in GitHub Desktop.
Speed-up when parsing the CIGAR string in `alignment`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@property
def alignment(self):
    """Render the alignment as three equal-length display strings.

    The aligned regions are ``self.reference[reference_begin:reference_end + 1]``
    and ``self.query[query_begin:query_end + 1]`` (both end bounds are
    inclusive).  Each ``(length, op)`` pair produced by ``self.iter_cigar``
    is then applied to those regions:

    * ``M`` -- copy ``length`` characters from both sequences; the middle
      line gets ``'|'`` where the characters are equal ignoring case and
      ``'*'`` where they differ.
    * ``I`` -- insertion into the reference: the reference line is padded
      with ``'-'`` and only the query position advances.
    * ``D`` -- deletion from the reference: the query line is padded with
      ``'-'`` and only the reference position advances.

    Op codes are matched case-insensitively.  Any other op code is
    skipped without advancing either position.

    NOTE(review): assumes ``self.reference`` and ``self.query`` are ``str``
    (the pieces are joined into strings) -- confirm against the class.

    Returns:
        tuple: ``(r_line, m_line, q_line)`` -- the gapped reference, the
        match/mismatch markers and the gapped query.
    """
    r_seq = self.reference[self.reference_begin: self.reference_end + 1]
    q_seq = self.query[self.query_begin: self.query_end + 1]
    r_index = 0
    q_index = 0
    # Collect the pieces and join once at the end; repeated ``+=`` on a
    # growing string can degrade to quadratic time.
    r_parts = []
    m_parts = []
    q_parts = []
    for op_len, op_char in self.iter_cigar:
        op_len = int(op_len)  # lengths may arrive as digit strings
        op = op_char.upper()
        if op == 'M':
            ref_piece = r_seq[r_index: r_index + op_len]
            query_piece = q_seq[q_index: q_index + op_len]
            r_parts.append(ref_piece)
            q_parts.append(query_piece)
            # XXX: ambiguity codes? matrix match?
            m_parts.append(''.join(
                '|' if r_base.upper() == q_base.upper() else '*'
                for r_base, q_base in zip(ref_piece, query_piece)
            ))
            r_index += op_len
            q_index += op_len
        elif op == 'I':
            # Insertion into reference: only the query index changes.
            r_parts.append('-' * op_len)
            m_parts.append(' ' * op_len)
            q_parts.append(q_seq[q_index: q_index + op_len])
            q_index += op_len
        elif op == 'D':
            # Deletion from reference: only the reference index changes.
            r_parts.append(r_seq[r_index: r_index + op_len])
            m_parts.append(' ' * op_len)
            q_parts.append('-' * op_len)
            r_index += op_len
    return (''.join(r_parts), ''.join(m_parts), ''.join(q_parts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment