Skip to content

Instantly share code, notes, and snippets.

@holtgrewe
Created January 31, 2014 16:41
Show Gist options
  • Save holtgrewe/8735840 to your computer and use it in GitHub Desktop.
Save holtgrewe/8735840 to your computer and use it in GitHub Desktop.
Script that takes a SAM file and removes leading/trailing insertions/deletions from CIGAR string such that samtools tview likes it better.
#!/usr/bin/env python
import sys
def splitCigar(cigar):
result = []
count = ''
operation = ''
for x in cigar:
if x not in '0123456789':
operation = x
if count:
result.append((int(count), operation))
operation = ''
count = ''
else:
count = count + x
return result
def fixLine(line):
vals = line.split('\t')
if vals[3] == '*' or vals[5] == '*':
return line
pos = int(vals[3])
cigar = vals[5]
if cigar != '*':
cigar = splitCigar(cigar)
if cigar[0][1] == 'I':
pos -= cigar[0][0]
if len(cigar) > 1 and cigar[1][1] == 'M':
cigar[1] = (cigar[0][0] + cigar[1][0], 'M')
cigar.pop(0)
else:
cigar[0] = (cigar[0][0], 'M')
elif cigar[0][1] == 'D':
pos += cigar[0][0]
cigar.pop(0)
if cigar[-1][1] == 'I':
if len(cigar) > 1 and cigar[-2][1] == 'M':
cigar[-2] = (cigar[-1][0] + cigar[-2][0], 'M')
cigar.pop()
else:
cigar[-1]= (cigar[-1][0], 'M')
elif cigar[-1][1] == 'D':
cigar.pop()
#if len(cigar) > 1 and cigar[-2][1] == 'M':
# cigar[-2] = (cigar[-1][0] + cigar[-2][0], 'M')
#cigar.pop()
#else:
#cigar[-1]= (cigar[-1][0], 'M')
cigar = ''.join(''.join(map(str, list(x))) for x in cigar)
vals[3] = str(pos)
vals[5] = cigar
return '\t'.join(vals)
if __name__ == '__main__':
for line in sys.stdin:
if line.startswith('@'):
print line.strip()
else:
print fixLine(line.strip())
@holtgrewe
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment