Skip to content

Instantly share code, notes, and snippets.

@marcelm
Created September 16, 2015 09:06
Show Gist options
  • Save marcelm/ae83bd0367cc1bceeb6a to your computer and use it in GitHub Desktop.
Save marcelm/ae83bd0367cc1bceeb6a to your computer and use it in GitHub Desktop.
Use pysam and pyfaidx to find mismatches in an interval
from pysam import AlignmentFile
from pyfaidx import Fasta
def has_mismatch_in_interval(reference, bamfile, chrom, start, end):
"""
Return whether there is a mismatch in the interval (start, end) in any read mapping to the given chromosome.
reference -- a pyfaidx.Fasta object or something that behaves similarly
"""
for column in bamfile.pileup(chrom, start, end):
refbase = reference[chrom][column.pos:column.pos+1]
for piledup in column.pileups:
if piledup.indel != 0: # Insertion is positive; deletion is negative
# Ignore indels
continue
querybase = piledup.alignment.query_sequence[piledup.query_position]
if refbase != querybase:
# Mismatch
return True
return False
ref = Fasta('reference.fasta')
bamfile = AlignmentFile('mappedreads.bam')
has_mismatch_in_interval(ref, bamfile, 'scaffold17', 1000, 2000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment