Skip to content

Instantly share code, notes, and snippets.

@mdshw5
Last active January 28, 2016 03:20
Show Gist options
  • Save mdshw5/18e2fd7ea862161cb6b5 to your computer and use it in GitHub Desktop.
Save mdshw5/18e2fd7ea862161cb6b5 to your computer and use it in GitHub Desktop.
biostars 174423
import pyfaidx
import simplesam
with pyfaidx.Fasta('genome.fasta') as fasta, simplesam.Reader(open('orfH9_offtarget.sam')) as sam:
for read in sam:
if read.mapped and not read.secondary:
# grab the reference sequnce from indexed FASTA
zero_index = read.pos - 1
sequence_chunk = fasta[read.rname][zero_index:zero_index+20]
# or get the reference sequence from the SAM MD tag
sequence_chunk_ = read.parse_md()[0:20]
# both sequence_chunks should be the same
assert sequence_chunk.seq == sequence_chunk_
# pyfaidx Seq objects have some methods you might use
sequence_chunk.complement
sequence_chunk.reverse
sequence_chunk.reverse.complement # or -sequence_chunk
# you can test membership against the FASTA index
read.rname in fasta # returns True or False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment