Last active
January 28, 2016 03:20
biostars 174423
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyfaidx | |
import simplesam | |
with pyfaidx.Fasta('genome.fasta') as fasta, simplesam.Reader(open('orfH9_offtarget.sam')) as sam: | |
for read in sam: | |
if read.mapped and not read.secondary: | |
# grab the reference sequnce from indexed FASTA | |
zero_index = read.pos - 1 | |
sequence_chunk = fasta[read.rname][zero_index:zero_index+20] | |
# or get the reference sequence from the SAM MD tag | |
sequence_chunk_ = read.parse_md()[0:20] | |
# both sequence_chunks should be the same | |
assert sequence_chunk.seq == sequence_chunk_ | |
# pyfaidx Seq objects have some methods you might use | |
sequence_chunk.complement | |
sequence_chunk.reverse | |
sequence_chunk.reverse.complement # or -sequence_chunk | |
# you can test membership against the FASTA index | |
read.rname in fasta # returns True or False |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment