Skip to content

Instantly share code, notes, and snippets.

View mdshw5's full-sized avatar

Matt Shirley mdshw5

View GitHub Profile
@mdshw5
mdshw5 / answer.py
Created March 10, 2016 17:22
biostars 75693
import simplesam
with simplesam.Reader(open('input.sam')) as sam:
with simplesam.Writer(open('output.sam', 'w'), sam.header) as fixed:
for read in sam:
if bool(read.flag & 0x800):
read.flag -= 0x800
if not read.secondary:
read.flag += 0x100
fixed.write(read)
@mdshw5
mdshw5 / answer.py
Last active February 20, 2016 12:39
biostars 177781
import pyfaidx
with pyfaidx.Fasta("file.fa") as peg_fasta:
with open("file.sorted.fa", 'w') as sorted_fasta:
for id in sorted(peg_fasta.keys()):
sorted_fasta.write('>' + peg_fasta[id].long_name)
sorted_fasta.write(str(peg_fasta[id]))
@mdshw5
mdshw5 / answer.py
Last active January 28, 2016 03:20
biostars 174423
import pyfaidx
import simplesam
with pyfaidx.Fasta('genome.fasta') as fasta, simplesam.Reader(open('orfH9_offtarget.sam')) as sam:
for read in sam:
if read.mapped and not read.secondary:
# grab the reference sequnce from indexed FASTA
zero_index = read.pos - 1
sequence_chunk = fasta[read.rname][zero_index:zero_index+20]
# or get the reference sequence from the SAM MD tag
@mdshw5
mdshw5 / answer.py
Last active January 19, 2016 14:35
biostars 173201
from simplesam import Reader
from pyfaidx import Fasta
with Reader(open('library.bam', 'r')) as sam_file, Fasta('hg38.fa', as_raw=True) as hg38:
for read in sam_file:
if read.mapped:
# might also want to handle read.reverse here
prior_pos = read.pos - 2 # read.pos is 1-based
prior_base = hg38[read.rname][prior_pos]
@mdshw5
mdshw5 / answer.py
Created January 14, 2016 03:07
biostars 172641
from pyfaidx import Fasta
genes = Fasta('Genome.fasta')
with open('chr02_18s', 'w') as f:
seqFile = genes['chr02'][146062:148216]
f.write('>' + seqFile.name)
f.write(seqFile.seq) # or str(seqFile)
@mdshw5
mdshw5 / answer.py
Last active December 16, 2015 01:53
biostars 169723
from pyfaidx import Fasta
file_1 = {}
with open('file1.txt', 'r') as ids:
for line in ids:
key, value = line.strip().split('\t')
file_1[key] = value
file_2 = Fasta('file2.fa', key_function=lambda key: file_1[key])
@mdshw5
mdshw5 / answer.py
Created November 11, 2015 03:00
biostars 165335
from pyfaidx import Fasta
name_map = {}
with open('newnames.txt') as newnames:
next(newnames) # remove header
for line in newnames:
old, new = line.rstrip().split()
name_map[old] = new
with open('seqnew.fa', 'w') as new_fasta:
@mdshw5
mdshw5 / answer.py
Created August 17, 2015 22:54
Biostars 154785
import random
import sys
from pyfaidx import Fasta
n = 10
faa = Fasta("file.faa")
for sample in random.sample(faa, n):
print(sample.name)
for line in sample:
print(line)
@mdshw5
mdshw5 / pileup.py
Created May 20, 2015 02:04
Simple pileup-like script
#!/usr/bin/env python
import sys
import argparse
import pkg_resources
from collections import deque
from collections import Counter
try:
from collections import OrderedDict
except ImportError: #python 2.6
@mdshw5
mdshw5 / example.py
Created May 11, 2015 17:24
biostars 141739
from pyfaidx import FastaVariant
consensus = FastaVariant('reference.fasta', 'sample1.vcf.gz', het=True, hom=True)
chrom = 'chr1'
seq = consensus[chrom][0:8]
print(seq) # AGTGCG
# if you don't want to invariant sites masked, you're good to go. otherwise: