Skip to content

Instantly share code, notes, and snippets.

@kdaily
Forked from seandavi/bamsplit.py
Created February 14, 2013 20:12
Show Gist options
  • Save kdaily/4955971 to your computer and use it in GitHub Desktop.
Save kdaily/4955971 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# split a bam file by read group ID
# Sean Davis <seandavi@gmail.com>
# March 10, 2012
#
import pysam
import argparse
import logging
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('filename',help="The bam filename")
opts = parser.parse_args()
infile = pysam.Samfile(opts.filename,'rb')
header = infile.header
readgroups = header['RG']
# remove readgroups from header
del(header['RG'])
outfiles = {}
for rg in readgroups:
tmphead = header
tmphead['RG']=[rg]
logging.info('Creating new BAM file: %s',(rg['ID']+'.bam','wb'))
outfiles[rg['ID']] = pysam.Samfile(rg['ID']+'.bam','wb',header=tmphead)
j=0
for read in infile:
j+=1
idtag = [x[1] for x in read.tags if x[0]=='RG'][0]
if((j % 100000)==0):
logging.info('read and wrote %d records',(j))
outfiles[idtag].write(read)
for f in outfiles.values():
f.close()
infile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment