Skip to content

Instantly share code, notes, and snippets.

@brevans
Last active December 23, 2015 15:09
Show Gist options
  • Save brevans/6653392 to your computer and use it in GitHub Desktop.
Save brevans/6653392 to your computer and use it in GitHub Desktop.
bowtie 2 -> sorted bams
#!/usr/bin/env python
'''
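example:
./gen_bowtie2_coms.py fastq_dir bowtie2_index bin_dir out_dir ["bowtie2 args"]
-all sample fastq files must be in one directory
-bowtie2_index is the index to align against (it is passed to bowtie2 -x)
-the bin directory should have samtools and bowtie2
-the outdir must already exist
-the quoted bowtie2 args are optional; the default is "--no-unal --very-sensitive-local"
This script prints the commands to screen. You can save them to a file with
./gen_bowtie2_coms.py fastq_dir bowtie2_index bin_dir out_dir > jobs.txt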
'''
import sys
import re
from os import path
from collections import defaultdict as dd
from glob import glob
def abs_join(a, b):
    '''
    Return b joined onto the absolute form of the path a.
    '''
    absolute_base = path.abspath(a)
    joined = path.join(absolute_base, b)
    return joined
def get_sample_fqs(dir):
    '''
    Group the *.fastq.gz files found in dir by sample name and read number.

    File names are expected to look like
    <sample>_L<3 digits>_R<digit>_<3 digits>.fastq.gz; the sample name and
    the digit after "_R" are taken from the file name.

    Returns a defaultdict mapping each sample name to a dict with the keys:
      '1', '2'       -- lists of the sample's R1 / R2 file paths
      'rg_id_string' -- the bowtie2 read-group options built from the name

    Files whose names do not fit the pattern, or whose read number is not
    1 or 2, are reported on stderr and skipped.
    '''
    name_pattern = re.compile(r'([\w\-_]*)_L\d\d\d_R(\d)_\d\d\d\.fastq\.gz')
    samples = dd(lambda: {'1': [], '2': []})
    # glob returns files in arbitrary order. Sorting makes the R1 and R2 lists
    # of a sample follow the same lane/chunk order, so mates stay paired when
    # the lists are later handed to bowtie2 as -1 and -2.
    for fi in sorted(glob(abs_join(dir, '*.fastq.gz'))):
        #for each file, find its sample name and its read orientation
        mat = name_pattern.match(path.basename(fi))
        if mat is None:
            # warnings go to stderr because stdout carries the command list
            sys.stderr.write(
                'skipping {0}: unrecognized file name\n'.format(fi))
            continue
        s_name, pair_num = mat.groups()
        if pair_num not in ('1', '2'):
            sys.stderr.write(
                'skipping {0}: unexpected read number R{1}\n'.format(
                    fi, pair_num))
            continue
        #add the file to the samples dictionary
        sample = samples[s_name]
        sample[pair_num].append(fi)
        sample['rg_id_string'] = '--rg-id {0} --rg SM:{0} --rg LB:{0} --rg PL:ILLUMINA'.format(s_name)
    return samples
def generate_bowtie2_commands(samples, ref, bin, out, bowtie_args):
    '''
    Build one shell command line per sample.

    Each command runs bowtie2 on the sample's R1/R2 files, pipes the output
    through "samtools view" and "samtools sort", then runs "samtools index"
    on the result.

    samples     -- mapping of sample name to a dict holding the keys '1',
                   '2' (lists of fastq paths) and 'rg_id_string'
    ref         -- value placed after bowtie2's -x option
    bin         -- directory containing the bowtie2 and samtools executables
    out         -- directory the <sample>.bam files are written under
    bowtie_args -- extra options inserted verbatim into the bowtie2 call

    Returns the list of command strings.
    '''
    # NOTE(review): "samtools sort - <prefix>" looks like the calling
    # convention of older samtools releases -- confirm against the samtools
    # version in the bin directory.
    template = '{0} {1} {2} -x {3} -1 {4} -2 {5} | {6} view -Su - | {6} sort - {7}; {6} index {7}.bam;'
    command_lines = []
    for name, info in samples.items():
        fields = (
            abs_join(bin, 'bowtie2'),
            info['rg_id_string'],
            bowtie_args,
            ref,
            ','.join(info['1']),
            ','.join(info['2']),
            abs_join(bin, 'samtools'),
            abs_join(out, name),
        )
        command_lines.append(template.format(*fields))
    return command_lines
def main():
    '''
    Read the command line and print one alignment command per sample.

    Positional arguments (sys.argv[1:]):
      fastq_dir      -- directory holding the sample *.fastq.gz files
      bowtie2_index  -- index handed to bowtie2 -x
      bin_dir        -- directory containing bowtie2 and samtools
      out_dir        -- directory the bam files are written under
      bowtie2 args   -- optional; one quoted string of extra bowtie2 options

    Exits with a usage message when fewer than four arguments are given.
    '''
    if len(sys.argv) < 5:
        # a usage line is more helpful than the bare IndexError traceback
        sys.exit(
            'usage: {0} fastq_dir bowtie2_index bin_dir out_dir '
            '["bowtie2 args"]'.format(sys.argv[0]))
    fastq_dir, ref, bindir, outdir = sys.argv[1:5]
    if len(sys.argv) > 5:
        bowtie2_args = sys.argv[5]
    else:
        bowtie2_args = '--no-unal --very-sensitive-local'
    samples = get_sample_fqs(fastq_dir)
    commands = generate_bowtie2_commands(samples, ref, bindir, outdir, bowtie2_args)
    # print(...) with a single argument behaves the same on Python 2 and 3
    print('\n'.join(commands))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment