Skip to content

Instantly share code, notes, and snippets.

Created June 9, 2013 20:49
Show Gist options
  • Save anonymous/5745151 to your computer and use it in GitHub Desktop.
Save anonymous/5745151 to your computer and use it in GitHub Desktop.
import os
import re
from ruffus import *
import subprocess
import sqlite3
parser = cmdline.get_argparse(description='Midlands-1 analysis pipeline')
parser.add_argument("-s", "--sqlite_db", type=str, help="Name and path of SQLite3 database")
parser.add_argument("-c", "--sql_command", type=str, help="SQL command to return rows")
options = parser.parse_args()
# optional logger which can be passed to ruffus tasks
logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)
#_____________________________________________________________________________________
# pipelined functions go here
#_____________________________________________________________________________________
def cmd_exists(cmd):
try:
ret = subprocess.call([cmd],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except OSError, e:
return 0
binaries = dict((b, b) for b in ['bwa', 'samtools'])
missing_files = [binary_file for binary_file in binaries if not cmd_exists(binary_file)]
if missing_files:
raise SystemExit("%s not on path" % (", ".join(missing_files)))
def read_run_details_sqlite3(fn, cmd):
conn = sqlite3.connect(fn)
conn.row_factory=sqlite3.Row
c = conn.cursor()
c.execute(cmd)
rows = c.fetchall()
return rows
runs = read_run_details_sqlite3(options.sqlite_db, options.sql_command)
print "%d runs selected " % (len(runs),)
input_prefix = '/mnt/phatso/nick/alnQC'
usefulhash = {}
def make_input_files(outfile):
files = []
for run in runs:
files.append((
('%s/%s/%s_1_trimmed.fastq.gz' % (input_prefix, run['sequencing_plate'], run['label']),
'%s/%s/%s_2_trimmed.fastq.gz' % (input_prefix, run['sequencing_plate'], run['label'])),
outfile % run['label'],
run['label']
))
usefulhash[outfile % run['label']] = run['label']
return files
initial_input_files = make_input_files('%s.sorted.bam')
denovo_assembly_files = make_input_files('%s.assembly')
@files(initial_input_files)
def align_to_les(infiles, outfile, label):
os.system("""%s mem -t16 refs/NC_011770.fna %s %s |
%s view -bS - |
%s sort -f -@ 8 - %s""" % (binaries['bwa'],
infiles[0], infiles[1],
binaries['samtools'],
binaries['samtools'],
outfile))
@merge(align_to_les, "variants.vcf")
#@merge(['CF40-P31.sorted.bam', 'CF60-P44.sorted.bam'], "variants.vcf")
def generate_vcf(infiles, outfile):
fh = open("labels.txt", "w")
for fn in infiles:
print >>fh, usefulhash[fn]
fh.close()
cmd = "%s mpileup -q 50 -f refs/NC_011770.fna %s \
| java -jar bin/VarScan.jar mpileup2snp \
--min-coverage 2 --min-var-freq 0.9 --p-value 0.005 \
--output-vcf --vcf-sample-list labels.txt > %s" % (binaries['samtools'],
" ".join(infiles), outfile)
os.system(cmd)
@transform(align_to_les, suffix(".sorted.bam"), r"\1_assembly/contigs.fa")
def velvet_assembly(infile, outfile):
label = usefulhash[infile]
os.system("bin/velvet_1.2.10/velveth %s_assembly 63 -bam %s" % (label, infile))
os.system("bin/velvet_1.2.10/velvetg %s_assembly -exp_cov auto -cov_cutoff auto" % (label))
@transform(velvet_assembly, suffix("contigs.fa"), "mlst.json")
def mlst_profile(infile, outfile):
os.system("curl -o %s -F file=@%s -F name=Pseudomonas_aeruginosa http://192.168.1.31:8001/mlst/?format=json" % (outfile, infile))
cmdline.run(options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment