Skip to content

Instantly share code, notes, and snippets.

@squirrelo
Created April 16, 2015 17:52
Show Gist options
  • Save squirrelo/aee81e4b482f4197aac2 to your computer and use it in GitHub Desktop.
Save squirrelo/aee81e4b482f4197aac2 to your computer and use it in GitHub Desktop.
bloom_filter
#!/usr/bin/env python
from os.path import join, basename, splitext
from subprocess import Popen, PIPE
from shlex import split as comm_split
import click
from qiime_default_reference import get_reference_taxonomy
@click.command()
@click.option('--seqs_fp', '-i', type=click.Path(
exists=True, dir_okay=False, readable=True, resolve_path=True),
help="Input demultiplexed fna file")
@click.option('--output_fp', '-o', type=click.Path(
exists=True, file_okay=False, readable=True, writable=True,
resolve_path=True), help="Output folder for filtered fna file")
@click.option('--bloom_fp', '-b', type=click.Path(
exists=True, dir_okay=False, readable=True, resolve_path=True),
help="Path to the bloom sequence reference file")
@click.option('--taxonomy_fp', '-t', default=get_reference_taxonomy(),
type=click.Path(exists=True, dir_okay=False, readable=True,
resolve_path=True), help="Taxonomy file to use "
"(Default %s)" % get_reference_taxonomy())
@click.option('--threads', default=1,
help='Number of threads to run for sormeRNA (Default 1)')
def bloom_filter(bloom_fp, seqs_fp, output_fp,
taxonomy_fp=get_reference_taxonomy(), threads=1):
base_file = splitext(basename(seqs_fp))[0]
base_folder = join(output_fp, base_file + "-blooms")
# create sortmerna params file
params_file = join(output_fp, "sortmerna_params.txt")
with open(params_file, 'w') as f:
f.write("pick_otus:otu_picking_method sortmerna\n")
f.write("pick_otus:similarity 0.97\n")
f.write("pick_otus:threads %d\n" % threads)
# set up and run bloom filtering - sequence picking step
bloom_args = {
'input': seqs_fp,
'output': base_folder,
'reference': bloom_fp,
'taxonomy': taxonomy_fp
}
args = comm_split(('pick_closed_reference_otus.py -i %(input)s -o '
'%(output)s -r %(reference)s -t %(taxonomy)s ' + '-p '
+ params_file) % bloom_args)
bloom_call = Popen(args, stderr=PIPE)
return_code = bloom_call.wait()
if return_code != 0:
raise RuntimeError('Error running bloom filter:\n%s' %
bloom_call.stderr.read())
# set up and run bloom filtering - sequence removal step
filter_args = {
'input': seqs_fp,
'output': join(output_fp, base_file + "-bloomfiltered.fna"),
'otus': join(base_folder,
'sortmerna_picked_otus', base_file + '_otus.txt')
}
args = comm_split(('filter_fasta.py -f %(input)s -m %(otus)s -n -o '
'%(output)s') % filter_args)
filter_call = Popen(args, stderr=PIPE)
return_code = filter_call.wait()
if return_code != 0:
raise RuntimeError('error running bloom filter:\n%s' %
filter_call.stderr.read())
if __name__ == '__main__':
bloom_filter()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment