Last active
July 6, 2018 17:44
-
-
Save standage/504c09b8b28d35c4c7bac39ca69e39e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
INDIVIDUALS = ('proband', 'mother', 'father') | |
KSIZES = ('27', '31', '35', '39', '43', '47', '51') | |
rule all: | |
input: | |
expand('{coverage}x_k{ksize}_kevlar_calls_like.vcf', | |
coverage=('30'), ksize=KSIZES) | |
rule novel_reads: | |
input: | |
'{coverage}x_proband_Int.fastq.gz', | |
'{coverage}x_mother_1.fastq.gz', | |
'{coverage}x_mother_2.fastq.gz', | |
'{coverage}x_father_1.fastq.gz', | |
'{coverage}x_father_2.fastq.gz', | |
output: | |
'{coverage}x_k{ksize}_novel.augfastq.gz', | |
expand('{{coverage}}x_k{{ksize}}_{indiv}_kmers.counttable', | |
indiv=INDIVIDUALS) | |
threads: 16 | |
shell: '''kevlar novel \ | |
--ksize {wildcards.ksize} --threads {threads} \ | |
--case {input[0]} --case-min 5 \ | |
--control {input[1]} {input[2]} \ | |
--control {input[3]} {input[4]} --ctrl-max 1 \ | |
--memory 4G --max-fpr 0.1 \ | |
--out {output[0]} \ | |
--save-case-counts {output[1]} \ | |
--save-ctrl-counts {output[2]} {output[3]} | |
''' | |
rule filter_reads: | |
input: | |
'{coverage}x_k{ksize}_novel.augfastq.gz', | |
'mask.nt' | |
output: '{coverage}x_k{ksize}_filtered.augfastq.gz' | |
shell: '''kevlar filter \ | |
--ksize {wildcards.ksize} --abund-memory 500M --mask {input[1]} \ | |
--out {output} {input[0]} | |
''' | |
rule partition: | |
input: '{coverage}x_k{ksize}_filtered.augfastq.gz' | |
output: '{coverage}x_k{ksize}_partitioned.augfastq.gz' | |
shell: 'kevlar partition --out {output} {input}' | |
rule call_variants: | |
input: | |
'{coverage}x_k{ksize}_partitioned.augfastq.gz', | |
'GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz' | |
output: '{coverage}x_k{ksize}_kevlar_calls_raw.vcf' | |
threads: 4 | |
shell: '''kevlar alac \ | |
--delta 50 --seed-size 51 --ksize {wildcards.ksize} --threads {threads} \ | |
--out {output} {input} | |
''' | |
rule populate_sct: | |
input: | |
'GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz' | |
output: | |
'hg38_k{ksize}.sct' | |
threads: 16 | |
shell: './make-sct.py {input} {wildcards.ksize} {output} 16' | |
rule score_variants: | |
input: | |
'{coverage}x_k{ksize}_kevlar_calls_raw.vcf', | |
'hg38_k{ksize}.sct', | |
expand('{{coverage}}x_k{{ksize}}_{indiv}_kmers.counttable', | |
indiv=INDIVIDUALS) | |
output: | |
'{coverage}x_k{ksize}_kevlar_calls_like.vcf', | |
shell: '''kevlar simlike \ | |
--refr {input[1]} --case {input[2]} --controls {input[3]} {input[4]} \ | |
--mu 30.0 --sigma 8.0 --epsilon 0.001 --case-min 5 \ | |
--sample-labels Proband Mother Father --out {output} \ | |
{input[0]} | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import khmer | |
import sys | |
import threading | |
parser = argparse.ArgumentParser() | |
parser.add_argument('infile') | |
parser.add_argument('ksize', type=int) | |
parser.add_argument('outfile') | |
parser.add_argument('threads', type=int) | |
args = parser.parse_args() | |
sct = khmer.SmallCounttable(args.ksize, 3e9, 4) | |
threads = list() | |
seqparser = khmer.ReadParser(args.infile) | |
for _ in range(args.threads): | |
thread = threading.Thread(target=sct.consume_seqfile, args=(seqparser, )) | |
threads.append(thread) | |
thread.start() | |
for thread in threads: | |
thread.join() | |
print(khmer.calc_expected_collisions(sct)) | |
sct.save(args.outfile) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment