Skip to content

Instantly share code, notes, and snippets.

@standage
Last active July 6, 2018 17:44
Show Gist options
  • Save standage/504c09b8b28d35c4c7bac39ca69e39e9 to your computer and use it in GitHub Desktop.
Save standage/504c09b8b28d35c4c7bac39ca69e39e9 to your computer and use it in GitHub Desktop.
INDIVIDUALS = ('proband', 'mother', 'father')
KSIZES = ('27', '31', '35', '39', '43', '47', '51')
rule all:
input:
expand('{coverage}x_k{ksize}_kevlar_calls_like.vcf',
coverage=('30'), ksize=KSIZES)
rule novel_reads:
input:
'{coverage}x_proband_Int.fastq.gz',
'{coverage}x_mother_1.fastq.gz',
'{coverage}x_mother_2.fastq.gz',
'{coverage}x_father_1.fastq.gz',
'{coverage}x_father_2.fastq.gz',
output:
'{coverage}x_k{ksize}_novel.augfastq.gz',
expand('{{coverage}}x_k{{ksize}}_{indiv}_kmers.counttable',
indiv=INDIVIDUALS)
threads: 16
shell: '''kevlar novel \
--ksize {wildcards.ksize} --threads {threads} \
--case {input[0]} --case-min 5 \
--control {input[1]} {input[2]} \
--control {input[3]} {input[4]} --ctrl-max 1 \
--memory 4G --max-fpr 0.1 \
--out {output[0]} \
--save-case-counts {output[1]} \
--save-ctrl-counts {output[2]} {output[3]}
'''
rule filter_reads:
input:
'{coverage}x_k{ksize}_novel.augfastq.gz',
'mask.nt'
output: '{coverage}x_k{ksize}_filtered.augfastq.gz'
shell: '''kevlar filter \
--ksize {wildcards.ksize} --abund-memory 500M --mask {input[1]} \
--out {output} {input[0]}
'''
rule partition:
input: '{coverage}x_k{ksize}_filtered.augfastq.gz'
output: '{coverage}x_k{ksize}_partitioned.augfastq.gz'
shell: 'kevlar partition --out {output} {input}'
rule call_variants:
input:
'{coverage}x_k{ksize}_partitioned.augfastq.gz',
'GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz'
output: '{coverage}x_k{ksize}_kevlar_calls_raw.vcf'
threads: 4
shell: '''kevlar alac \
--delta 50 --seed-size 51 --ksize {wildcards.ksize} --threads {threads} \
--out {output} {input}
'''
rule populate_sct:
input:
'GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz'
output:
'hg38_k{ksize}.sct'
threads: 16
shell: './make-sct.py {input} {wildcards.ksize} {output} 16'
rule score_variants:
input:
'{coverage}x_k{ksize}_kevlar_calls_raw.vcf',
'hg38_k{ksize}.sct',
expand('{{coverage}}x_k{{ksize}}_{indiv}_kmers.counttable',
indiv=INDIVIDUALS)
output:
'{coverage}x_k{ksize}_kevlar_calls_like.vcf',
shell: '''kevlar simlike \
--refr {input[1]} --case {input[2]} --controls {input[3]} {input[4]} \
--mu 30.0 --sigma 8.0 --epsilon 0.001 --case-min 5 \
--sample-labels Proband Mother Father --out {output} \
{input[0]}
'''
#!/usr/bin/env python
import argparse
import khmer
import sys
import threading
parser = argparse.ArgumentParser()
parser.add_argument('infile')
parser.add_argument('ksize', type=int)
parser.add_argument('outfile')
parser.add_argument('threads', type=int)
args = parser.parse_args()
sct = khmer.SmallCounttable(args.ksize, 3e9, 4)
threads = list()
seqparser = khmer.ReadParser(args.infile)
for _ in range(args.threads):
thread = threading.Thread(target=sct.consume_seqfile, args=(seqparser, ))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
print(khmer.calc_expected_collisions(sct))
sct.save(args.outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment