Skip to content

Instantly share code, notes, and snippets.

@rpetit3
Created October 18, 2017 23:06
Show Gist options
  • Save rpetit3/961e9b94b9c29c7acceda4dd9b6f8328 to your computer and use it in GitHub Desktop.
Save rpetit3/961e9b94b9c29c7acceda4dd9b6f8328 to your computer and use it in GitHub Desktop.
FASTQ cleanup using Snakemake
# Root of the analysis-pipeline checkout. The rules in this file call
# executables under {PATH}/bin and read reference FASTA files under
# {PATH}/data. Override with `--config pipeline_path=...`; the default is the
# previously hard-coded location.
PATH = config.get(
    'pipeline_path', '/home/rpetit/staphopia/apps/analysis-pipeline'
)
# Sample name; the workflow reads '<sample>.fastq.gz' from the working
# directory. Override with `--config sample=...`.
sample = config.get('sample', 'N315_R1')
rule all:
    # Default target: the fastq-stats JSON for the cleaned-up reads of the
    # configured sample.
    input:
        expand(
            'analyses/fastq-stats/{sample}.cleanup.fastq.json',
            sample=sample
        )
rule bbduk_phix:
    # Write fastq-stats output for the original gzipped reads, then run BBDuk
    # against the phiX reference FASTA, writing its output reads to
    # nophix_r1.fastq.
    input:
        fq = '{sample}.fastq.gz'.format(sample=sample)
    output:
        phix = 'analyses/fastq-qc/nophix_r1.fastq',
        json = 'analyses/fastq-stats/{sample}.original.fastq.json'.format(
            sample=sample
        )
    log:
        # File passed to BBDuk's stats= option. Declared here so Snakemake
        # knows the job writes it instead of it being an untracked side
        # effect.
        stats = 'logs/fastq_cleanup-bbduk-phix.txt'
    benchmark:
        "logs/benchmarks/bbduk-phix.txt"
    # Declared explicitly (same value as the previously hard-coded threads=1)
    # so the scheduler knows the job's core usage and the value lives in one
    # place.
    threads: 1
    shell:
        "zcat {input.fq} | {PATH}/bin/fastq-stats > {output.json};"
        "{PATH}/bin/bbduk.sh -Xmx2g k=31 threads={threads} in={input.fq} "
        "out={output.phix} stats={log.stats} hdist=1 "
        "ref={PATH}/data/fastq-qc/phiX-NC_001422.fasta overwrite=t ordered=t;"
rule bbduk_adapter:
    # Run BBDuk on the output of rule bbduk_phix against the adapters
    # reference FASTA (ktrim=r, minlength=36), writing noadapter_r1.fastq.
    input:
        fq = rules.bbduk_phix.output.phix
    output:
        noadapt = 'analyses/fastq-qc/noadapter_r1.fastq'
    log:
        # File passed to BBDuk's stats= option. Declared here so Snakemake
        # knows the job writes it instead of it being an untracked side
        # effect.
        stats = 'logs/fastq_cleanup-bbduk-adapter.txt'
    benchmark:
        "logs/benchmarks/bbduk-adapter.txt"
    # Declared explicitly (same value as the previously hard-coded threads=1)
    # so the scheduler knows the job's core usage and the value lives in one
    # place.
    threads: 1
    shell:
        "{PATH}/bin/bbduk.sh -Xmx2g threads={threads} in={input.fq} "
        "out={output.noadapt} stats={log.stats} "
        "ref={PATH}/data/fastq-qc/adapters.fasta ktrim=r k=23 mink=11 hdist=1 "
        "tpe tbo qout=33 minlength=36 overwrite=t ordered=t"
rule spades_ec:
    # Write fastq-stats output for the adapter-trimmed reads, then run SPAdes
    # in --only-error-correction mode with analyses/fastq-qc/ as its output
    # directory.
    input:
        fq = rules.bbduk_adapter.output.noadapt
    output:
        # NOTE(review): presumably the file SPAdes writes under
        # analyses/fastq-qc/ for this input name -- confirm against the
        # SPAdes version in use.
        ec_fq = 'analyses/fastq-qc/corrected/noadapter_r1.00.0_0.cor.fastq',
        json = 'analyses/fastq-stats/{sample}.post-adapter.fastq.json'.format(
            sample=sample
        )
    benchmark:
        "logs/benchmarks/spades-ec.txt"
    # Declared explicitly (same value as the previously hard-coded -t 1) so
    # the scheduler knows the job's core usage and the value lives in one
    # place.
    threads: 1
    shell:
        "cat {input.fq} | {PATH}/bin/fastq-stats > {output.json};"
        "{PATH}/bin/spades.py -s {input.fq} --only-error-correction "
        "--disable-gzip-output -t {threads} -o analyses/fastq-qc/"
rule spades_stats:
    # Pipe the reads produced by rule spades_ec through fastq-stats and
    # redirect its stdout to the post-ecc JSON file for {sample}.
    input:
        fq = rules.spades_ec.output.ec_fq
    output:
        json = 'analyses/fastq-stats/{sample}.post-ecc.fastq.json'
    shell:
        "cat {input.fq} | {PATH}/bin/fastq-stats > {output.json}"
rule fastq_cleanup:
    # Pipe the reads produced by rule spades_ec through fastq_cleanup (given
    # the post-ecc stats JSON and --coverage 100) and gzip the result into
    # '<sample>.cleanup.fastq.gz'.
    input:
        fq = rules.spades_ec.output.ec_fq,
        # Reference the producing rule instead of repeating its path, so the
        # two cannot drift apart; {sample} is filled from this rule's output.
        stats = rules.spades_stats.output.json
    output:
        final_fq = '{sample}.cleanup.fastq.gz'
    benchmark:
        # Carries the same {sample} wildcard as the output: Snakemake
        # requires output, log and benchmark files of a rule to share their
        # wildcards, and a fixed path would be overwritten by every sample.
        "logs/benchmarks/{sample}.fastq-clean.txt"
    shell:
        "cat {input.fq} | {PATH}/bin/fastq_cleanup --stats {input.stats} "
        "--coverage 100 | gzip --best - > {output.final_fq}"
rule final_stats:
    # Decompress the cleaned-up reads into fastq-stats, store its stdout as
    # the cleanup JSON for {sample}, then delete analyses/fastq-qc/.
    input:
        fq = rules.fastq_cleanup.output.final_fq
    output:
        json = 'analyses/fastq-stats/{sample}.cleanup.fastq.json'
    shell:
        "zcat {input.fq} | {PATH}/bin/fastq-stats > {output.json};"
        # NOTE(review): this removes the directory that holds the declared
        # outputs of bbduk_phix, bbduk_adapter and spades_ec; consider
        # marking those outputs temp() instead of deleting them here.
        "rm -rf analyses/fastq-qc/"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment