Skip to content

Instantly share code, notes, and snippets.

@standage
Created December 28, 2017 23:19
Show Gist options
  • Save standage/934fa454cebcaf4a96debccbbf6527b6 to your computer and use it in GitHub Desktop.
Save standage/934fa454cebcaf4a96debccbbf6527b6 to your computer and use it in GitHub Desktop.
This has been replaced by a Snakemake workflow, but thought I should save it for posterity.
#!/usr/bin/env bash
set -eo pipefail
echo -n "Simulate random genome sequences..."
starttime="$(date +%s)"
nuclmm simulate --out helium-refr.fa --order 6 --numseqs 1 --seqlen 2500000 --seed 8013785666 human.order6.mm
nuclmm simulate --out neon-refr.fa --order 6 --numseqs 1 --seqlen 25000000 --seed 7066509186 human.order6.mm
nuclmm simulate --out argon-refr.fa --order 6 --numseqs 1 --seqlen 250000000 --seed 3015700578 human.order6.mm
gzip -f {helium,neon,argon}-refr.fa
elapsed="$(($(date +%s)-starttime))"
echo "done! ($elapsed seconds elapsed)"
echo -n "Simulate trios w. shared (inherited) and unique (de novo) variants..."
starttime="$(date +%s)"
kevlar gentrio --inherited 100 --de-novo 5 --vcf helium.vcf --prefix helium --seed 1776 helium-refr.fa.gz
kevlar gentrio --inherited 100 --de-novo 10 --vcf neon.vcf --prefix neon --seed 1812 neon-refr.fa.gz
kevlar gentrio --inherited 100 --de-novo 12 --vcf argon.vcf --prefix argon --seed 1944 argon-refr.fa.gz
gzip {helium,neon,argon}-{mother,father,proband}.fasta
elapsed="$(($(date +%s)-starttime))"
echo "done! ($elapsed seconds elapsed)"
echo -n "Simulate 2x100bp paired-end Illumina sequencing of each individual to 30x coverage..."
starttime="$(date +%s)"
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 375000 -1 100 -2 100 -S 2053695854357871005 helium-mother.fasta.gz helium-mother-reads-1.fq helium-mother-reads-2.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 375000 -1 100 -2 100 -S 4517457392071889495 helium-father.fasta.gz helium-father-reads-1.fq helium-father-reads-2.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 375000 -1 100 -2 100 -S 2574020394472462046 helium-proband.fasta.gz helium-proband-reads-1.fq helium-proband-reads-2.fq
gzip helium-{mother,father,proband}-reads-{1,2}.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 1890702223848595625 neon-mother.fasta.gz neon-mother-reads-1.fq neon-mother-reads-2.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 586287033698423193 neon-father.fasta.gz neon-father-reads-1.fq neon-father-reads-2.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 1728372192399379054 neon-proband.fasta.gz neon-proband-reads-1.fq neon-proband-reads-2.fq
gzip neon-{mother,father,proband}-reads-{1,2}.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 4291835990902352011 argon-mother.fasta.gz argon-mother-reads-1.fq argon-mother-reads-2.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 7738774760351418614 argon-father.fasta.gz argon-father-reads-1.fq argon-father-reads-2.fq
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 8286444275301796832 argon-proband.fasta.gz argon-proband-reads-1.fq argon-proband-reads-2.fq
elapsed="$(($(date +%s)-starttime))"
echo "done! ($elapsed seconds elapsed)"
echo -n "Apply error correction to all reads"
starttime="$(date +%s)"
lighter -r helium-mother-reads-1.fq -r helium-mother-reads-2.fq -r helium-father-reads-1.fq -r helium-father-reads-2.fq -r helium-proband-reads-1.fq -r helium-proband-reads-2.fq -K 25 2500000 -t 2 -saveTrustedKmers helium-trustedkmers-bloomfilter.lighter
for sample in mother father proband
do
lighter -r helium-${sample}-reads-1.fq -r helium-${sample}-reads-2.fq -K 25 2500000 -t 2 -loadTrustedKmers helium-trustedkmers-bloomfilter.lighter
done
lighter -r neon-mother-reads-1.fq -r neon-mother-reads-2.fq -r neon-father-reads-1.fq -r neon-father-reads-2.fq -r neon-proband-reads-1.fq -r neon-proband-reads-2.fq -K 25 2500000 -t 2 -saveTrustedKmers neon-trustedkmers-bloomfilter.lighter
for sample in mother father proband
do
lighter -r neon-${sample}-reads-1.fq -r neon-${sample}-reads-2.fq -K 25 25000000 -t 2 -loadTrustedKmers neon-trustedkmers-bloomfilter.lighter
done
lighter -r argon-mother-reads-1.fq -r argon-mother-reads-2.fq -r argon-father-reads-1.fq -r argon-father-reads-2.fq -r argon-proband-reads-1.fq -r argon-proband-reads-2.fq -K 25 2500000 -t 2 -saveTrustedKmers argon-trustedkmers-bloomfilter.lighter
for sample in mother father proband
do
lighter -r argon-${sample}-reads-1.fq -r argon-${sample}-reads-2.fq -K 25 250000000 -t 2 -loadTrustedKmers argon-trustedkmers-bloomfilter.lighter
done
elapsed="$(($(date +%s)-starttime))"
echo "done! ($elapsed seconds elapsed)"
echo -n "Interleave corrected and uncorrected reads"
starttime="$(date +%s)"
for trio in helium neon argon
do
for sample in mother father proband
paste <(paste - - - - < ${trio}-${sample}-reads-1.fq) <(paste - - - - < ${trio}-${sample}-reads-2.fq) | tr '\t' '\n' | gzip -c > ${trio}-${sample}-reads.fq.gz
paste <(paste - - - - < ${trio}-${sample}-reads-1.cor.fq) <(paste - - - - < ${trio}-${sample}-reads-2.cor.fq) | tr '\t' '\n' | gzip -c > ${trio}-${sample}-reads-cor.fq.gz
done
done
elapsed="$(($(date +%s)-starttime))"
echo "done! ($elapsed seconds elapsed)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment