Created
December 28, 2017 23:19
-
-
Save standage/934fa454cebcaf4a96debccbbf6527b6 to your computer and use it in GitHub Desktop.
This has been replaced by a Snakemake workflow, but thought I should save it for posterity.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
set -eo pipefail | |
echo -n "Simulate random genome sequences..." | |
starttime="$(date +%s)" | |
nuclmm simulate --out helium-refr.fa --order 6 --numseqs 1 --seqlen 2500000 --seed 8013785666 human.order6.mm | |
nuclmm simulate --out neon-refr.fa --order 6 --numseqs 1 --seqlen 25000000 --seed 7066509186 human.order6.mm | |
nuclmm simulate --out argon-refr.fa --order 6 --numseqs 1 --seqlen 250000000 --seed 3015700578 human.order6.mm | |
gzip -f {helium,neon,argon}-refr.fa | |
elapsed="$(($(date +%s)-starttime))" | |
echo "done! ($elapsed seconds elapsed)" | |
echo -n "Simulate trios w. shared (inherited) and unique (de novo) variants..." | |
starttime="$(date +%s)" | |
kevlar gentrio --inherited 100 --de-novo 5 --vcf helium.vcf --prefix helium --seed 1776 helium-refr.fa.gz | |
kevlar gentrio --inherited 100 --de-novo 10 --vcf neon.vcf --prefix neon --seed 1812 neon-refr.fa.gz | |
kevlar gentrio --inherited 100 --de-novo 12 --vcf argon.vcf --prefix argon --seed 1944 argon-refr.fa.gz | |
gzip {helium,neon,argon}-{mother,father,proband}.fasta | |
elapsed="$(($(date +%s)-starttime))" | |
echo "done! ($elapsed seconds elapsed)" | |
echo -n "Simulate 2x100bp paired-end Illumina sequencing of each individual to 30x coverage..." | |
starttime="$(date +%s)" | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 375000 -1 100 -2 100 -S 2053695854357871005 helium-mother.fasta.gz helium-mother-reads-1.fq helium-mother-reads-2.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 375000 -1 100 -2 100 -S 4517457392071889495 helium-father.fasta.gz helium-father-reads-1.fq helium-father-reads-2.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 375000 -1 100 -2 100 -S 2574020394472462046 helium-proband.fasta.gz helium-proband-reads-1.fq helium-proband-reads-2.fq | |
gzip helium-{mother,father,proband}-reads-{1,2}.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 1890702223848595625 neon-mother.fasta.gz neon-mother-reads-1.fq neon-mother-reads-2.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 586287033698423193 neon-father.fasta.gz neon-father-reads-1.fq neon-father-reads-2.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 1728372192399379054 neon-proband.fasta.gz neon-proband-reads-1.fq neon-proband-reads-2.fq | |
gzip neon-{mother,father,proband}-reads-{1,2}.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 4291835990902352011 argon-mother.fasta.gz argon-mother-reads-1.fq argon-mother-reads-2.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 7738774760351418614 argon-father.fasta.gz argon-father-reads-1.fq argon-father-reads-2.fq | |
wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 3750000 -1 100 -2 100 -S 8286444275301796832 argon-proband.fasta.gz argon-proband-reads-1.fq argon-proband-reads-2.fq | |
elapsed="$(($(date +%s)-starttime))" | |
echo "done! ($elapsed seconds elapsed)" | |
echo -n "Apply error correction to all reads" | |
starttime="$(date +%s)" | |
lighter -r helium-mother-reads-1.fq -r helium-mother-reads-2.fq -r helium-father-reads-1.fq -r helium-father-reads-2.fq -r helium-proband-reads-1.fq -r helium-proband-reads-2.fq -K 25 2500000 -t 2 -saveTrustedKmers helium-trustedkmers-bloomfilter.lighter | |
for sample in mother father proband | |
do | |
lighter -r helium-${sample}-reads-1.fq -r helium-${sample}-reads-2.fq -K 25 2500000 -t 2 -loadTrustedKmers helium-trustedkmers-bloomfilter.lighter | |
done | |
lighter -r neon-mother-reads-1.fq -r neon-mother-reads-2.fq -r neon-father-reads-1.fq -r neon-father-reads-2.fq -r neon-proband-reads-1.fq -r neon-proband-reads-2.fq -K 25 2500000 -t 2 -saveTrustedKmers neon-trustedkmers-bloomfilter.lighter | |
for sample in mother father proband | |
do | |
lighter -r neon-${sample}-reads-1.fq -r neon-${sample}-reads-2.fq -K 25 25000000 -t 2 -loadTrustedKmers neon-trustedkmers-bloomfilter.lighter | |
done | |
lighter -r argon-mother-reads-1.fq -r argon-mother-reads-2.fq -r argon-father-reads-1.fq -r argon-father-reads-2.fq -r argon-proband-reads-1.fq -r argon-proband-reads-2.fq -K 25 2500000 -t 2 -saveTrustedKmers argon-trustedkmers-bloomfilter.lighter | |
for sample in mother father proband | |
do | |
lighter -r argon-${sample}-reads-1.fq -r argon-${sample}-reads-2.fq -K 25 250000000 -t 2 -loadTrustedKmers argon-trustedkmers-bloomfilter.lighter | |
done | |
elapsed="$(($(date +%s)-starttime))" | |
echo "done! ($elapsed seconds elapsed)" | |
echo -n "Interleave corrected and uncorrected reads" | |
starttime="$(date +%s)" | |
for trio in helium neon argon | |
do | |
for sample in mother father proband | |
paste <(paste - - - - < ${trio}-${sample}-reads-1.fq) <(paste - - - - < ${trio}-${sample}-reads-2.fq) | tr '\t' '\n' | gzip -c > ${trio}-${sample}-reads.fq.gz | |
paste <(paste - - - - < ${trio}-${sample}-reads-1.cor.fq) <(paste - - - - < ${trio}-${sample}-reads-2.cor.fq) | tr '\t' '\n' | gzip -c > ${trio}-${sample}-reads-cor.fq.gz | |
done | |
done | |
elapsed="$(($(date +%s)-starttime))" | |
echo "done! ($elapsed seconds elapsed)" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment