Last active
September 25, 2015 15:41
-
-
Save ialbert/37ee252d8e01d6719ccb to your computer and use it in GitHub Desktop.
Ebola Genome Analysis in 1 minute
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Cue the music: https://www.youtube.com/watch?v=JtyByefOvgQ | |
# | |
# | |
# Edit (Sept 2015): due to an unreasonable slowness of ncbi the script now requires | |
# that you fetch the runinfo fetch once before starting the script | |
# It should not really be slow but it takes almost 30 secons. | |
# | |
# mkdir -p ~/refs | |
# esearch -db sra -query PRJNA257197 | efetch -format runinfo > ~/refs/runinfo.txt | |
# | |
echo | |
echo '**********************************************************************' | |
echo '** Your mission - should you choose to accept it - is to reproduce **' | |
echo '** the SNP results of the Ebola Science paper in less than a minute **' | |
echo '**********************************************************************' | |
sleep 2 | |
echo | |
echo '*******************************************' | |
echo '** I guess better get to it! Tick! Tock! **' | |
echo '*******************************************' | |
sleep 2 | |
# Set up directories | |
mkdir -p sra bam refs | |
echo | |
echo "*** Accessing the NCBI remote firewall ... " | |
echo | |
# Create the reference genome | |
REF=refs/KJ660346.fa | |
efetch -db nucleotide -id KJ660346 -format fasta > $REF | |
# Fix ids to match annotator. | |
sed -i '' 's/gi|674810549|gb|KJ660346.2|/KJ660346/' $REF | |
bwa index $REF 2> /dev/null | |
samtools faidx $REF | |
echo | |
echo "*** Accessing mainframe for run information ... " | |
echo | |
# | |
#esearch -db sra -query PRJNA257197 | efetch -format runinfo > ~/refs/runinfo.txt | |
# | |
cat ~/refs/runinfo.txt | grep "Apr 14" | cut -f 1 -d ',' | grep SRR | head -15 > sraids.txt | |
echo | |
echo "*** Downloading secrets ... " | |
echo | |
cat sraids.txt | parallel fastq-dump -X 15000 -O sra --split-files {} 1> /dev/null | |
echo | |
echo "*** Decoding secrets ..." | |
echo | |
cat sraids.txt | parallel "bwa mem -R '@RG\tID:{}\tSM:{}\tLB:{}' $REF sra/{}_1.fastq sra/{}_2.fastq 2> /dev/null | samtools view -b - | samtools sort -o - tmp > bam/{}.bam" | |
echo | |
echo -e '*** Uploading Visual Basic to open socket ...' | |
echo | |
ls bam/*.bam | parallel samtools index {} | |
echo | |
echo "*** QUICK! DISARM THE EXPLOSIVES ..." | |
sleep 2 | |
echo | |
printf "*** CUT THE \e[1mRED\e[m WIRE " | |
sleep 2 | |
echo | |
printf "*** THE \e[1mOTHER RED\e[m WIRE" | |
sleep 2 | |
echo | |
echo "*** SNP ***" | |
sleep 2 | |
echo | |
freebayes -p 1 -f $REF bam/*.bam > results.vcf | |
echo | |
echo "*** Success ..." | |
echo | |
java -jar ~/lib/snpEff/snpEff.jar -v ebola_zaire results.vcf > ANNOTATED_VARIANTS.vcf | |
echo | |
echo "*** WARNING! The data will self destruct in one minute! ***" | |
echo | |
sleep 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment