Skip to content

Instantly share code, notes, and snippets.

@Jayesh-Kumar-Sundaram
Last active February 4, 2021 23:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jayesh-Kumar-Sundaram/f5016f63ad3ad87686ee11d23c96b0db to your computer and use it in GitHub Desktop.
Save Jayesh-Kumar-Sundaram/f5016f63ad3ad87686ee11d23c96b0db to your computer and use it in GitHub Desktop.
This script takes a list of SRR run accessions and downloads the corresponding .fastq.gz files from the ENA database
#!/bin/bash
### Usage ###
# This script takes a list of SRR run accessions and downloads the corresponding .fastq.gz files from the ENA database.
### Comments ###
# Create a file named "SRRs_to_process.txt" in the same directory as the script (and run the script from that directory, since the file is looked up relative to the current working directory). The file must contain one SRR ID per line.
# Fastq files are downloaded into the directory the script is run from. The script also generates an output file named "Number_of_reads_in_fastq_files.txt", which records the download status of each fastq file and the number of reads in it.
# Note: This script was tested on both macOS and Linux, and it requires "curl".
# External tools used below: curl, zgrep, wc, bc, tee, date, mktemp.
ena_fastq_dir="ftp://ftp.sra.ebi.ac.uk/vol1/fastq"
report_file="Number_of_reads_in_fastq_files.txt"
srr_list_file="SRRs_to_process.txt" # File containing list of SRRs to download
listing_file=""                     # Scratch file for the FTP directory listing (set in main)

#######################################
# Print a message and append it to the report file.
# Uses a plain pipe rather than `|&`, which needs bash >= 4 and therefore
# breaks on the bash 3.2 shipped with macOS.
# Globals:   report_file (read)
# Arguments: the message words
#######################################
log_report() {
  printf '%s\n' "$*" | tee -a "$report_file"
}

#######################################
# Print the ENA FTP directory that holds the fastq files of one run.
# Layout: <base>/<first 6 chars>[/<subdir>]/<accession>, where <subdir> is
#   absent for 9-character accessions,
#   "00" + last digit        for 10 characters,
#   "0"  + last two digits   for 11 characters,
#   the last three digits    for 12 characters.
# Globals:   ena_fastq_dir (read)
# Arguments: $1 - run accession
# Outputs:   the directory URL (no trailing slash) on stdout
# Returns:   1 if the accession length is not 9-12 characters
#######################################
ena_run_dir() {
  local acc=$1
  local shard
  case ${#acc} in
    9) shard="" ;;
    10) shard="00${acc:9}" ;;
    11) shard="0${acc:9}" ;;
    12) shard="${acc:9}" ;;
    *) return 1 ;;
  esac
  printf '%s/%s%s/%s\n' "$ena_fastq_dir" "${acc:0:6}" "${shard:+/${shard}}" "$acc"
}

#######################################
# Count the non-empty lines of a (possibly gzip-compressed) file.
# The count goes through $(( )) so the result is a bare integer even where
# `wc -l` pads its output with leading blanks (BSD/macOS).
# Arguments: $1 - file path
# Outputs:   the line count on stdout
#######################################
count_nonempty_lines() {
  printf '%s\n' "$(( $(zgrep . "$1" | wc -l) ))"
}

#######################################
# Download one fastq file, trying each URL in order until one succeeds, then
# log a timestamp, the accession, the download status and the read count.
# Globals:   report_file (appended to, via log_report)
# Arguments: $1 - run accession
#            $2 - local output file
#            $3... - candidate URLs, tried in order
#######################################
download_fastq() {
  local acc=$1 out=$2
  shift 2
  local url n_lines status="Download failed"

  log_report "$(date '+%d/%m/%Y %H:%M:%S')"
  log_report "$acc"
  for url in "$@"; do
    if curl "$url" > "$out"; then
      status="Download successful"
      break
    fi
  done
  log_report "$status"

  n_lines=$(count_nonempty_lines "$out")
  log_report "Fastq file: $out; Number of lines: $n_lines; Number of reads: $(printf 'scale=2; %s/4\n' "$n_lines" | bc)"
}

#######################################
# Download the fastq file(s) of one run accession.
# The run directory listing is fetched first; exactly two entries in it is
# taken to mean a paired-end run, in which case the _2 file is fetched too.
# Globals:   listing_file (written)
# Arguments: $1 - run accession
# Returns:   1 if the accession length is unsupported
#######################################
process_accession() {
  local acc=$1
  local run_dir n_listed

  if ! run_dir=$(ena_run_dir "$acc"); then
    printf 'warning: skipping "%s": unsupported accession length %d\n' "$acc" "${#acc}" >&2
    return 1
  fi

  if curl "${run_dir}/" > "$listing_file"; then
    log_report "Temp file download successful"
  else
    log_report "Temp file download failed"
  fi
  n_listed=$(count_nonempty_lines "$listing_file")

  # Single-end runs are published as <acc>.fastq.gz, so fall back to that name
  # when <acc>_1.fastq.gz is missing; either way the local file is <acc>_1.fastq.gz.
  download_fastq "$acc" "${acc}_1.fastq.gz" \
    "${run_dir}/${acc}_1.fastq.gz" "${run_dir}/${acc}.fastq.gz"

  if (( n_listed == 2 )); then
    download_fastq "$acc" "${acc}_2.fastq.gz" "${run_dir}/${acc}_2.fastq.gz"
  fi
}

#######################################
# Read accessions from srr_list_file (first whitespace-separated field of
# each line) and process them one by one.
# Globals:   report_file, srr_list_file (read); listing_file (written)
# Returns:   1 if the accession list or the scratch file is unavailable
#######################################
main() {
  local line acc

  [[ -e "$report_file" ]] || touch "$report_file"

  if [[ ! -r "$srr_list_file" ]]; then
    printf 'error: cannot read %s\n' "$srr_list_file" >&2
    return 1
  fi

  # Private scratch file instead of a fixed "temp" in the working directory,
  # so an unrelated file or directory with that name is never clobbered/removed.
  if ! listing_file=$(mktemp "${TMPDIR:-/tmp}/ena_listing.XXXXXX"); then
    printf 'error: could not create a temporary file\n' >&2
    return 1
  fi
  trap 'rm -f -- "$listing_file"' EXIT

  # `|| [[ -n $line ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'} # tolerate CRLF line endings
    read -r acc _ <<< "$line"
    [[ -n "$acc" ]] || continue
    process_accession "$acc"
  done < "$srr_list_file"
  return 0
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment