Last active
February 4, 2021 23:45
-
-
Save Jayesh-Kumar-Sundaram/f5016f63ad3ad87686ee11d23c96b0db to your computer and use it in GitHub Desktop.
This script takes a list of SRR accession IDs and downloads the corresponding .fastq.gz files from the ENA database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
### Usage ###
# Downloads the .fastq.gz files for a list of run accessions (e.g. SRR IDs)
# from the ENA FTP server.
#
### Comments ###
# Create a file named "SRRs_to_process.txt" in the directory the script is run
# from, containing one run accession per line (only the first word of each
# line is used; blank lines are ignored).
# Fastq files are written to the current working directory. The script also
# appends to "Number_of_reads_in_fastq_files.txt" the download status and the
# number of lines/reads of every fastq file it downloaded.
# Requires: bash (3.2 or newer), curl, gzip and bc. Only constructs available
# in bash 3.2 are used so the script runs with the stock macOS shell.

ena_fastq_dir="ftp://ftp.sra.ebi.ac.uk/vol1/fastq"
report_file="Number_of_reads_in_fastq_files.txt"
input_file="SRRs_to_process.txt"
# Scratch file holding the FTP directory listing of the run being processed.
# Kept global (not local to main) so the EXIT trap can still see it.
listing_tmp=""

#######################################
# Print a message to stdout and append it to the report file.
# Globals:   report_file (read)
# Arguments: the message words
#######################################
log() {
  printf '%s\n' "$*" | tee -a "$report_file"
}

#######################################
# Print the ENA FTP directory that holds the fastq files of a run.
# Accessions of 9 characters live in <first 6 chars>/<accession>; longer
# ones get an extra sub-directory made of the characters after the 9th,
# left-padded with zeros to three characters.
# Globals:   ena_fastq_dir (read)
# Arguments: $1 - run accession
# Outputs:   directory URL (no trailing slash) on stdout
# Returns:   1 if the accession length is not 9-12 characters
#######################################
ena_run_dir() {
  local run=$1
  local suffix
  case "${#run}" in
    9)
      printf '%s/%s/%s\n' "$ena_fastq_dir" "${run:0:6}" "$run"
      ;;
    10 | 11 | 12)
      suffix=${run:9}
      # String padding rather than printf %03d: a suffix such as "08" would
      # be parsed as an invalid octal number.
      while ((${#suffix} < 3)); do
        suffix="0${suffix}"
      done
      printf '%s/%s/%s/%s\n' "$ena_fastq_dir" "${run:0:6}" "$suffix" "$run"
      ;;
    *)
      return 1
      ;;
  esac
}

#######################################
# Download a URL to a file; a failed transfer leaves no partial file behind.
# Arguments: $1 - URL, $2 - destination path
# Returns:   0 on success, 1 on failure
#######################################
fetch() {
  local url=$1
  local dest=$2
  if curl "$url" > "$dest"; then
    return 0
  fi
  rm -f -- "$dest"
  return 1
}

#######################################
# Log the number of non-empty lines and reads (lines / 4) of a gzipped fastq.
# Arguments: $1 - path to the .fastq.gz file
#######################################
report_fastq() {
  local file=$1
  local lines
  # grep -c prints a bare number on every platform, unlike `wc -l`, whose
  # output is space-padded on BSD/macOS.
  lines=$(gzip -cd < "$file" | grep -c .)
  log "Fastq file: $file; Number of lines: $lines; Number of reads: $(bc <<< "scale=2; ${lines}/4")"
}

#######################################
# Log the timestamp / accession header that precedes each download.
# Arguments: $1 - run accession
#######################################
log_header() {
  log "$(date '+%d/%m/%Y %H:%M:%S')"
  log "$1"
}

#######################################
# Download and report the fastq file(s) of one run.
# Mate 1 falls back to <run>.fastq.gz (single-end naming) and is always
# saved as <run>_1.fastq.gz. Mate 2 is fetched when the directory listing
# mentions <run>_2.fastq.gz.
# Globals:   listing_tmp (read)
# Arguments: $1 - run accession
# Returns:   1 if the accession length is unsupported, 0 otherwise
#######################################
process_run() {
  local run=$1
  local base mate1 mate2
  if ! base=$(ena_run_dir "$run"); then
    printf 'Skipping "%s": unsupported accession length (%d)\n' \
      "$run" "${#run}" >&2
    return 1
  fi
  mate1="${run}_1.fastq.gz"
  mate2="${run}_2.fastq.gz"

  if curl "${base}/" > "$listing_tmp"; then
    log "Temp file download successful"
  else
    log "Temp file download failed"
    # Make sure a stale or partial listing cannot trigger a mate-2 download.
    : > "$listing_tmp"
  fi

  log_header "$run"
  if fetch "${base}/${mate1}" "$mate1" \
    || fetch "${base}/${run}.fastq.gz" "$mate1"; then
    log "Download successful"
    report_fastq "$mate1"
  else
    log "Download failed"
  fi

  if grep -qF -- "$mate2" "$listing_tmp"; then
    log_header "$run"
    if fetch "${base}/${mate2}" "$mate2"; then
      log "Download successful"
      report_fastq "$mate2"
    else
      log "Download failed"
    fi
  fi
  return 0
}

#######################################
# Entry point: validate the environment, then process every accession
# listed in the input file.
# Globals:   input_file, report_file (read), listing_tmp (written)
# Returns:   1 if a prerequisite is missing, 0 otherwise
#######################################
main() {
  local tool line run
  local missing=0

  for tool in curl gzip bc; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      printf 'Error: required tool "%s" not found in PATH\n' "$tool" >&2
      missing=1
    fi
  done
  ((missing == 0)) || return 1

  if [[ ! -r "$input_file" ]]; then
    printf 'Error: cannot read input file "%s"\n' "$input_file" >&2
    return 1
  fi

  if ! listing_tmp=$(mktemp "${TMPDIR:-/tmp}/ena_listing.XXXXXX"); then
    printf 'Error: could not create a temporary file\n' >&2
    return 1
  fi
  trap 'rm -f -- "$listing_tmp"' EXIT

  # Make sure the report exists even if no accession ends up being processed.
  : >> "$report_file"

  # The list is read on fd 3 so nothing inside the loop can consume it via
  # stdin; the `|| [[ -n ... ]]` keeps a last line lacking a newline.
  while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
    line=${line%$'\r'} # tolerate Windows (CRLF) line endings
    read -r run _ <<< "$line"
    [[ -n "$run" ]] || continue
    process_run "$run"
  done 3< "$input_file"
  return 0
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment