Skip to content

Instantly share code, notes, and snippets.

@michael-ford
Last active January 6, 2019 22:59
Show Gist options
  • Save michael-ford/2be94d9780c95a02f6c161f97c2b2330 to your computer and use it in GitHub Desktop.
Save michael-ford/2be94d9780c95a02f6c161f97c2b2330 to your computer and use it in GitHub Desktop.
Fetch Reads from EBI-ENA via Aspera and Map to Reference
#!/bin/bash
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
set -e
set -u
set -o pipefail
if [ "$#" -lt 4 ]
then
echo "Fetch reads from EBI and map with Minimap"
echo "usage: fetch_reads_and_map.sh [accession_list.txt] [read_save_dir_path] [mapping_save_dir_path] [mapping_reference] <threads[6]>"
exit 1
fi
# Set env variables here:
$ASPERA="~/.aspera"
export $MINIMAP="minimap"
$ACCESSION_SUFFIX='_subreads.fastq.gz' # this can be changed incase the read datatype you are downloading changes the suffix
# Get parameters
accessions=$1
read_dir=$2
export mapping_dir=$3
export reference=$4
threads=6
if [ "$#" -lt 5 ]; then threads=$5; fi
#Download fastq files
mkdir -p $read_dir
while read line; do
st=${line:0:6}
end=${line:9:1}
$ASPERA/connect/bin/ascp -QT -l 300m -P33001 -i $ASPERA/connect/etc/asperaweb_id_dsa.openssh era-fasp@fasp.sra.ebi.ac.uk:/vol1/fastq/$st/00$end/$line/${line}${ACCESSION_SUFFIX} $read_dir
done < $accessions
#Map reads against reference
map (){
echo $1
output_path="$mapping_dir/$(basname $1 | tr \. \\t | cut -f1)";
$MINIMAP -ax map-pb -t 1 $reference $1 | samtools view -b -F4 | samtools sort -m 2G - > $output_path.bam;
}
export -f map
mkdir -p $mapping_dir
find $read_dir -name '*fastq*' | \
parallel -j $threads map &&
samtools merge "$mapping_dir.bam" $mapping_dir/*bam
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment