Skip to content

Instantly share code, notes, and snippets.

@tkrahn
Last active September 14, 2019 20:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tkrahn/3034da009271287695364786478a5bb8 to your computer and use it in GitHub Desktop.
Save tkrahn/3034da009271287695364786478a5bb8 to your computer and use it in GitHub Desktop.
Nanopore De Novo Assembly Pipeline (Experimental)
#!/bin/bash
START=$(date +%s.%N)
clear
# setup parameters
YSEQID=${PWD##*/}
# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)
NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
echo "We can use ${NUM_THREADS} threads."
READS_1="fastq/${YSEQID}_R1.fastq.gz"
READS_2="fastq/${YSEQID}_R2.fastq.gz"
READS_NANOPORE="fastq/${YSEQID}_nanopore.fastq.gz"
REF="/genomes/0/refseq/hg38/hg38.fa"
# Pipeline commands:
wtdbg2 -x ont -g3g -t31 -t ${NUM_THREADS} -i ${READS_NANOPORE} -fo ${YSEQID}_nanopore_wtbg2
wtpoa-cns -t ${NUM_THREADS} -i ${YSEQID}_nanopore_wtbg2.ctg.lay.gz -fo ${YSEQID}_nanopore_wtbg2.ctg.fa
# polish consensus
minimap2 -t ${NUM_THREADS} -ax map-ont -r2k ${YSEQID}_nanopore_wtbg2.ctg.fa ${READS_NANOPORE} | \
samtools sort -@${NUM_THREADS} >${YSEQID}_nanopore_wtbg2.bam
samtools view -F0x900 ${YSEQID}_nanopore_wtbg2.bam | \
wtpoa-cns -t ${NUM_THREADS} -d ${YSEQID}_nanopore_wtbg2.ctg.fa -i - -fo ${YSEQID}_nanopore_wtbg2.cns.fa
bwa index ${YSEQID}_nanopore_wtbg2.cns.fa
# Addtional polishment using short reads
bwa mem -t ${NUM_THREADS} ${YSEQID}_nanopore_wtbg2.cns.fa ${READS_1} ${READS_2} | \
samtools sort -O SAM | wtpoa-cns -t ${NUM_THREADS} -x sam-sr -d ${YSEQID}_nanopore_wtbg2.cns.fa -i - -fo ${YSEQID}_nanopore_wtbg2.srp.fa
# Delete no longer needed large files
#rm -f $FILE
# map sr polished fasta sequences to hg38
minimap2 -t ${NUM_THREADS} -ax asm5 --cs ${REF} ${YSEQID}_nanopore_wtbg2.srp.fa | \
samtools view -@ $NUM_THREADS -b -t ${REF} -o ${YSEQID}_nanopore_wtbg2.srp.bam -
samtools sort -@ $NUM_THREADS -T /usr/local/geospiza/var/tmp/sorted -o ${YSEQID}_nanopore_wtbg2.srp.sorted.bam ${YSEQID}_nanopore_wtbg2.srp.bam
samtools index ${YSEQID}_nanopore_wtbg2.srp.sorted.bam
samtools idxstats ${YSEQID}_nanopore_wtbg2.srp.sorted.bam > ${YSEQID}_nanopore_wtbg2.srp.sorted.bam.idxstats
# Extract chrY
samtools view -@ $NUM_THREADS -b -o "${YSEQID}_nanopore_wtbg2.chrY.bam" ${YSEQID}_nanopore_wtbg2.srp.sorted.bam chrY
samtools index ${YSEQID}_nanopore_wtbg2.chrY.bam
# Extract chrY fasta files from chrY bam
samtools bam2fq -@ ${NUM_THREADS} ${YSEQID}_nanopore_wtbg2.chrY.bam > ${YSEQID}_chrY_contigs.fastq
END=$(date +%s.%N)
DIFF=$(echo "$END - $START" | bc)
echo ${DIFF}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment