Skip to content

Instantly share code, notes, and snippets.

@cfriedline
Last active August 2, 2016 17:26
Show Gist options
  • Save cfriedline/556e5cbc114e6741acd697ecfe8a2159 to your computer and use it in GitHub Desktop.
Save cfriedline/556e5cbc114e6741acd697ecfe8a2159 to your computer and use it in GitHub Desktop.
RefMapOpt.sh changes for SE
diff --git a/RefMapOpt.sh.1 b/RefMapOpt.sh
index 47f8f02..ce0e168 100644
--- a/RefMapOpt.sh.1
+++ b/RefMapOpt.sh
@@ -11,9 +11,9 @@ if find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null| grep -q 'tr
echo "The dependency trimmomatic is not installed or is not in your" '$PATH'"."
NUMDEP=$((NUMDEP + 1))
fi
-
-if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | grep -q 'Tru' ; then
- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
+
+if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-SE.fa 2> /dev/null | grep -q 'Tru' ; then
+ ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-SE.fa 2> /dev/null | head -1)
else
echo "The file listing adapters (included with trimmomatic) is not installed or is not in your" '$PATH'"."
NUMDEP=$((NUMDEP + 1))
@@ -54,7 +54,7 @@ if [ ${NAMES[@]:(-1)}.F.fq.gz -nt ${NAMES[@]:(-1)}.uniq.seqs ];then
for i in "${NAMES[@]}";
do
zcat $i.R.fq.gz | head -2 | tail -1 >> lengths.txt
- done
+ done
MaxLen=$(mawk '{ print length() | "sort -rn" }' lengths.txt| head -1)
LENGTH=$(( $MaxLen / 3))
for i in "${NAMES[@]}"
@@ -63,8 +63,8 @@ if [ ${NAMES[@]:(-1)}.F.fq.gz -nt ${NAMES[@]:(-1)}.uniq.seqs ];then
done
cat namelist | parallel --no-notice -j $NUMProc "mawk '$AWK1' {}.assembled.fastq | mawk '$AWK2' | perl -e '$PERLT' > {}.uniq.seqs"
fi
-
-
+
+
fi
#Create a data file with the number of unique sequences and the number of occurrences
@@ -118,7 +118,7 @@ if [ "$ATYPE" == "PE" ]; then
mv ref.RC.fq ref.R.fq
LENGTH=$(mawk '!/>/' rainbow.fasta | mawk '(NR==1||length<shortest){shortest=length} END {print shortest}')
LENGTH=$(( $LENGTH * 5 / 4))
-
+
pearRM -f ref.F.fq -r ref.R.fq -o overlap -p 0.001 -j $NUMProc -n $LENGTH &>kopt.log
rm ref.F.fq ref.R.fq
@@ -186,8 +186,8 @@ do
rm lengths.txt &> /dev/null
for k in "${RANDNAMES[@]}";
do
- zcat $k.R.fq.gz | head -2 | tail -1 >> lengths.txt
- done
+ zcat $k.F.fq.gz | head -2 | tail -1 >> lengths.txt
+ done
MaxLen=$(mawk '{ print length() | "sort -rn" }' lengths.txt| head -1)
INSERT=$(($MaxLen * 2 ))
INSERTH=$(($INSERT + 100 ))
@@ -198,10 +198,11 @@ do
rm $i.$j.results 2>/dev/null
for k in "${RANDNAMES[@]}"
do
- bwa mem reference.fasta $k.R1.fq.gz $k.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t 32 -a -M -T 10 -A 1 -B 4 -O 6 -R "@RG\tID:$k\tSM:$k\tPL:Illumina" 2> bwa.$i.log | mawk '!/\t[2-9].[SH].*/' | mawk '!/[2-9].[SH]\t/' | samtools view -@32 -q 1 -SbT reference.fasta - > $k.bam
- samtools sort -@32 $k.bam $k
+ #bwa mem reference.fasta $k.R1.fq.gz $k.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t 32 -a -M -T 10 -A 1 -B 4 -O 6 -R "@RG\tID:$k\tSM:$k\tPL:Illumina" 2> bwa.$i.log | mawk '!/\t[2-9].[SH].*/' | mawk '!/[2-9].[SH]\t/' | samtools view -@32 -q 1 -SbT reference.fasta - > $k.bam
+ bwa mem reference.fasta $k.R1.fq.gz -L 20,5 -t 32 -a -M -T 10 -A 1 -B 4 -O 6 -R "@RG\tID:$k\tSM:$k\tPL:Illumina" 2> bwa.$i.log | mawk '!/\t[2-9].[SH].*/' | mawk '!/[2-9].[SH]\t/' | samtools view -@32 -q 1 -SbT reference.fasta - > $k.bam
+ samtools sort -@32 $k.bam $k
samtools index $k.bam
- MM=$(samtools flagstat $k.bam | grep -E 'mapped \(|properly' | cut -f1 -d '+' | tr -d '\n')
+ MM=$(samtools flagstat $k.bam | grep -E 'mapped \(' | cut -f1 -d '+' | tr -d '\n')
CM=$(samtools idxstats $k.bam | mawk '$3 > 0' | wc -l)
CC=$(samtools idxstats $k.bam | mawk '{ sum += $3; n++ } END { if (n > 0) print sum / n; }')
DD=$(samtools idxstats $k.bam | mawk '$3 >0' | mawk '{ sum += $3; n++ } END { if (n > 0) print sum / n; }')
@@ -220,5 +221,5 @@ do
echo -e "$CCC\t$DDD\t$CON\t$AC\t$i\t$j\t$SUMM\t$SUMPM\t$AVEM\t$AVEP\t$BBM" >> mapping.results
fi
done
-
+
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment