Skip to content

Instantly share code, notes, and snippets.

@kdiverson
Last active August 29, 2015 14:20
Show Gist options
  • Save kdiverson/1cbc5ecfba170f5b4666 to your computer and use it in GitHub Desktop.
Save kdiverson/1cbc5ecfba170f5b4666 to your computer and use it in GitHub Desktop.
run LMAT
# Print the share of all reads (hits plus no-hits) that were assigned to human.
# Column 3 of every *fastsummary.species file is summed as the hit count and
# column 2 of every *nomatchsum file as the no-hit count. Rows whose column 4
# equals 9606 (the NCBI taxonomy id of Homo sapiens) supply the human count.
# "+ 0" makes awk print 0 instead of an empty string when nothing was summed.
hit_total=$(awk '{total += $3} END {print total + 0}' *fastsummary.species)
nohit_total=$(awk '{total += $2} END {print total + 0}' *nomatchsum)
# Arithmetic expansion needs the leading "$"; a bare ((...)) on the right-hand
# side of an assignment is a syntax error. Assumes both counts are integers.
total=$(( hit_total + nohit_total ))
# Summed rather than printed per row, so several input files still give one number.
human=$(awk '$4 == 9606 {count += $3} END {print count + 0}' *fastsummary.species)
# Shell arithmetic is integer-only and would truncate the ratio to 0, so the
# division is done in awk; "NA" is printed when there are no reads at all.
awk -v human="$human" -v total="$total" \
  'BEGIN { if (total > 0) print human / total; else print "NA" }'
# One-line form for pasting into a terminal: prints human reads (column 4 == 9606, counted from column 3) divided by hits plus no-hits, or "NA" when there are no reads. The division is done in awk because shell arithmetic is integer-only.
hit_total=$(awk '{total += $3} END {print total + 0}' *fastsummary.species); nohit_total=$(awk '{total += $2} END {print total + 0}' *nomatchsum); total=$(( hit_total + nohit_total )); human=$(awk '$4 == 9606 {count += $3} END {print count + 0}' *fastsummary.species); awk -v human="$human" -v total="$total" 'BEGIN { if (total > 0) print human / total; else print "NA" }'
#!/bin/bash
# Tool locations and sample names used by the LMAT commands in this script.
# Shared prefixes are written once and every dependent value is built from them.
lmat_root=/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6

# LMAT executables and the database file, which is stored next to them.
LMAT_BIN="${lmat_root}/bin"
LMAT_DB="${LMAT_BIN}/kML+Human.v4-14.20.g10.db"
# Exported so that programs started from this script can see it.
export LMAT_DIR="${lmat_root}/runtime_inputs"

SEQTK=/mnt/EXT/Schloss-data/kiverson/seqtk/seqtk

# Output directory handed to run_rl.sh via --odir.
OUT_DIR=tmp

# Names for a single paired-end chunk, all derived from the lane-level prefix.
SINGLE_BASE=A7221349_6_TAAGGCGA-TAGATCGC_L003
BASE="${SINGLE_BASE}_001"
R1="${SINGLE_BASE}_R1_001.fastq"
R2="${SINGLE_BASE}_R2_001.fastq"
#####
#######################################
# Merge one read pair, quality-mask it, run LMAT on the result, then clean up.
# Globals (all read):
#   LMAT_BIN - directory holding merge_fastq_reads_with_N_separator.pl and run_rl.sh
#   SEQTK    - path to the seqtk executable
#   LMAT_DB  - database file passed to run_rl.sh
#   OUT_DIR  - output directory passed to run_rl.sh
#   R1, R2   - gzip-compressed FASTQ inputs (read through zcat)
#   BASE     - prefix for the intermediate ${BASE}.q10mask.fasta file
# Returns:
#   0 on success, otherwise the status of the first stage that failed.
#######################################
run_lmat() {
  local masked_fasta="${BASE}.q10mask.fasta"

  # seqtk writes its result to stdout, so it has to be redirected into the
  # FASTA file; passed as a bare argument the name is not an output target.
  # R1 and R2 are left unquoted because callers may store glob patterns in them.
  "$LMAT_BIN/merge_fastq_reads_with_N_separator.pl" <(zcat $R1) <(zcat $R2) \
    | "$SEQTK" seq -A -q 10 -n N - > "$masked_fasta" || return

  "$LMAT_BIN/run_rl.sh" --db_file="$LMAT_DB" --query_file="$masked_fasta" \
    --odir="$OUT_DIR" --threads=8 || return

  # Clean up only after a successful run, matching the "&&" chains used for
  # the submitted jobs elsewhere in this script.
  rm -- "$masked_fasta" || return
  rm -- *.out
}
# Submit one LMAT job through quicksubmit for every paired-end FASTQ chunk in
# the current directory.
#
# Earlier two-step form of the merge/mask stage, kept for reference:
#$LMAT_BIN/merge_fastq_reads_with_N_separator.pl $R1 $R2 $BASE.fastq
#$SEQTK seq -A -q 10 -n N $BASE.fastq > $BASE.q10mask.fasta
# The job text below combines both commands into a single pipeline.
workingdir=$(pwd)
# Keep only the last path component. "##*/" strips through the final slash;
# "##/*" would match the whole absolute path and leave an empty string.
sampledir=${workingdir##*/}
# Drop everything up to and including the first underscore of the directory name.
sample=${sampledir#*_}
# Count the R1 archives with a glob instead of parsing ls output. An unmatched
# glob stays as the literal pattern, hence the existence check.
r1_chunks=( *R1*.gz )
[[ -e ${r1_chunks[0]} ]] || r1_chunks=()
count=${#r1_chunks[@]}
for ((k = 1; k <= count; k++)); do
  # Zero-pad to three digits so chunk 10 and above also get the right suffix.
  printf -v chunk '%03d' "$k"
  # Kept as glob patterns on purpose: they are expanded by the shell that runs
  # the submitted job, not here.
  R1="*R1_${chunk}.fastq.gz"
  R2="*R2_${chunk}.fastq.gz"
  BASE=${sample}_${chunk}
  # A plain assignment takes no "$" on the left, and the braces stop the shell
  # from reading "BASE_out" as the variable name.
  OUT_DIR=${BASE}_out
  # Variables are expanded now; the process substitutions and the redirect stay
  # literal inside the quotes and take effect when the job runs. seqtk writes to
  # stdout, so the ">" is what creates the FASTA handed to run_rl.sh.
  # NOTE(review): every job ends with "rm *.out" in this same directory, which
  # could also remove .out files of jobs still running - confirm this is intended.
  quicksubmit "
$LMAT_BIN/merge_fastq_reads_with_N_separator.pl <(zcat $R1) <(zcat $R2) | $SEQTK seq -A -q 10 -n N - > $BASE.q10mask.fasta &&\
$LMAT_BIN/run_rl.sh --db_file=$LMAT_DB --query_file=$BASE.q10mask.fasta --odir=$OUT_DIR --threads=8 &&\
rm $BASE.q10mask.fasta &&\
rm *.out"
done
# Submit chunks 001-008 of sample A7221366_6_CGTACTAG-TATCCTCT_L002 as
# separate quicksubmit jobs.
# NOTE(review): OUT_DIR is not set inside this loop, so all eight jobs receive
# whatever value it already holds - confirm that a shared output directory is
# intended.
for i in {1..8}; do
  R1=A7221366_6_CGTACTAG-TATCCTCT_L002_R1_00$i.fastq.gz
  R2=A7221366_6_CGTACTAG-TATCCTCT_L002_R2_00$i.fastq.gz
  BASE=A7221366_6_CGTACTAG-TATCCTCT_L002_00$i
  # seqtk writes to stdout, so the ">" is what creates the FASTA that run_rl.sh
  # reads. The process substitutions and the redirect stay literal inside the
  # quotes and take effect when the job runs.
  quicksubmit "
$LMAT_BIN/merge_fastq_reads_with_N_separator.pl <(zcat $R1) <(zcat $R2) | $SEQTK seq -A -q 10 -n N - > $BASE.q10mask.fasta &&\
$LMAT_BIN/run_rl.sh --db_file=$LMAT_DB --query_file=$BASE.q10mask.fasta --odir=$OUT_DIR --threads=8 &&\
rm $BASE.q10mask.fasta &&\
rm *.out"
# The loop was previously left open; "done" closes it.
done
run KML
# Step 1: merge the R1/R2 FASTQ files of chunk 001 into a single FASTQ file.
/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/merge_fastq_reads_with_N_separator.pl \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_R1_001.fastq \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_R2_001.fastq \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_001.fastq

# Step 2: convert the merged FASTQ to FASTA with seqtk (-A), turning bases
# below quality 10 (-q 10) into N (-n N).
/mnt/EXT/Schloss-data/kiverson/seqtk/seqtk seq -A -q 10 -n N \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_001.fastq \
  > A7221349_6_TAAGGCGA-TAGATCGC_L003_001.q10mask.fasta

# Step 3: export LMAT_DIR, then run LMAT on the masked FASTA with 8 threads,
# writing into the "tmp" directory.
export LMAT_DIR=/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/runtime_inputs
/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/run_rl.sh \
  --db_file=/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/kML+Human.v4-14.20.g10.db \
  --query_file=A7221349_6_TAAGGCGA-TAGATCGC_L003_001.q10mask.fasta \
  --odir=tmp \
  --threads=8
file structure:
EPAN12-0099_Batch_[1-6]/Sample_${var}/${var}_${seq}_L00[num]_R[1-2]_00[num].fastq.gz
=====
# Chunk 002, two-step form: merge the gzipped R1/R2 files (decompressed on the
# fly through process substitution) into a FASTQ file, then mask it.
/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/merge_fastq_reads_with_N_separator.pl \
  <(zcat A7221349_6_TAAGGCGA-TAGATCGC_L003_R1_002.fastq.gz) \
  <(zcat A7221349_6_TAAGGCGA-TAGATCGC_L003_R2_002.fastq.gz) \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_002.fastq
# seqtk reads the file directly; piping it through cat added nothing.
/mnt/EXT/Schloss-data/kiverson/seqtk/seqtk seq -A -q 10 -n N \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_002.fastq \
  > A7221349_6_TAAGGCGA-TAGATCGC_L003_002.q10mask.fasta
=====
# Chunk 003: merge the gzipped R1/R2 files into a single FASTQ file.
# The output filename has to be part of the same command; on a line of its own
# the shell would try to execute it, and the merge script would receive no
# output argument.
/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/merge_fastq_reads_with_N_separator.pl \
  <(zcat A7221349_6_TAAGGCGA-TAGATCGC_L003_R1_003.fastq.gz) \
  <(zcat A7221349_6_TAAGGCGA-TAGATCGC_L003_R2_003.fastq.gz) \
  A7221349_6_TAAGGCGA-TAGATCGC_L003_003.fastq
====
# Chunk 002, single-pipeline form: the merged reads go straight from the merge
# script into seqtk, so no intermediate FASTQ file is written.
/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/merge_fastq_reads_with_N_separator.pl \
  <(zcat A7221349_6_TAAGGCGA-TAGATCGC_L003_R1_002.fastq.gz) \
  <(zcat A7221349_6_TAAGGCGA-TAGATCGC_L003_R2_002.fastq.gz) \
  | /mnt/EXT/Schloss-data/kiverson/seqtk/seqtk seq -A -q 10 -n N - \
  > A7221349_6_TAAGGCGA-TAGATCGC_L003_002.q10mask.fasta
#===
# Sample A7221366_6_CGTACTAG-TATCCTCT_L002, chunk 001: merge the gzipped R1/R2
# files and pipe the result through seqtk into the masked FASTA file.
/mnt/EXT/Schloss-data/kiverson/LMAT-1.2.6/bin/merge_fastq_reads_with_N_separator.pl \
  <(zcat A7221366_6_CGTACTAG-TATCCTCT_L002_R1_001.fastq.gz) \
  <(zcat A7221366_6_CGTACTAG-TATCCTCT_L002_R2_001.fastq.gz) \
  | /mnt/EXT/Schloss-data/kiverson/seqtk/seqtk seq -A -q 10 -n N - \
  > A7221366_6_CGTACTAG-TATCCTCT_L002_001.q10mask.fasta
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment