@naarkhoo
Last active August 29, 2015 14:19
Pipeline to process V3+V4 16S rRNA amplicons from MiSeq
# do this on each sample
#usearch -fastq_filter raw.fq -fastqout reads.fq -fastq_trunclen 260 -fastq_truncqual 10 -fastq_maxee 0.5
rawdir="/home/rostam/Projects/16srna/MS96/raw_data/"
maindir="/home/rostam/Projects/16srna/MS96/Mature"
output_dir=$maindir/"R260/pair/intereads/"
#tools
usearch61="/bin/usearch61"
flash="/home/rostam/qiime_software/smash-v2.0/flash/flash"
seqUtils="/home/rostam/qiime_software/smash-v2.0/data/maui/seqUtils"
blastn="/home/rostam/qiime_software/smash-v2.0/data/ncbi-blast+/2.2.26/blastn"
uc2otutab="/home/rostam/qiime_software/usearch-python_script/uc2otutab.py"
clip_primer="/home/rostam/qiime_software/smash-v2.0/bin/clip_primers.pl"
bmp2uparseperl=$maindir/"bmp-uparse-pipeline.pl"
fastaspliter="/home/rostam/qiime_software/smash-v2.0/data/fastq-splitter.pl"
fastanumber="/home/rostam/qiime_software/usearch-python_script/fasta_number.py"
cdhitest="/home/rostam/qiime_software/smash-v2.0/data/cd-hit-v4.6.1-2012-08-27/cd-hit-est"
corrsize=$maindir/"add_size2.pl"
#primers
# from the Illumina protocol in the document folder
forward_primer=$maindir/"fwd_primer.fa"
reverse_primer=$maindir/"rev_primer.fa"
#outputs
here=${PWD}
progress_process=$output_dir/"process.stat" # table showing how many reads pass the criteria at each step
hq_reads_folder=$output_dir/"hg_reads_folder" # high quality reads after trimming and basepair quality check
clean_reads=$output_dir/"clean_reads" #after flash
OTUFOLDER=$output_dir/"OTU_folder" #basepair quality on top of flash
clippingfolder=$output_dir/"clipping"
taxfolder=$output_dir/"tax"
#parameters (the truncation parameters below are defined but not used later in this script)
trunclen1=260
trunclen2=230
truncqual1=10
truncqual2=15
maxee=0.5
if [ -f $OTUFOLDER/otu.stat ]; then
echo "OTU stat file is removed"
rm $OTUFOLDER/otu.stat
fi
: '
# folder for clipping information
if [ ! -d $clippingfolder ]; then
mkdir $clippingfolder
else
rm -rf $clippingfolder/*
fi
# folder for high quality reads
if [ ! -d $hq_reads_folder ]; then
mkdir $hq_reads_folder
else
rm -rf $hq_reads_folder/*
fi
# folder for clean quality reads
if [ ! -d $clean_reads ]; then
mkdir $clean_reads
else
rm -rf $clean_reads/*
fi
# folder for OTUS
if [ ! -d $OTUFOLDER ]; then
mkdir $OTUFOLDER
else
rm -rf $OTUFOLDER/*
fi
# folder for tax
if [ ! -d $taxfolder ]; then
mkdir $taxfolder
else
rm -rf $taxfolder/*
fi
# restart the progress file
if [ -f $progress_process ]; then
echo "progress file is removed"
rm $progress_process
fi
# write the header
echo "sample_ID" "#fwread" "#reverseread" "#contignmbr" "#hqcontig" "#clipforward" "clipreverse" "cleanread400nmb" "cleanread400nmbfa" " " > $progress_process
for fsq in $rawdir*R2*.fastq
do
echo $fsq
# fsq='/home/rostam/Projects/16srna/MS96/raw_data/1165_A_9_run111_GCTACGCTCCGTAAGA_L001_R2_001.fastq'
# declare -a arr
bar=(`echo $fsq | tr '_' ' '`)
sample=${bar[2]}_${bar[3]}
echo $sample
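# e.g. for the commented example path above, splitting the whole path on _ gives bar[2]=A and bar[3]=9, so sample=A_9
# (these indices depend on how many underscores appear in $rawdir itself)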
forward_read=$rawdir*$sample\_*R1*
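# counting lines that are exactly + gives the read number as long as no quality string is itself a bare +;
# an alternative sketch is the total line count divided by 4, e.g. $(( $(cat $forward_read | wc -l) / 4 ))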
nmbfwread=$(grep "^\+$" $forward_read -c)
echo $nmbfwread
reverse_read=$rawdir*$sample\_*R2*
nmbreverread=$(grep "^\+$" $reverse_read -c)
echo $nmbreverread
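# FLASH is given -r (read length, 300 nt), -f (expected fragment length, 456 nt for the V3+V4 amplicon used here)
# and -s (fragment length standard deviation); it derives the expected read overlap from these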
$flash -r 300 -f 456 -s 11 $forward_read $reverse_read --output-prefix $sample.pair -d $clean_reads
contigssnmb=$(grep "^\+$" $clean_reads/$sample.pair.extendedFrags.fastq -c)
#expected errors per read: usearch works with error probabilities rather than raw quality scores, converting each base quality to an error probability and summing them over the read
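# for a base with Phred score Q the error probability is p = 10^(-Q/10); a merged read is kept only if the
# sum of p over all its bases is at most maxee (0.5 here)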
$usearch61 -fastq_filter $clean_reads/$sample.pair.extendedFrags.fastq -fastqout $hq_reads_folder/$sample.pair.hg.fq -fastq_maxee $maxee
hgcontignmbr=$(grep "^\+$" $hq_reads_folder/$sample.pair.hg.fq -c)
cliplogforward=$sample.p.clip.log #high quality basepair contigs
$clip_primer --seq=$hq_reads_folder/$sample.pair.hg.fq --type=fastq --fwdprimer=$forward_primer --cpus=8 > $output_dir/$cliplogforward
clipforwardnmbr=$(grep "^\+$" $hq_reads_folder/$sample.pair.hg.clipped.fq -c)
#reverse-complement the read so the reverse primer sits at the start, then clip it with the same tool (passed via --fwdprimer)
$seqUtils -m revcomp -t fastq $hq_reads_folder/$sample.pair.hg.clipped.fq > $clippingfolder/$sample.clipped.rev.fq
cliplogreverse=$sample.p.clip.rev.log #high quality basepair contigs
$clip_primer --seq=$clippingfolder/$sample.clipped.rev.fq --type=fastq --fwdprimer=$reverse_primer --cpus=8 > $output_dir/$cliplogreverse
cliprevnmbr=$(grep "^\+$" $clippingfolder/$sample.clipped.rev.clipped.fq -c)
#reverse-complement back to the original orientation
$seqUtils -m revcomp -t fastq $clippingfolder/$sample.clipped.rev.clipped.fq > $clean_reads/$sample.clean.fq
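# presumably -m separate -s 399 splits by length: reads longer than 399 nt go to stdout (the .400 file)
# and the rest to stderr (the .short file)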
$seqUtils -m separate -t fastq -s 399 $clean_reads/$sample.clean.fq 1>$clean_reads/$sample.clean.400.fq 2>$clean_reads/$sample.clean.short.fq
cleanread400nmb=$(grep "^\+$" $clean_reads/$sample.clean.400.fq -c)
#(update: we do not need FASTA here because we still depend on split_libraries; the commands below were used only for co-analysis. I would prefer not to use the split_libraries step at all)
#$usearch61 -fastq_filter $clean_reads/$sample.clean.400.fq -fastaout $clean_reads/$sample.clean.400.fa -fastq_maxee $maxee
#cleanread400nmbfasta=$(grep ">" $clean_reads/$sample.clean.400.fa -c)
echo $sample $nmbfwread $nmbreverread $contigssnmb $hgcontignmbr $clipforwardnmbr $cliprevnmbr $cleanread400nmb $cleanread400nmbfasta " " >> $progress_process
done
exit
'
# limited to 400
#clean_reads=$output_dir/"clean_reads"
# split_libraries rewrites the read ids (adding the sample label) so the reads can be tracked through sortbysize and the OTU-table step
#$python ~/qiime_software/qiime-1.8.0-release/bin/split_libraries_fastq.py \
#-i $clean_reads/A_10.clean.400.fq,$clean_reads/A_18.clean.400.fq,$clean_reads/A_25.clean.400.fq,$clean_reads/A_32.clean.400.fq,$clean_reads/A_3.clean.400.fq,$clean_reads/A_47.clean.400.fq,$clean_reads/A_54.clean.400.fq,$clean_reads/A_61.clean.400.fq,$clean_reads/A_69.clean.400.fq,$clean_reads/A_76.clean.400.fq,$clean_reads/A_83.clean.400.fq,$clean_reads/A_90.clean.400.fq,$clean_reads/A_11.clean.400.fq,$clean_reads/A_19.clean.400.fq,$clean_reads/A_26.clean.400.fq,$clean_reads/A_33.clean.400.fq,$clean_reads/A_40.clean.400.fq,$clean_reads/A_48.clean.400.fq,$clean_reads/A_55.clean.400.fq,$clean_reads/A_62.clean.400.fq,$clean_reads/A_6.clean.400.fq,$clean_reads/A_77.clean.400.fq,$clean_reads/A_84.clean.400.fq,$clean_reads/A_91.clean.400.fq,$clean_reads/A_12.clean.400.fq,$clean_reads/A_1.clean.400.fq,$clean_reads/A_27.clean.400.fq,$clean_reads/A_34.clean.400.fq,$clean_reads/A_41.clean.400.fq,$clean_reads/A_49.clean.400.fq,$clean_reads/A_56.clean.400.fq,$clean_reads/A_63.clean.400.fq,$clean_reads/A_70.clean.400.fq,$clean_reads/A_78.clean.400.fq,$clean_reads/A_85.clean.400.fq,$clean_reads/A_92.clean.400.fq,$clean_reads/A_13.clean.400.fq,$clean_reads/A_20.clean.400.fq,$clean_reads/A_28.clean.400.fq,$clean_reads/A_35.clean.400.fq,$clean_reads/A_42.clean.400.fq,$clean_reads/A_4.clean.400.fq,$clean_reads/A_57.clean.400.fq,$clean_reads/A_64.clean.400.fq,$clean_reads/A_71.clean.400.fq,$clean_reads/A_79.clean.400.fq,$clean_reads/A_86.clean.400.fq,$clean_reads/A_93.clean.400.fq,$clean_reads/A_14.clean.400.fq,$clean_reads/A_21.clean.400.fq,$clean_reads/A_29.clean.400.fq,$clean_reads/A_36.clean.400.fq,$clean_reads/A_43.clean.400.fq,$clean_reads/A_50.clean.400.fq,$clean_reads/A_58.clean.400.fq,$clean_reads/A_65.clean.400.fq,$clean_reads/A_72.clean.400.fq,$clean_reads/A_7.clean.400.fq,$clean_reads/A_87.clean.400.fq,$clean_reads/A_94.clean.400.fq,$clean_reads/A_15.clean.400.fq,$clean_reads/A_22.clean.400.fq,$clean_reads/A_2.clean.400.fq,$clean_reads/A_37.clean.400.fq,$clean_reads/A_44.clean.400.fq,$clean_reads/A_51.clean.400.fq,$clean_reads/A_59.clean.400.fq,$clean_reads/A_66.clean.400.fq,$clean_reads/A_73.clean.400.fq,$clean_reads/A_80.clean.400.fq,$clean_reads/A_88.clean.400.fq,$clean_reads/A_95.clean.400.fq,$clean_reads/A_16.clean.400.fq,$clean_reads/A_23.clean.400.fq,$clean_reads/A_30.clean.400.fq,$clean_reads/A_38.clean.400.fq,$clean_reads/A_45.clean.400.fq,$clean_reads/A_52.clean.400.fq,$clean_reads/A_5.clean.400.fq,$clean_reads/A_67.clean.400.fq,$clean_reads/A_74.clean.400.fq,$clean_reads/A_81.clean.400.fq,$clean_reads/A_89.clean.400.fq,$clean_reads/A_96.clean.400.fq,$clean_reads/A_17.clean.400.fq,$clean_reads/A_24.clean.400.fq,$clean_reads/A_31.clean.400.fq,$clean_reads/A_39.clean.400.fq,$clean_reads/A_46.clean.400.fq,$clean_reads/A_53.clean.400.fq,$clean_reads/A_60.clean.400.fq,$clean_reads/A_68.clean.400.fq,$clean_reads/A_75.clean.400.fq,$clean_reads/A_82.clean.400.fq,$clean_reads/A_8.clean.400.fq,$clean_reads/A_9.clean.400.fq \
#-o $clean_reads/ \
#-m /home/rostam/Projects/16srna/MS96/raw_data/mapping.single.qiime \
#--barcode_type 'not-barcoded' \
#--sample_ids A10,A18,A25,A32,A3,A47,A54,A61,A69,A76,A83,A90,A11,A19,A26,A33,A40,A48,A55,A62,A6,A77,A84,A91,A12,A1,A27,A34,A41,A49,A56,A63,A70,A78,A85,A92,A13,A20,A28,A35,A42,A4,A57,A64,A71,A79,A86,A93,A14,A21,A29,A36,A43,A50,A58,A65,A72,A7,A87,A94,A15,A22,A2,A37,A44,A51,A59,A66,A73,A80,A88,A95,A16,A23,A30,A38,A45,A52,A5,A67,A74,A81,A89,A96,A17,A24,A31,A39,A46,A53,A60,A68,A75,A82,A8,A9 \
#--store_demultiplexed_fastq \
#-v
: '
# everything
clean_reads=$output_dir/"clean_reads"
# split_libraries rewrites the read ids (adding the sample label) so the reads can be tracked through sortbysize and the OTU-table step
python ~/qiime_software/qiime-1.8.0-release/bin/split_libraries_fastq.py \
-i $clean_reads/A_10.clean.fq,$clean_reads/A_18.clean.fq,$clean_reads/A_25.clean.fq,$clean_reads/A_32.clean.fq,$clean_reads/A_3.clean.fq,$clean_reads/A_47.clean.fq,$clean_reads/A_54.clean.fq,$clean_reads/A_61.clean.fq,$clean_reads/A_69.clean.fq,$clean_reads/A_76.clean.fq,$clean_reads/A_83.clean.fq,$clean_reads/A_90.clean.fq,$clean_reads/A_11.clean.fq,$clean_reads/A_19.clean.fq,$clean_reads/A_26.clean.fq,$clean_reads/A_33.clean.fq,$clean_reads/A_40.clean.fq,$clean_reads/A_48.clean.fq,$clean_reads/A_55.clean.fq,$clean_reads/A_62.clean.fq,$clean_reads/A_6.clean.fq,$clean_reads/A_77.clean.fq,$clean_reads/A_84.clean.fq,$clean_reads/A_91.clean.fq,$clean_reads/A_12.clean.fq,$clean_reads/A_1.clean.fq,$clean_reads/A_27.clean.fq,$clean_reads/A_34.clean.fq,$clean_reads/A_41.clean.fq,$clean_reads/A_49.clean.fq,$clean_reads/A_56.clean.fq,$clean_reads/A_63.clean.fq,$clean_reads/A_70.clean.fq,$clean_reads/A_78.clean.fq,$clean_reads/A_85.clean.fq,$clean_reads/A_92.clean.fq,$clean_reads/A_13.clean.fq,$clean_reads/A_20.clean.fq,$clean_reads/A_28.clean.fq,$clean_reads/A_35.clean.fq,$clean_reads/A_42.clean.fq,$clean_reads/A_4.clean.fq,$clean_reads/A_57.clean.fq,$clean_reads/A_64.clean.fq,$clean_reads/A_71.clean.fq,$clean_reads/A_79.clean.fq,$clean_reads/A_86.clean.fq,$clean_reads/A_93.clean.fq,$clean_reads/A_14.clean.fq,$clean_reads/A_21.clean.fq,$clean_reads/A_29.clean.fq,$clean_reads/A_36.clean.fq,$clean_reads/A_43.clean.fq,$clean_reads/A_50.clean.fq,$clean_reads/A_58.clean.fq,$clean_reads/A_65.clean.fq,$clean_reads/A_72.clean.fq,$clean_reads/A_7.clean.fq,$clean_reads/A_87.clean.fq,$clean_reads/A_94.clean.fq,$clean_reads/A_15.clean.fq,$clean_reads/A_22.clean.fq,$clean_reads/A_2.clean.fq,$clean_reads/A_37.clean.fq,$clean_reads/A_44.clean.fq,$clean_reads/A_51.clean.fq,$clean_reads/A_59.clean.fq,$clean_reads/A_66.clean.fq,$clean_reads/A_73.clean.fq,$clean_reads/A_80.clean.fq,$clean_reads/A_88.clean.fq,$clean_reads/A_95.clean.fq,$clean_reads/A_16.clean.fq,$clean_reads/A_23.clean.fq,$clean_reads/A_30.clean.fq,$clean_reads/A_38.clean.fq,$clean_reads/A_45.clean.fq,$clean_reads/A_52.clean.fq,$clean_reads/A_5.clean.fq,$clean_reads/A_67.clean.fq,$clean_reads/A_74.clean.fq,$clean_reads/A_81.clean.fq,$clean_reads/A_89.clean.fq,$clean_reads/A_96.clean.fq,$clean_reads/A_17.clean.fq,$clean_reads/A_24.clean.fq,$clean_reads/A_31.clean.fq,$clean_reads/A_39.clean.fq,$clean_reads/A_46.clean.fq,$clean_reads/A_53.clean.fq,$clean_reads/A_60.clean.fq,$clean_reads/A_68.clean.fq,$clean_reads/A_75.clean.fq,$clean_reads/A_82.clean.fq,$clean_reads/A_8.clean.fq,$clean_reads/A_9.clean.fq \
-o $clean_reads/ \
-m /home/rostam/Projects/16srna/MS96/raw_data/mapping.single.qiime \
--barcode_type 'not-barcoded' \
--sample_ids A10,A18,A25,A32,A3,A47,A54,A61,A69,A76,A83,A90,A11,A19,A26,A33,A40,A48,A55,A62,A6,A77,A84,A91,A12,A1,A27,A34,A41,A49,A56,A63,A70,A78,A85,A92,A13,A20,A28,A35,A42,A4,A57,A64,A71,A79,A86,A93,A14,A21,A29,A36,A43,A50,A58,A65,A72,A7,A87,A94,A15,A22,A2,A37,A44,A51,A59,A66,A73,A80,A88,A95,A16,A23,A30,A38,A45,A52,A5,A67,A74,A81,A89,A96,A17,A24,A31,A39,A46,A53,A60,A68,A75,A82,A8,A9 \
--store_demultiplexed_fastq \
-v
'
#(we do not need FASTA yet; if split_libraries is replaced with something else, this can be used) cat R260/pair/intereads/clean_reads/*.400.fa > R260/pair/intereads/clean_reads/seqs.fa
#perl $fastaspliter --n-parts 4 $clean_reads/seqs.fastq
#for i in {1..4}
#do
# splitfile=seqs.part-$i.fastq
# splitfilefastq=$clean_reads/$splitfile
# splitfilefasta=$clean_reads/seqs.part-$i.fa
#
# $usearch61 -fastq_filter $splitfilefastq -fastaout $splitfilefasta -fastq_maxee $maxee &> $splitfilefasta.stat
# awk '/^>/{print s? s"\n"$0:$0;s="";next}{s=s sprintf("%s",$0)}END{if(s)print s}' $splitfilefasta > $splitfilefasta.fa
# derepfiles=$clean_reads/seqs.part-$i.derep.fa
# $usearch61 -derep_fulllength $splitfilefasta.fa -output $derepfiles -sizeout
#done
cat $clean_reads/seqs.*.derep.fa > $clean_reads/seqs.tot.fasta
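# the awk one-liner below collapses multi-line FASTA records so each sequence sits on a single line after its
# header (a layout the greps further down rely on)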
awk '/^>/{print s? s"\n"$0:$0;s="";next}{s=s sprintf("%s",$0)}END{if(s)print s}' $clean_reads/seqs.tot.fasta > $clean_reads/seqs.tot.wob.fasta
#: '
perl $bmp2uparseperl -i $clean_reads/seqs.tot.wob.fasta -o $clean_reads/seqs.tot.fasta
#cdhit
#$usearch61 -sortbysize $clean_reads/seqs.tot.fasta -output $clean_reads/seqs.tot.sort.fasta -minsize 1
#$cdhitest -i $clean_reads/seqs.tot.fasta -o $OTUFOLDER/uniques.fa -c 1 -n 12 -l 100 -d 0 -A 0.95 -s 0.95 -T 12 -M 100000
#$corrsize $OTUFOLDER/uniques.fa $OTUFOLDER/uniques.fa.clstr > $OTUFOLDER/uniques.fa.fixed
#: '
#uparse
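# sortbysize with -minsize 2 drops singleton uniques before clustering, as the UPARSE pipeline recommends;
# cluster_otus then forms OTUs at a 3% radius (roughly 97% identity)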
$usearch61 -sortbysize $clean_reads/seqs.tot.fasta -output $clean_reads/seqs.tot.sorted.uparse.fasta -minsize 2
$usearch61 -cluster_otus $clean_reads/seqs.tot.sorted.uparse.fasta -otus $OTUFOLDER/otus97_uparse1.fa -otu_radius_pct 3 -fastaout $OTUFOLDER/seqs.tot.singles97.annotated
# write in the log file how many OTUs are formed
grep ">" $OTUFOLDER/otus97_uparse1.fa -c >> $OTUFOLDER/otu.stat
#chimera removal
$usearch61 -uchime_ref $OTUFOLDER/otus97_uparse1.fa -db ~/qiime_software/gg_13_8_otus/gold.fa -strand plus -nonchimeras $OTUFOLDER/otus97_uparse2.fa -chimeras $OTUFOLDER/chimeras97_uparse2.fa
#'
# make them in 1 line
awk '/^>/{print s? s"\n"$0:$0;s="";next}{s=s sprintf("%s",$0)}END{if(s)print s}' $OTUFOLDER/otus97_uparse2.fa > $OTUFOLDER/otus97_uparse3.fa
# relabel them: number each OTU sequentially with an OTU_ prefix
python $fastanumber $OTUFOLDER/otus97_uparse3.fa OTU_ > $OTUFOLDER/otus97_uparse.fa
# write how many non-chimeric OTUs remain
grep ">" $OTUFOLDER/otus97_uparse.fa -c >> $OTUFOLDER/otu.stat
nonchimera=$(grep ">" $OTUFOLDER/otus97_uparse.fa -c)
# blast versus mouse
$blastn -query $OTUFOLDER/otus97_uparse.fa -db /home/rostam/Projects/16srna/DB/mm10 -out $OTUFOLDER/otus1.mm10.blastn -reward 1 -penalty -2 -gapopen 2 -gapextend 2 -outfmt 6 -max_target_seqs 5 -perc_identity 85 -num_threads 40 -strand both
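# -outfmt 6 is tabular BLAST output: column 1 is the query (OTU) id and column 3 the percent identity,
# which the awk filters below read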
# pick the hits with percent identity above 89.99
nmbmouse=$( awk '$3>89.99{print $1}' $OTUFOLDER/otus1.mm10.blastn | sort | uniq | wc -w)
# write mouse otus
awk '$3>89.99{print $1}' $OTUFOLDER/otus1.mm10.blastn | sort | uniq > $OTUFOLDER/mouse_otus.list
# write how many non-mouse otus
nonmouse=$((nonchimera - nmbmouse))
echo $nonmouse >> $OTUFOLDER/otu.stat
# make mouse fasta!
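# grep -A 1 keeps each listed mouse OTU header plus its single (linearized) sequence line and inserts -- group
# separators, which are stripped later with sed; the grep -v -f below then removes those headers and sequences
# from the OTU set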
grep -f $OTUFOLDER/mouse_otus.list $OTUFOLDER/otus97_uparse.fa -A 1 > $OTUFOLDER/mouse.fa
# remove the mouse OTUs from the full OTU set
grep -v -f $OTUFOLDER/mouse.fa $OTUFOLDER/otus97_uparse.fa > $OTUFOLDER/otus.nmouse.fa
grep -c ">" $OTUFOLDER/otus.nmouse.fa
# map reads back to these good OTUs
$usearch61 -usearch_global $clean_reads/seqs.tot.fasta -db $OTUFOLDER/otus.nmouse.fa -strand plus -id 0.97 -uc $OTUFOLDER/map.uc
$usearch61 -usearch_global $clean_reads/seqs.tot.fasta -db $OTUFOLDER/chimeras97_uparse2.fa -strand plus -id 0.97 -uc $OTUFOLDER/map_chimera.uc
#cut -f9 $OTUFOLDER/map_chimera.uc | tr '=' ' ' | cut -d " " -f3 | sort | uniq -c > $OTUFOLDER/chimera_reads.stat
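# in .uc mapping output field 1 is the record type (H = hit), field 9 the query label and field 10 the matched
# target; assuming the query labels begin with the sample name, splitting on _ and counting gives per-sample hits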
awk '$1=="H" {print $9,$10}' $OTUFOLDER/map_chimera.uc | tr '_' ' '| cut -d ' ' -f1 | sort | uniq -c > $OTUFOLDER/chimera_reads.stat
sed 's/--//' $OTUFOLDER/mouse.fa > $OTUFOLDER/mousewodsh.fa
$usearch61 -usearch_global $clean_reads/seqs.tot.fasta -db $OTUFOLDER/mousewodsh.fa -strand plus -id 0.97 -uc $OTUFOLDER/map_mouse.uc
#cut -f9 $OTUFOLDER/map_mouse.uc | tr '=' ' ' | cut -d " " -f3 | sort | uniq -c > $OTUFOLDER/mouse_reads.stat
awk '$1=="H" {print $9,$10}' $OTUFOLDER/map_mouse.uc | tr '_' ' '| cut -d ' ' -f1 | sort | uniq -c > $OTUFOLDER/mouse_reads.stat
python $uc2otutab $OTUFOLDER/map.uc > $OTUFOLDER/otu_table.txt
biom convert -i $OTUFOLDER/otu_table.txt -o $OTUFOLDER/otu_table_uparse.biome --table-type="otu table"
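# optional sanity check of the resulting table (sequence counts per sample, number of OTUs):
#biom summarize-table -i $OTUFOLDER/otu_table_uparse.biome -o $OTUFOLDER/otu_table_uparse.summary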
#'
export OTUFOLDER=/home/rostam/Projects/16srna/MS96/Mature/R260/pair/intereads/OTU_folder
export taxfolder=/home/rostam/Projects/16srna/MS96/Mature/R260/pair/intereads/tax
python ~/qiime_software/qiime-1.8.0-release/bin/assign_taxonomy.py \
-i $OTUFOLDER/otus.nmouse.fa \
-o $taxfolder/blast \
-r ~/qiime_software/gg_otus-13_8-release/rep_set/97_otus.fasta \
-t ~/qiime_software/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt \
-m blast
#python ~/qiime_software/qiime-1.8.0-release/bin/assign_taxonomy.py \
# -i $OTUFOLDER/otus.nmouse.fa \
# -o $taxfolder/rdp \
# -r ~/qiime_software/gg_otus-13_8-release/rep_set/97_otus.fasta \
# -t ~/qiime_software/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt \
# -m rdp
biom add-metadata \
-i $OTUFOLDER/otu_table_uparse.biome \
-o $OTUFOLDER/otu_table_uparse.blast.tax.biome \
--observation-metadata-fp $taxfolder/blast/otus.nmouse_tax_assignments.txt \
--observation-header OTUID,taxonomy,confidence --sc-separated taxonomy --float-fields confidence
python ~/qiime_software/qiime-1.8.0-release/bin/assign_taxonomy.py \
-i $OTUFOLDER/otus.nmouse.fa \
-o $taxfolder/blast_SLV \
-r /home/rostam/Projects/16srna/DB/silva.bacteria/SLV_115.fasta \
-t /home/rostam/Projects/16srna/DB/silva.bacteria/SLV_115.tax \
-m blast
biom add-metadata \
-i $OTUFOLDER/otu_table_uparse.biome \
-o $OTUFOLDER/otu_table_uparse.blast.slv.tax.biome \
--observation-metadata-fp $taxfolder/blast_SLV/otus.nmouse_tax_assignments.txt \
--observation-header OTUID,taxonomy,confidence --sc-separated taxonomy --float-fields confidence
## TO DO : association using silva119
## TO DO : tree using silva119
## inspired by http://www.brmicrobiome.org/#!16s-profiling-pipeline-illumin/c3u5
python ~/qiime_software/qiime-1.8.0-release/bin/align_seqs.py \
-i $OTUFOLDER/otus.nmouse.fa \
-o $OTUFOLDER/rep_set_align \
-t ~/qiime_software/gg_otus-13_8-release/rep_set_aligned/97_otus.fasta
python ~/qiime_software/qiime-1.8.0-release/bin/filter_alignment.py \
-i $OTUFOLDER/rep_set_align/otus.nmouse_aligned.fasta \
-o $OTUFOLDER/filtered_alignment
python ~/qiime_software/qiime-1.8.0-release/bin/make_phylogeny.py \
-i $OTUFOLDER/filtered_alignment/otus.nmouse_aligned_pfiltered.fasta \
-o $OTUFOLDER/rep_set.tre
exit