Last active
August 29, 2015 14:19
-
-
Save naarkhoo/25dc9da1ddc474bec624 to your computer and use it in GitHub Desktop.
Pipeline to process V3+V4 16S rRNA from MiSeq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Pipeline to process V3+V4 16S rRNA MiSeq reads:
# merge pairs (FLASH), quality-filter (usearch), clip primers,
# cluster OTUs (UPARSE), screen mouse (host) sequences, build the OTU
# table and assign taxonomy (Greengenes + SILVA).
#
# do this on each sample (kept for reference):
#usearch -fastq_filter raw.fq -fastqout reads.fq -fastq_trunclen 260 -fastq_truncqual 10 -fastq_maxee 0.5

# --- input / output locations ---
rawdir="/home/rostam/Projects/16srna/MS96/raw_data/"
maindir="/home/rostam/Projects/16srna/MS96/Mature"
output_dir="$maindir/R260/pair/intereads/"

# --- external tools ---
usearch61="/bin/usearch61"
flash="/home/rostam/qiime_software/smash-v2.0/flash/flash"
seqUtils="/home/rostam/qiime_software/smash-v2.0/data/maui/seqUtils"
blastn="/home/rostam/qiime_software/smash-v2.0/data/ncbi-blast+/2.2.26/blastn"
uc2otutab="/home/rostam/qiime_software/usearch-python_script/uc2otutab.py"
clip_primer="/home/rostam/qiime_software/smash-v2.0/bin/clip_primers.pl"
bmp2uparseperl="$maindir/bmp-uparse-pipeline.pl"
fastaspliter="/home/rostam/qiime_software/smash-v2.0/data/fastq-splitter.pl"
fastanumber="/home/rostam/qiime_software/usearch-python_script/fasta_number.py"
cdhitest="/home/rostam/qiime_software/smash-v2.0/data/cd-hit-v4.6.1-2012-08-27/cd-hit-est"
corrsize="$maindir/add_size2.pl"

# --- primers (from the Illumina protocol in the document folder) ---
forward_primer="$maindir/fwd_primer.fa"
reverse_primer="$maindir/rev_primer.fa"

# --- outputs ---
here="$PWD"
progress_process="$output_dir/process.stat"   # table: how many reads pass each step
hq_reads_folder="$output_dir/hg_reads_folder" # high-quality reads after trimming and base-quality check
clean_reads="$output_dir/clean_reads"         # after FLASH merging
OTUFOLDER="$output_dir/OTU_folder"            # base-quality filtering on top of FLASH
clippingfolder="$output_dir/clipping"
taxfolder="$output_dir/tax"

# --- parameters ---
# NOTE(review): trunclen*/truncqual* are defined but not used by the active
# steps below — they belong to the reference usearch command above.
trunclen1=260
trunclen2=230
truncqual1=10
truncqual2=15
maxee=0.5
# Start each run with a fresh OTU statistics file (counts are appended
# with >> further down the pipeline).
if [ -f "$OTUFOLDER/otu.stat" ]; then
  echo "OTU stat file is removed"
  rm "$OTUFOLDER/otu.stat"
fi
# DISABLED SECTION: the per-sample merge/filter/primer-clipping loop below is
# wrapped in a quoted no-op (: '...') so none of it executes. The embedded
# single quotes inside the section pair up, so the outer quote still closes at
# the end — keep them balanced if you edit this text.
: '
# folder for clipping information
if [ ! -d $clippingfolder ]; then
mkdir $clippingfolder
else
rm -rf $clippingfolder/*
fi
# folder for high quality reads
if [ ! -d $hq_reads_folder ]; then
mkdir $hq_reads_folder
else
rm -rf $hq_reads_folder/*
fi
# folder for clean quality reads
if [ ! -d $clean_reads ]; then
mkdir $clean_reads
else
rm -rf $clean_reads/*
fi
# folder for OTUS
if [ ! -d $OTUFOLDER ]; then
mkdir $OTUFOLDER
else
rm -rf $OTUFOLDER/*
fi
# folder for tax
if [ ! -d $taxfolder ]; then
mkdir $taxfolder
else
rm -rf $taxfolder/*
fi
# restart the progress file
if [ -f $progress_process ]; then
echo "progress file is removed"
rm $progress_process
fi
# write the header
echo "sample_ID" "#fwread" "#reverseread" "#contignmbr" "#hqcontig" "#clipforward" "clipreverse" "cleanread400nmb" "cleanread400nmbfa" " " > $progress_process
for fsq in $rawdir*R2*.fastq
do
echo $fsq
# fsq='/home/rostam/Projects/16srna/MS96/raw_data/1165_A_9_run111_GCTACGCTCCGTAAGA_L001_R2_001.fastq'
# declare -a arr
bar=(`echo $fsq | tr '_' ' '`)
sample=${bar[2]}_${bar[3]}
echo $sample
forward_read=$rawdir*$sample\_*R1*
nmbfwread=$(grep "^\+$" $forward_read -c)
echo $nmbfwread
reverse_read=$rawdir*$sample\_*R2*
nmbreverread=$(grep "^\+$" $reverse_read -c)
echo $nmbreverread
$flash -r 300 -f 456 -s 11 $forward_read $reverse_read --output-prefix $sample.pair -d $clean_reads
contigssnmb=$(grep "^\+$" $clean_reads/$sample.pair.extendedFrags.fastq -c)
#expectation values for each read ! usearch works with probability not the quality score, so it converts the quality scores to expectation values and sum them up++
$usearch61 -fastq_filter $clean_reads/$sample.pair.extendedFrags.fastq -fastqout $hq_reads_folder/$sample.pair.hg.fq -fastq_maxee $maxee
hgcontignmbr=$(grep "^\+$" $hq_reads_folder/$sample.pair.hg.fq -c)
cliplogforward=$sample.p.clip.log #high quality basepair contigs
$clip_primer --seq=$hq_reads_folder/$sample.pair.hg.fq --type=fastq --fwdprimer=$forward_primer --cpus=8 > $output_dir/$cliplogforward
clipforwardnmbr=$(grep "^\+$" $hq_reads_folder/$sample.pair.hg.clipped.fq -c)
#reverse the sequence and remove its forward primer (reverse primer would be removed)
$seqUtils -m revcomp -t fastq $hq_reads_folder/$sample.pair.hg.clipped.fq > $clippingfolder/$sample.clipped.rev.fq
cliplogreverse=$sample.p.clip.rev.log #high quality basepair contigs
$clip_primer --seq=$clippingfolder/$sample.clipped.rev.fq --type=fastq --fwdprimer=$reverse_primer --cpus=8 > $output_dir/$cliplogreverse
cliprevnmbr=$(grep "^\+$" $clippingfolder/$sample.clipped.rev.clipped.fq -c)
#reverse the sequence again, as it was in the first place
$seqUtils -m revcomp -t fastq $clippingfolder/$sample.clipped.rev.clipped.fq > $clean_reads/$sample.clean.fq
$seqUtils -m separate -t fastq -s 399 $clean_reads/$sample.clean.fq 1>$clean_reads/$sample.clean.400.fq 2>$clean_reads/$sample.clean.short.fq
cleanread400nmb=$(grep "^\+$" $clean_reads/$sample.clean.400.fq -c)
#(update - we dont need fasta now becaise we are still depends on split library!)we use the following only to co#analysis ! I prefer to not use the split_library function at all!
#$usearch61 -fastq_filter $clean_reads/$sample.clean.400.fq -fastaout $clean_reads/$sample.clean.400.fa -fastq_maxee $maxee
#cleanread400nmbfasta=$(grep ">" $clean_reads/$sample.clean.400.fa -c)
echo $sample $nmbfwread $nmbreverread $contigssnmb $hgcontignmbr $clipforwardnmbr $cliprevnmbr $cleanread400nmb $cleanread400nmbfasta " " >> $progress_process
done
exit
'
# limited to 400
#clean_reads=$output_dir/"clean_reads"
# split library put the size on the reads id and let it work for sortbysize !
# NOTE(review): the -i file list is one long line; it is kept fully commented
# so no fragment of it can execute by accident.
#$python ~/qiime_software/qiime-1.8.0-release/bin/split_libraries_fastq.py \
#-i $clean_reads/A_10.clean.400.fq,$clean_reads/A_18.clean.400.fq,$clean_reads/A_25.clean.400.fq,$clean_reads/A_32.clean.400.fq,$clean_reads/A_3.clean.400.fq,$clean_reads/A_47.clean.400.fq,$clean_reads/A_54.clean.400.fq,$clean_reads/A_61.clean.400.fq,$clean_reads/A_69.clean.400.fq,$clean_reads/A_76.clean.400.fq,$clean_reads/A_83.clean.400.fq,$clean_reads/A_90.clean.400.fq,$clean_reads/A_11.clean.400.fq,$clean_reads/A_19.clean.400.fq,$clean_reads/A_26.clean.400.fq,$clean_reads/A_33.clean.400.fq,$clean_reads/A_40.clean.400.fq,$clean_reads/A_48.clean.400.fq,$clean_reads/A_55.clean.400.fq,$clean_reads/A_62.clean.400.fq,$clean_reads/A_6.clean.400.fq,$clean_reads/A_77.clean.400.fq,$clean_reads/A_84.clean.400.fq,$clean_reads/A_91.clean.400.fq,$clean_reads/A_12.clean.400.fq,$clean_reads/A_1.clean.400.fq,$clean_reads/A_27.clean.400.fq,$clean_reads/A_34.clean.400.fq,$clean_reads/A_41.clean.400.fq,$clean_reads/A_49.clean.400.fq,$clean_reads/A_56.clean.400.fq,$clean_reads/A_63.clean.400.fq,$clean_reads/A_70.clean.400.fq,$clean_reads/A_78.clean.400.fq,$clean_reads/A_85.clean.400.fq,$clean_reads/A_92.clean.400.fq,$clean_reads/A_13.clean.400.fq,$clean_reads/A_20.clean.400.fq,$clean_reads/A_28.clean.400.fq,$clean_reads/A_35.clean.400.fq,$clean_reads/A_42.clean.400.fq,$clean_reads/A_4.clean.400.fq,$clean_reads/A_57.clean.400.fq,$clean_reads/A_64.clean.400.fq,$clean_reads/A_71.clean.400.fq,$clean_reads/A_79.clean.400.fq,$clean_reads/A_86.clean.400.fq,$clean_reads/A_93.clean.400.fq,$clean_reads/A_14.clean.400.fq,$clean_reads/A_21.clean.400.fq,$clean_reads/A_29.clean.400.fq,$clean_reads/A_36.clean.400.fq,$clean_reads/A_43.clean.400.fq,$clean_reads/A_50.clean.400.fq,$clean_reads/A_58.clean.400.fq,$clean_reads/A_65.clean.400.fq,$clean_reads/A_72.clean.400.fq,$clean_reads/A_7.clean.400.fq,$clean_reads/A_87.clean.400.fq,$clean_reads/A_94.clean.400.fq,$clean_reads/A_15.clean.400.fq,$clean_reads/A_22.clean.400.fq,$clean_reads/A_2.clean.400.fq,$clean_reads/A_37.clean.400.fq,$clean_reads/A_44.clean.400.fq,$clean_reads/A_51.clean.400.fq,$clean_reads/A_59.clean.400.fq,$clean_reads/A_66.clean.400.fq,$clean_reads/A_73.clean.400.fq,$clean_reads/A_80.clean.400.fq,$clean_reads/A_88.clean.400.fq,$clean_reads/A_95.clean.400.fq,$clean_reads/A_16.clean.400.fq,$clean_reads/A_23.clean.400.fq,$clean_reads/A_30.clean.400.fq,$clean_reads/A_38.clean.400.fq,$clean_reads/A_45.clean.400.fq,$clean_reads/A_52.clean.400.fq,$clean_reads/A_5.clean.400.fq,$clean_reads/A_67.clean.400.fq,$clean_reads/A_74.clean.400.fq,$clean_reads/A_81.clean.400.fq,$clean_reads/A_89.clean.400.fq,$clean_reads/A_96.clean.400.fq,$clean_reads/A_17.clean.400.fq,$clean_reads/A_24.clean.400.fq,$clean_reads/A_31.clean.400.fq,$clean_reads/A_39.clean.400.fq,$clean_reads/A_46.clean.400.fq,$clean_reads/A_53.clean.400.fq,$clean_reads/A_60.clean.400.fq,$clean_reads/A_68.clean.400.fq,$clean_reads/A_75.clean.400.fq,$clean_reads/A_82.clean.400.fq,$clean_reads/A_8.clean.400.fq,$clean_reads/A_9.clean.400.fq \
#-o $clean_reads/ \
#-m /home/rostam/Projects/16srna/MS96/raw_data/mapping.single.qiime \
#--barcode_type 'not-barcoded' \
#--sample_ids A10,A18,A25,A32,A3,A47,A54,A61,A69,A76,A83,A90,A11,A19,A26,A33,A40,A48,A55,A62,A6,A77,A84,A91,A12,A1,A27,A34,A41,A49,A56,A63,A70,A78,A85,A92,A13,A20,A28,A35,A42,A4,A57,A64,A71,A79,A86,A93,A14,A21,A29,A36,A43,A50,A58,A65,A72,A7,A87,A94,A15,A22,A2,A37,A44,A51,A59,A66,A73,A80,A88,A95,A16,A23,A30,A38,A45,A52,A5,A67,A74,A81,A89,A96,A17,A24,A31,A39,A46,A53,A60,A68,A75,A82,A8,A9 \
#--store_demultiplexed_fastq \
#-v
# DISABLED SECTION: whole-length (not 400-trimmed) split_libraries run, kept
# for reference inside a quoted no-op (: '...').
# NOTE(review): $python is never defined in this script — set it before
# re-enabling this block.
: '
# everything
clean_reads=$output_dir/"clean_reads"
# split library put the size on the reads id and let it work for sortbysize !
$python ~/qiime_software/qiime-1.8.0-release/bin/split_libraries_fastq.py \
-i $clean_reads/A_10.clean.fq,$clean_reads/A_18.clean.fq,$clean_reads/A_25.clean.fq,$clean_reads/A_32.clean.fq,$clean_reads/A_3.clean.fq,$clean_reads/A_47.clean.fq,$clean_reads/A_54.clean.fq,$clean_reads/A_61.clean.fq,$clean_reads/A_69.clean.fq,$clean_reads/A_76.clean.fq,$clean_reads/A_83.clean.fq,$clean_reads/A_90.clean.fq,$clean_reads/A_11.clean.fq,$clean_reads/A_19.clean.fq,$clean_reads/A_26.clean.fq,$clean_reads/A_33.clean.fq,$clean_reads/A_40.clean.fq,$clean_reads/A_48.clean.fq,$clean_reads/A_55.clean.fq,$clean_reads/A_62.clean.fq,$clean_reads/A_6.clean.fq,$clean_reads/A_77.clean.fq,$clean_reads/A_84.clean.fq,$clean_reads/A_91.clean.fq,$clean_reads/A_12.clean.fq,$clean_reads/A_1.clean.fq,$clean_reads/A_27.clean.fq,$clean_reads/A_34.clean.fq,$clean_reads/A_41.clean.fq,$clean_reads/A_49.clean.fq,$clean_reads/A_56.clean.fq,$clean_reads/A_63.clean.fq,$clean_reads/A_70.clean.fq,$clean_reads/A_78.clean.fq,$clean_reads/A_85.clean.fq,$clean_reads/A_92.clean.fq,$clean_reads/A_13.clean.fq,$clean_reads/A_20.clean.fq,$clean_reads/A_28.clean.fq,$clean_reads/A_35.clean.fq,$clean_reads/A_42.clean.fq,$clean_reads/A_4.clean.fq,$clean_reads/A_57.clean.fq,$clean_reads/A_64.clean.fq,$clean_reads/A_71.clean.fq,$clean_reads/A_79.clean.fq,$clean_reads/A_86.clean.fq,$clean_reads/A_93.clean.fq,$clean_reads/A_14.clean.fq,$clean_reads/A_21.clean.fq,$clean_reads/A_29.clean.fq,$clean_reads/A_36.clean.fq,$clean_reads/A_43.clean.fq,$clean_reads/A_50.clean.fq,$clean_reads/A_58.clean.fq,$clean_reads/A_65.clean.fq,$clean_reads/A_72.clean.fq,$clean_reads/A_7.clean.fq,$clean_reads/A_87.clean.fq,$clean_reads/A_94.clean.fq,$clean_reads/A_15.clean.fq,$clean_reads/A_22.clean.fq,$clean_reads/A_2.clean.fq,$clean_reads/A_37.clean.fq,$clean_reads/A_44.clean.fq,$clean_reads/A_51.clean.fq,$clean_reads/A_59.clean.fq,$clean_reads/A_66.clean.fq,$clean_reads/A_73.clean.fq,$clean_reads/A_80.clean.fq,$clean_reads/A_88.clean.fq,$clean_reads/A_95.clean.fq,$clean_reads/A_16.clean.fq,$clean_reads/A_23.clean.fq,$clean_reads/A_30.clean.fq,$clean_reads/A_38.clean.fq,$clean_reads/A_45.clean.fq,$clean_reads/A_52.clean.fq,$clean_reads/A_5.clean.fq,$clean_reads/A_67.clean.fq,$clean_reads/A_74.clean.fq,$clean_reads/A_81.clean.fq,$clean_reads/A_89.clean.fq,$clean_reads/A_96.clean.fq,$clean_reads/A_17.clean.fq,$clean_reads/A_24.clean.fq,$clean_reads/A_31.clean.fq,$clean_reads/A_39.clean.fq,$clean_reads/A_46.clean.fq,$clean_reads/A_53.clean.fq,$clean_reads/A_60.clean.fq,$clean_reads/A_68.clean.fq,$clean_reads/A_75.clean.fq,$clean_reads/A_82.clean.fq,$clean_reads/A_8.clean.fq,$clean_reads/A_9.clean.fq \
-o $clean_reads/ \
-m /home/rostam/Projects/16srna/MS96/raw_data/mapping.single.qiime \
--barcode_type 'not-barcoded' \
--sample_ids A10,A18,A25,A32,A3,A47,A54,A61,A69,A76,A83,A90,A11,A19,A26,A33,A40,A48,A55,A62,A6,A77,A84,A91,A12,A1,A27,A34,A41,A49,A56,A63,A70,A78,A85,A92,A13,A20,A28,A35,A42,A4,A57,A64,A71,A79,A86,A93,A14,A21,A29,A36,A43,A50,A58,A65,A72,A7,A87,A94,A15,A22,A2,A37,A44,A51,A59,A66,A73,A80,A88,A95,A16,A23,A30,A38,A45,A52,A5,A67,A74,A81,A89,A96,A17,A24,A31,A39,A46,A53,A60,A68,A75,A82,A8,A9 \
--store_demultiplexed_fastq \
-v
'
#(we dont need fasta yet ! if we replace splitlibrary with something else, then we can use it!)cat R260/pair/intereads/clean_reads/*.400.fa > R260/pair/intereads/clean_reads/seqs.fa
# Optional chunked filter/dereplicate pass, kept for reference:
#perl $fastaspliter --n-parts 4 $clean_reads/seqs.fastq
#for i in {1..4}
#do
# splitfile=seqs.part-$i.fastq
# splitfilefastq=$clean_reads/$splitfile
# splitfilefasta=$clean_reads/seqs.part-$i.fa
#
# $usearch61 -fastq_filter $splitfilefastq -fastaout $splitfilefasta -fastq_maxee $maxee &> $splitfilefasta.stat
# awk '/^>/{print s? s"\n"$0:$0;s="";next}{s=s sprintf("%s",$0)}END{if(s)print s}' $splitfilefasta > $splitfilefasta.fa
# derepfiles=$clean_reads/seqs.part-$i.derep.fa
# $usearch61 -derep_fulllength $splitfilefasta.fa -output $derepfiles -sizeout
#done
# Pool the per-part dereplicated reads, then join each FASTA record's wrapped
# sequence lines onto a single line (the awk collects lines until the next ">").
cat "$clean_reads"/seqs.*.derep.fa > "$clean_reads/seqs.tot.fasta"
awk '/^>/{print s? s"\n"$0:$0;s="";next}{s=s sprintf("%s",$0)}END{if(s)print s}' "$clean_reads/seqs.tot.fasta" > "$clean_reads/seqs.tot.wob.fasta"
#: '
# Relabel read headers for UPARSE. NOTE: the output overwrites seqs.tot.fasta
# with the relabelled reads (the one-line version is the input).
perl "$bmp2uparseperl" -i "$clean_reads/seqs.tot.wob.fasta" -o "$clean_reads/seqs.tot.fasta"
#cdhit alternative, kept for reference:
#$usearch61 -sortbysize $clean_reads/seqs.tot.fasta -output $clean_reads/seqs.tot.sort.fasta -minsize 1
#$cdhitest -i $clean_reads/seqs.tot.fasta -o $OTUFOLDER/uniques.fa -c 1 -n 12 -l 100 -d 0 -A 0.95 -s 0.95 -T 12 -M 100000
#$corrsize $OTUFOLDER/uniques.fa $OTUFOLDER/uniques.fa.clstr > $OTUFOLDER/uniques.fa.fixed
#: '
#uparse
# Sort by abundance (singletons discarded via -minsize 2) and cluster OTUs at
# 97% identity (-otu_radius_pct 3).
"$usearch61" -sortbysize "$clean_reads/seqs.tot.fasta" -output "$clean_reads/seqs.tot.sorted.uparse.fasta" -minsize 2
"$usearch61" -cluster_otus "$clean_reads/seqs.tot.sorted.uparse.fasta" -otus "$OTUFOLDER/otus97_uparse1.fa" -otu_radius_pct 3 -fastaout "$OTUFOLDER/seqs.tot.singles97.annotated"
# write in the log file how many OTUs are formed
grep -c ">" "$OTUFOLDER/otus97_uparse1.fa" >> "$OTUFOLDER/otu.stat"
#chimera removal against the GOLD reference database
"$usearch61" -uchime_ref "$OTUFOLDER/otus97_uparse1.fa" -db ~/qiime_software/gg_13_8_otus/gold.fa -strand plus -nonchimeras "$OTUFOLDER/otus97_uparse2.fa" -chimeras "$OTUFOLDER/chimeras97_uparse2.fa"
#'
# make them in 1 line (join wrapped FASTA sequence lines)
awk '/^>/{print s? s"\n"$0:$0;s="";next}{s=s sprintf("%s",$0)}END{if(s)print s}' "$OTUFOLDER/otus97_uparse2.fa" > "$OTUFOLDER/otus97_uparse3.fa"
# renumber the OTU headers as OTU_1, OTU_2, ...
python "$fastanumber" "$OTUFOLDER/otus97_uparse3.fa" OTU_ > "$OTUFOLDER/otus97_uparse.fa"
# write how many non-chimera OTUs survived
grep -c ">" "$OTUFOLDER/otus97_uparse.fa" >> "$OTUFOLDER/otu.stat"
nonchimera=$(grep -c ">" "$OTUFOLDER/otus97_uparse.fa")
# blast versus mouse genome (mm10) to find host-derived OTUs
"$blastn" -query "$OTUFOLDER/otus97_uparse.fa" -db /home/rostam/Projects/16srna/DB/mm10 -out "$OTUFOLDER/otus1.mm10.blastn" -reward 1 -penalty -2 -gapopen 2 -gapextend 2 -outfmt 6 -max_target_seqs 5 -perc_identity 85 -num_threads 40 -strand both
# pick the hits whose identity (column 3) is higher than 89.99%
nmbmouse=$(awk '$3>89.99{print $1}' "$OTUFOLDER/otus1.mm10.blastn" | sort | uniq | wc -w)
# write mouse otus
awk '$3>89.99{print $1}' "$OTUFOLDER/otus1.mm10.blastn" | sort | uniq > "$OTUFOLDER/mouse_otus.list"
# record how many non-mouse otus remain
nonmouse=$((nonchimera - nmbmouse))
echo "$nonmouse" >> "$OTUFOLDER/otu.stat"
# make mouse fasta! (-A 1 keeps the sequence line; grep inserts "--" separators)
grep -A 1 -f "$OTUFOLDER/mouse_otus.list" "$OTUFOLDER/otus97_uparse.fa" > "$OTUFOLDER/mouse.fa"
# remove the mouse reads from the original otus
# NOTE(review): the whole mouse.fa (headers, sequence lines AND "--"
# separators) is used as the -v -f pattern list; filtering on mouse_otus.list
# would be more precise — confirm before changing.
grep -v -f "$OTUFOLDER/mouse.fa" "$OTUFOLDER/otus97_uparse.fa" > "$OTUFOLDER/otus.nmouse.fa"
grep -c ">" "$OTUFOLDER/otus.nmouse.fa"
# map reads back to these good OTUs
"$usearch61" -usearch_global "$clean_reads/seqs.tot.fasta" -db "$OTUFOLDER/otus.nmouse.fa" -strand plus -id 0.97 -uc "$OTUFOLDER/map.uc"
"$usearch61" -usearch_global "$clean_reads/seqs.tot.fasta" -db "$OTUFOLDER/chimeras97_uparse2.fa" -strand plus -id 0.97 -uc "$OTUFOLDER/map_chimera.uc"
#cut -f9 $OTUFOLDER/map_chimera.uc | tr '=' ' ' | cut -d " " -f3 | sort | uniq -c > $OTUFOLDER/chimera_reads.stat
# per-sample counts of reads that mapped ("H" records) to chimeric OTUs
awk '$1=="H" {print $9,$10}' "$OTUFOLDER/map_chimera.uc" | tr '_' ' ' | cut -d ' ' -f1 | sort | uniq -c > "$OTUFOLDER/chimera_reads.stat"
# drop the "--" group separators that grep -A inserted into mouse.fa
sed 's/--//' "$OTUFOLDER/mouse.fa" > "$OTUFOLDER/mousewodsh.fa"
"$usearch61" -usearch_global "$clean_reads/seqs.tot.fasta" -db "$OTUFOLDER/mousewodsh.fa" -strand plus -id 0.97 -uc "$OTUFOLDER/map_mouse.uc"
#cut -f9 $OTUFOLDER/map_mouse.uc | tr '=' ' ' | cut -d " " -f3 | sort | uniq -c > $OTUFOLDER/mouse_reads.stat
awk '$1=="H" {print $9,$10}' "$OTUFOLDER/map_mouse.uc" | tr '_' ' ' | cut -d ' ' -f1 | sort | uniq -c > "$OTUFOLDER/mouse_reads.stat"
# build the OTU table and convert it to BIOM format
python "$uc2otutab" "$OTUFOLDER/map.uc" > "$OTUFOLDER/otu_table.txt"
# NOTE(review): newer biom-format versions require --table-type="OTU table"
# (capitalised) — confirm against the installed biom version.
biom convert -i "$OTUFOLDER/otu_table.txt" -o "$OTUFOLDER/otu_table_uparse.biome" --table-type="otu table"
#'
# Absolute output locations, re-exported for the taxonomy tools below.
export OTUFOLDER=/home/rostam/Projects/16srna/MS96/Mature/R260/pair/intereads/OTU_folder
export taxfolder=/home/rostam/Projects/16srna/MS96/Mature/R260/pair/intereads/tax
# Assign taxonomy by BLAST against Greengenes 13_8.
# NOTE(review): reference reps are 97_otus but the taxonomy map is
# 99_otu_taxonomy — confirm this pairing is intended.
python ~/qiime_software/qiime-1.8.0-release/bin/assign_taxonomy.py \
  -i "$OTUFOLDER/otus.nmouse.fa" \
  -o "$taxfolder/blast" \
  -r ~/qiime_software/gg_otus-13_8-release/rep_set/97_otus.fasta \
  -t ~/qiime_software/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt \
  -m blast
# RDP variant kept for reference:
#python ~/qiime_software/qiime-1.8.0-release/bin/assign_taxonomy.py \
# -i $OTUFOLDER/otus.nmouse.fa \
# -o $taxfolder/rdp \
# -r ~/qiime_software/gg_otus-13_8-release/rep_set/97_otus.fasta \
# -t ~/qiime_software/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt \
# -m rdp
# Attach the Greengenes assignments to the BIOM table.
# BUGFIX: a stray trailing backslash after "confidence" previously continued
# the line and swallowed the next command as extra biom arguments.
biom add-metadata \
  -i "$OTUFOLDER/otu_table_uparse.biome" \
  -o "$OTUFOLDER/otu_table_uparse.blast.tax.biome" \
  --observation-metadata-fp "$taxfolder/blast/otus.nmouse_tax_assignments.txt" \
  --observation-header OTUID,taxonomy,confidence --sc-separated taxonomy --float-fields confidence
# Assign taxonomy by BLAST against SILVA 115.
python ~/qiime_software/qiime-1.8.0-release/bin/assign_taxonomy.py \
  -i "$OTUFOLDER/otus.nmouse.fa" \
  -o "$taxfolder/blast_SLV" \
  -r /home/rostam/Projects/16srna/DB/silva.bacteria/SLV_115.fasta \
  -t /home/rostam/Projects/16srna/DB/silva.bacteria/SLV_115.tax \
  -m blast
# Attach the SILVA assignments to the BIOM table.
# BUGFIX: removed the stray trailing backslash after "confidence" that
# continued the command onto the following comment line.
biom add-metadata \
  -i "$OTUFOLDER/otu_table_uparse.biome" \
  -o "$OTUFOLDER/otu_table_uparse.blast.slv.tax.biome" \
  --observation-metadata-fp "$taxfolder/blast_SLV/otus.nmouse_tax_assignments.txt" \
  --observation-header OTUID,taxonomy,confidence --sc-separated taxonomy --float-fields confidence
## TO DO : association using silva119
## TO DO : tree using silva119
## inspired from http://www.brmicrobiome.org/#!16s-profiling-pipeline-illumin/c3u5
# Align OTU representatives to the Greengenes template, filter the alignment,
# and build the phylogenetic tree.
# BUGFIX: stray trailing backslashes previously chained these three commands
# (and the final "exit") into one broken command line; each now runs on its own.
python ~/qiime_software/qiime-1.8.0-release/bin/align_seqs.py \
  -i "$OTUFOLDER/otus.nmouse.fa" \
  -o "$OTUFOLDER/rep_set_align" \
  -t ~/qiime_software/gg_otus-13_8-release/rep_set_aligned/97_otus.fasta
python ~/qiime_software/qiime-1.8.0-release/bin/filter_alignment.py \
  -i "$OTUFOLDER/rep_set_align/otus.nmouse_aligned.fasta" \
  -o "$OTUFOLDER/filtered_alignment"
python ~/qiime_software/qiime-1.8.0-release/bin/make_phylogeny.py \
  -i "$OTUFOLDER/filtered_alignment/otus.nmouse_aligned_pfiltered.fasta" \
  -o "$OTUFOLDER/rep_set.tre"
exit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment