Last active
July 5, 2016 16:08
-
-
Save dylanbstorey/99b9f109031f34c5a9233bf0f5d435d7 to your computer and use it in GitHub Desktop.
General Analyses of a Micro Biome
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(reshape2) | |
library(vegan) | |
library(gplots) | |
library(ggrepel) | |
library(fossil) | |
global_S <- NULL | |
get_species<-function(matrix){ | |
IndicesToGrab<-grep("s__",matrix$ID); | |
matrix$ID<-as.character(matrix$ID); | |
Species<-matrix[IndicesToGrab,]; | |
Species$ID<-gsub("^.*s__|","",Species$ID) | |
row.names(Species)=Species$ID | |
Species<-Species[,-1]; | |
Species | |
} | |
get_genus<-function(matrix){ | |
IndicesToGrab<-grep("g__",matrix$ID); | |
Genera<-matrix[IndicesToGrab,]; | |
Genera$ID<-gsub("^.*g__|","",Genera$ID); | |
Genus_and_species<-grep("s__",Genera$ID); | |
Genera<-Genera[-Genus_and_species,]; | |
row.names(Genera)<-Genera$ID; | |
Genera<-Genera[,-1]; | |
Genera | |
} | |
get_family<-function(matrix){ | |
IndicesToGrab<-grep("f__",matrix$ID); | |
Families<-matrix[IndicesToGrab,]; | |
Families$ID<-gsub("^.*f__|","", Families$ID); | |
Family_genus_species_list<-grep("g__", Families$ID); | |
Families<-Families[-Family_genus_species_list,]; | |
Family_species_list <- grep("s__",Families$ID); | |
Families<-Families[-Family_species_list,]; | |
row.names(Families)<-Families$ID; | |
Families<-Families[,-1]; | |
Families | |
} | |
barplot_proportional_data <-function(matrix,level,outname){ | |
matrix.norm<-data.matrix(matrix); | |
matrix.norm<-sweep(matrix.norm,2,colSums(matrix.norm),'/'); | |
matrix.norm<-sweep(matrix.norm,2,colSums(matrix.norm),'/'); | |
colnames(matrix.norm) <- colnames(matrix) | |
rownames(matrix.norm) <-rownames(matrix) | |
matrix.norm.melt<-melt(matrix.norm); | |
ggplot(matrix.norm.melt,aes(x=Var2,y=value,fill=Var1))+geom_bar(stat="identity",position="stack")+theme(legend.position="none",panel.grid.major=element_blank(),panel.grid.minor=element_blank(),panel.border=element_blank(),panel.background=element_blank())+theme(axis.text.x = element_text(angle = 45, hjust = 1))+xlab("Sample Name")+ylab("Proportion of Observations")+ggtitle(paste("Proprotional observations at ",level," level")) | |
ggsave(filename=paste0(outname,".pdf"),plot=last_plot(),width=16,height=12,units="in",dpi=500); | |
} | |
rarefy_here<-function(matrix,name){ | |
if (! file.exists(name)){ | |
minObservations<-min(colSums(matrix)) | |
rarefied<-data.frame(rrarefy(t(matrix),minObservations)) | |
write.table(rarefied,file = name) | |
} | |
else{ | |
rarefied <- read.table(name,header=TRUE) | |
} | |
rarefied | |
} | |
log_heatform<-function(matrix,level,outname){ | |
logxform <-log2(matrix) | |
logxform[logxform==-Inf]<-0 | |
logxform<-logxform[,apply(logxform, 2, var, na.rm=TRUE) != 0] | |
pdf(paste0(outname,".pdf"), width=16 , height=32) | |
heatmap.2(as.matrix(t(logxform)), col=my_palette,keysize = .75, margins=c(15,10), trace="none", xlab="Sample ID",ylab="Taxon",main=paste0(c("Clustering of samples at ", level," level" )),cexRow=0.3,sepwidth=c(0.05,0.05)) | |
dev.off() | |
} | |
log_heatform_big<-function(matrix,level,outname){ | |
logxform <-log2(matrix) | |
logxform[logxform==-Inf]<-0 | |
logxform<-logxform[,apply(logxform, 2, var, na.rm=TRUE) != 0] | |
pdf(paste0(outname,".pdf"), width=16 , height=32) | |
heatmap.2(as.matrix(t(logxform)), col=my_palette,keysize = .75, margins=c(15,10), trace="none", xlab="Sample ID",ylab="Taxon",main=paste0(c("Clustering of samples at ", level," level" )),cexRow=1,sepwidth=c(0.05,0.05)) | |
dev.off() | |
} | |
alpha_diversity_metrics<-function(matrix,level){ | |
global_S <- NULL | |
# TAKES RAREFIED DATA | |
H <- data.frame(diversity(matrix)) | |
simpson <- data.frame(diversity(matrix, "simpson")) | |
shannon<-data.frame(diversity(matrix , "shannon")) | |
invsimp <- data.frame(diversity(matrix, "inv")) | |
alpha <- data.frame(fisher.alpha(matrix)) | |
## Species richness (S) and Pielou's evenness (J): | |
S <- data.frame(specnumber(matrix)) | |
J <- data.frame(H/log(S)) | |
Diversity<-cbind(simpson, shannon, invsimp, alpha, S, J) | |
Diversity$Sample<-row.names(Diversity) | |
names(Diversity)<-c("Simpson", "Shannon", "InvSimpson", "Alpha", 'ClassNo', "Evenness", "Sample") | |
Diversity<-Diversity[,c(7,1,2,3,4, 5, 6)] | |
pdf(file=paste0(level,'_estimate_pairs.pdf')) | |
pairs(cbind(simpson, shannon, alpha, J, S), col="blue") | |
dev.off() | |
# Plots of alpha-diversity metrics | |
ggplot(data=Diversity, aes(x=Sample, y=Simpson)) + geom_bar(colour="black", fill="darkblue", stat="identity") +theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
ggsave(paste0(level,"_Diversity_Simpson.pdf"),width=5,height=5,units="in",dpi=500) | |
ggplot(data=Diversity, aes(x=Sample, y=Alpha)) + geom_bar(colour="black", fill="darkblue", stat="identity")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
ggsave(paste0(level,"_Diversity_Alpha.pdf"),width=5,height=5,units="in",dpi=500) | |
ggplot(data=Diversity, aes(x=Sample, y=Evenness)) + geom_bar(colour="black", fill="darkblue", stat="identity") +theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
ggsave(paste0(level,"_Diversity_Eveness.pdf"),width=5,height=5,units="in",dpi=500) | |
ggplot(data=Diversity, aes(x=Sample, y=ClassNo)) + geom_bar(colour="black", fill="darkblue", stat="identity") +theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
ggsave(paste0(level,"_Diversity_classNo.pdf"),width=5,height=5,units="in",dpi=500) | |
ggplot(data=Diversity, aes(x=Sample, y=Shannon)) + geom_bar(colour="black", fill="darkblue", stat="identity") +theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
ggsave(paste0(level,"_Diversity_Shannon.pdf"),width=5,height=5,units="in",dpi=500) | |
global_S <<- S | |
} | |
species_per_sample<-function(matrix,level){ | |
## Rarefaction curves | |
raremax <- min(rowSums(t(matrix))) | |
Srare <- rarefy(t(matrix), raremax) | |
x<-data.frame(global_S,Srare) | |
pdf(paste0(level,"_rarefaction.pdf")) | |
plot(x, xlab = paste0("Observed No. of ",level), ylab = paste0("Rarefied No. of ",level)) | |
dev.off(); | |
pdf(paste0("rarefaction_",level,'.pdf')) | |
rarecurve(t(matrix), step = 1000, col = "blue", cex = 0.5, sample= raremax, xlab = "Sample Size", ylab = level, label = TRUE, lwd = 2) | |
dev.off() | |
} | |
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 1000) | |
draw_circle<- function(center = c(0,0),diameter = 1, npoints = 1000){ | |
r = 25 | |
tt <- seq(0,2*pi,length.out = npoints) | |
xx <- center[1] + r * cos(tt) | |
yy <- center[2] + r * sin(tt) | |
return(data.frame(x = xx, y = yy)) | |
} | |
circle<-draw_circle() | |
bi_plot <-function(matrix, outfile ){ | |
logxform <-log2(matrix) | |
logxform[logxform==-Inf]<-0 | |
logxform<-logxform[,apply(logxform, 2, var, na.rm=TRUE) != 0] | |
#PCA and Scree | |
pca<-prcomp(logxform , center = TRUE , scale.=TRUE) | |
std_dev<-pca$sdev | |
loadings<-pca$rotation | |
norm_loadings<-sweep(loadings,2,colSums(loadings),"/") | |
scores<-pca$x | |
variance<- std_dev^2 | |
variance<-variance/sum(variance) * 100 | |
variance<-as.data.frame(variance) | |
variance <-cbind(PC = as.numeric(row.names(variance)),variance) | |
ggplot(variance, aes(y=variance,x=PC))+geom_bar(stat="identity")+ylab("Proportion of Variance")+xlab("Principle Component") | |
ggsave(filename=paste0(outfile,"_scree.pdf"),plot=last_plot(),width=16,height=12,units="in",dpi=500) | |
vecs<-data.frame(loadings[,c('PC1','PC2')]) | |
vecs = vecs * 25 * 10 | |
tmp_vecs1a<-vecs[order(vecs[,1]),][1:10,] | |
tmp_vecs1b<-vecs[rev(order(vecs[,1])),][1:10,] | |
tmp_vecs2a<-vecs[order(vecs[,2]),][1:10,] | |
tmp_vecs2b<-vecs[rev(order(vecs[,2])),][1:10,] | |
vecs <-rbind(tmp_vecs1a,tmp_vecs1b,tmp_vecs2a,tmp_vecs2b) | |
vecs <-unique(vecs) | |
pca.df <-as.data.frame(pca$x) | |
pca.df <- cbind( ID = row.names(pca.df),pca.df) | |
ggplot(pca.df, aes(x=PC1 , y = PC2)) + | |
geom_point( | |
size=1, | |
)+ | |
geom_text_repel( | |
size=2.5, | |
aes(label=ID), | |
segment.color = 'gray', | |
point.padding = unit(1.0,"lines"), | |
box.padding = unit(0.1,"lines"), | |
force = 1, | |
)+ | |
geom_point( | |
size=5, | |
shape = 3, | |
aes(x=0,y=0), | |
color="grey", | |
)+ | |
geom_path( | |
data=circle, | |
aes(x , y), | |
color="red")+ | |
coord_fixed()+ | |
geom_point( | |
data=vecs, | |
aes(x=PC1,y=PC2), | |
color="red", | |
size=2 | |
)+ | |
geom_text_repel( | |
data=vecs, | |
aes(x=PC1,y=PC2,label=rownames(vecs)), | |
color = "red", | |
size=2.5, | |
segment.color = 'gray', | |
point.padding = unit(1.0,"lines"), | |
box.padding = unit(0.1,"lines"), | |
force = 1 | |
) | |
ggsave(filename=paste0(outfile,"_pca.pdf"),plot=last_plot(),width=16,height=12,units="in",dpi=500); | |
} | |
#Kraken Mapping | |
map_rates = as.data.frame(read.table(file="stats.tbl",header=TRUE,sep="\t")) | |
map_rates.melt <- melt(map_rates,id.vars = 'sample') | |
ggplot(data=map_rates.melt, aes(x=sample,y=value,fill=variable))+geom_bar(stat="identity")+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+labs(x= "Sample",y="Proportion Classified",title="Proportion of Classified and Unclassified Reads \n Kraken")+guides(fill=guide_legend(title="Read Type")) | |
ggsave(filename = "KrakenClassificationProportion.pdf", plot=last_plot(),width=16,height=12,units="in",dpi=500); | |
#data prep | |
kraken_mpa <- read.csv("kraken-mpa.merge",sep="\t",header=TRUE) | |
kraken_mpa$ID<-as.character(kraken_mpa$ID) | |
#sub sample Bacteria, Viruses , and Archaea | |
indices<-grep("Bacteria",kraken_mpa$ID) | |
bacteria_mpa <-kraken_mpa[indices,] | |
indices<-grep("Viruses",kraken_mpa$ID) | |
viruses_mpa <-kraken_mpa[indices,] | |
indices<-grep("Archaea",kraken_mpa$ID) | |
archaea_mpa <-kraken_mpa[indices,] | |
dirlist <- c('AllTaxa', 'Bacteria','Archaea','Viruses','Views') | |
for( i in dirlist){ | |
if (! dir.exists(i)){ | |
dir.create(i) | |
} | |
} | |
setwd('AllTaxa') | |
kraken_mpa.rarefy.family <-rarefy_here(get_family(kraken_mpa) ,'kraken_mpa_rarefy_family.txt') | |
kraken_mpa.rarefy.genus <-rarefy_here(get_genus(kraken_mpa) ,'kraken_mpa_rarefy_genus.txt') | |
kraken_mpa.rarefy.species <-rarefy_here(get_species(kraken_mpa),'kraken_mpa_rarefy_species.txt') | |
barplot_proportional_data(get_family(kraken_mpa),'family', 'All_taxa_family_proportion') | |
log_heatform(kraken_mpa.rarefy.family,'family','All_taxa_family_heat') | |
bi_plot(kraken_mpa.rarefy.family,"All_taxa_family_biplot") | |
alpha_diversity_metrics(kraken_mpa.rarefy.family,'Families') | |
species_per_sample(get_family(kraken_mpa),'Families') | |
barplot_proportional_data(get_genus(kraken_mpa),'genus','All_taxa_genus_proportion') | |
log_heatform(kraken_mpa.rarefy.genus,'genus','All_taxa_genus_heat') | |
bi_plot(kraken_mpa.rarefy.genus,'All_taxa_genus_biplot') | |
alpha_diversity_metrics(kraken_mpa.rarefy.genus,'Genera') | |
species_per_sample(get_genus(kraken_mpa),'Genera') | |
barplot_proportional_data(get_species(kraken_mpa),'species','All_taxa_species_proportion') | |
log_heatform(kraken_mpa.rarefy.species,'species','All_taxa_species_heat') | |
bi_plot(kraken_mpa.rarefy.species,'All_taxa_species_biplot') | |
alpha_diversity_metrics(kraken_mpa.rarefy.species,'Species') | |
species_per_sample(get_species(kraken_mpa),'Species') | |
setwd('..') | |
setwd('Bacteria') | |
bacteria_mpa.rarefy.family <-rarefy_here(get_family(bacteria_mpa) ,'bacteria_mpa_rarefy_family.txt') | |
bacteria_mpa.rarefy.genus <-rarefy_here(get_genus(bacteria_mpa) ,'bacteria_mpa_rarefy_genus.txt') | |
bacteria_mpa.rarefy.species <-rarefy_here(get_species(bacteria_mpa),'bacteria_mpa_rarefy_species.txt') | |
barplot_proportional_data(get_family(bacteria_mpa),'family','Bacteria_familiy_proportion') | |
log_heatform(bacteria_mpa.rarefy.family,'family','Bacteria_family_heat') | |
bi_plot(bacteria_mpa.rarefy.family,'Bacteria_family_biplot') | |
barplot_proportional_data(get_genus(bacteria_mpa),'genus','Bacteria_genus_proportion') | |
log_heatform(bacteria_mpa.rarefy.genus,'genus','Bacteria_genus_heat') | |
bi_plot(bacteria_mpa.rarefy.genus,'Bacteria_genus_biplot') | |
barplot_proportional_data(get_species(bacteria_mpa),'species','Bacteria_species_proportion') | |
log_heatform(bacteria_mpa.rarefy.species,'species','Bacteria_species_heat') | |
bi_plot(bacteria_mpa.rarefy.species,'Bacteria_species_biplot') | |
alpha_diversity_metrics(bacteria_mpa.rarefy.family,'Families') | |
species_per_sample(get_family(bacteria_mpa),'Families') | |
alpha_diversity_metrics(bacteria_mpa.rarefy.genus,'Genera') | |
species_per_sample(get_genus(bacteria_mpa),'Genera') | |
alpha_diversity_metrics(bacteria_mpa.rarefy.species,'Species') | |
species_per_sample(get_species(bacteria_mpa),'Species') | |
setwd('..') | |
setwd('Viruses') | |
viruses_mpa.rarefy.family <-rarefy_here(get_family(viruses_mpa) ,'viruses_mpa_rarefy_family.txt') | |
viruses_mpa.rarefy.genus <-rarefy_here(get_genus(viruses_mpa) ,'viruses_mpa_rarefy_genus.txt') | |
viruses_mpa.rarefy.species <-rarefy_here(get_species(viruses_mpa),'viruses_mpa_rarefy_species.txt') | |
barplot_proportional_data(get_family(viruses_mpa),'family','Virus_family_proportion') | |
log_heatform(viruses_mpa.rarefy.family,'family','Virus_family_heat') | |
bi_plot(viruses_mpa.rarefy.family,'Viruses_family_biplot') | |
barplot_proportional_data(get_genus(viruses_mpa),'genus','Virus_genus_proportion') | |
log_heatform(viruses_mpa.rarefy.genus,'genus','Virus_genus_heat') | |
bi_plot(viruses_mpa.rarefy.genus,'Viruses_genus_biplot') | |
barplot_proportional_data(get_species(viruses_mpa),'species','Virus_species_proportion') | |
log_heatform(viruses_mpa.rarefy.species,'species','Virus_species_heat') | |
bi_plot(viruses_mpa.rarefy.species,'Virus_species_biplot') | |
alpha_diversity_metrics(viruses_mpa.rarefy.family,'Families') | |
species_per_sample(get_family(viruses_mpa),'Families') | |
alpha_diversity_metrics(viruses_mpa.rarefy.genus,'Genera') | |
species_per_sample(get_genus(viruses_mpa),'Genera') | |
alpha_diversity_metrics(viruses_mpa.rarefy.species,'Species') | |
species_per_sample(get_species(viruses_mpa),'Species') | |
setwd('..') | |
setwd('Archaea') | |
#archaea_mpa.rarefy.family <-rarefy_here(get_family(archaea_mpa) ,'archaea_mpa_rarefy_family.txt') | |
archaea_mpa.rarefy.genus <-rarefy_here(get_genus(archaea_mpa) ,'archaea_mpa_rarefy_genus.txt') | |
archaea_mpa.rarefy.species <-rarefy_here(get_species(archaea_mpa),'archaea_mpa_rarefy_species.txt') | |
#barplot_proportional_data(get_family(archaea_mpa),'family','Archaea_family_proportion') | |
#log_heatform(archaea_mpa.rarefy.family,'family','Archaea_family_heat') | |
barplot_proportional_data(get_genus(archaea_mpa),'genus','Archaea_genus_proportion') | |
log_heatform(archaea_mpa.rarefy.genus,'family','Archaea_genus_heat') | |
bi_plot(archaea_mpa.rarefy.genus,'Archaea_genus_biplot') | |
barplot_proportional_data(get_species(archaea_mpa),'species','Archaea_species_proportion') | |
log_heatform(archaea_mpa.rarefy.species,'species','Archaea_speceis_heat') | |
bi_plot(archaea_mpa.rarefy.species,'Archaea_species_biplot') | |
#alpha_diversity_metrics(archaea_mpa.rarefy.family,'Families') | |
#species_per_sample(get_family(archaea_mpa),'Families') | |
alpha_diversity_metrics(archaea_mpa.rarefy.genus,'Genera') | |
species_per_sample(get_genus(archaea_mpa),'Genera') | |
alpha_diversity_metrics(archaea_mpa.rarefy.species,'Species') | |
species_per_sample(get_species(archaea_mpa),'Species') | |
setwd('..') | |
setwd('Views') | |
bacteria_genera <- c('g__Salmonella' , 'g__Campylobacter', 'g__Staphylococcus','g__Clostridium','g__Chlamydia','g__Streptococcus','g__Escherichia','g__Enterococcus','g__Enterobacter', 'g__Helicobacter', 'g__Bacillus','g__Listeria','g__Alteromonas','g__Pseudomonas','g__Lactobacillus','g__Lactococcus','g__Altermonas') | |
views_mpa <- list() | |
for (i in bacteria_genera){ | |
indices <- grep(i,bacteria_mpa$ID) | |
views_mpa <- rbind (views_mpa,bacteria_mpa[indices,] ) | |
} | |
views_mpa.rarefy.genus <- rarefy_here(get_genus(views_mpa),'views_mpa_rarefy_genus.txt') | |
views_mpa.rarefy.species <-rarefy_here(get_species(views_mpa),'views_mpa_rarefy_species.txt') | |
log_heatform_big(views_mpa.rarefy.genus,'genus','views_genus_heat') | |
log_heatform_big(views_mpa.rarefy.species,'species','views_species_heat') | |
setwd('..') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Swine Micro-biome" | |
author: "Dylan B Storey" | |
output: | |
pdf_document: | |
fig_caption: yes | |
keep_tex: yes | |
html_document: | |
fig_caption: yes | |
keep_md: yes | |
--- | |
```{r, library load , echo=FALSE,message=FALSE} | |
library(ggplot2) | |
library(reshape2) | |
library(vegan) | |
library(gplots) | |
library(ggrepel) | |
library(fossil) | |
library(knitr) | |
library(pander) | |
``` | |
```{r , function declaration , echo=FALSE, message=FALSE} | |
global_S <- NULL | |
get_species<-function(matrix){ | |
#' Given an matrix where taxon ID is mpa format - gets the species rows and cleans up the name accordingly | |
#' | |
#' @param matrix the incomoning mpa matrix | |
#' @return a subset of \code{matrix} that only contains species rows | |
#' | |
IndicesToGrab<-grep("s__",matrix$ID); | |
matrix$ID<-as.character(matrix$ID); | |
Species<-matrix[IndicesToGrab,]; | |
Species$ID<-gsub("^.*s__|","",Species$ID) | |
row.names(Species)=Species$ID | |
Species<-Species[,-1]; | |
Species | |
} | |
get_genus<-function(matrix){ | |
#' Given an matrix where taxon ID is mpa format - gets the genus rows and cleans up the name accordingly | |
#' | |
#' @param matrix the incomoning mpa matrix | |
#' @return a subset of \code{matrix} that only contains genus rows | |
#' | |
IndicesToGrab<-grep("g__",matrix$ID); | |
Genera<-matrix[IndicesToGrab,]; | |
Genera$ID<-gsub("^.*g__|","",Genera$ID); | |
Genus_and_species<-grep("s__",Genera$ID); | |
Genera<-Genera[-Genus_and_species,]; | |
row.names(Genera)<-Genera$ID; | |
Genera<-Genera[,-1]; | |
Genera | |
} | |
get_family<-function(matrix){ | |
#' Given an matrix where taxon ID is mpa format - gets the family rows and cleans up the name accordingly | |
#' | |
#' @param matrix the incomoning mpa matrix | |
#' @return a subset of \code{matrix} that only contains family rows | |
#' | |
IndicesToGrab<-grep("f__",matrix$ID); | |
Families<-matrix[IndicesToGrab,]; | |
Families$ID<-gsub("^.*f__|","", Families$ID); | |
Family_genus_species_list<-grep("g__", Families$ID); | |
Families<-Families[-Family_genus_species_list,]; | |
Family_species_list <- grep("s__",Families$ID); | |
Families<-Families[-Family_species_list,]; | |
row.names(Families)<-Families$ID; | |
Families<-Families[,-1]; | |
Families | |
} | |
barplot_proportional_data<-function(matrix,level){ | |
#' Given a matrix of data , creates a stacked bargraph of the data as proportions | |
#' | |
#' @param matrix , the incoming MPA matrix | |
#' @param level , the taxonomic level to put in the title | |
#' | |
#' @return the ggplot2 object | |
matrix.norm<-data.matrix(matrix) | |
matrix.norm<-sweep(matrix.norm,2,colSums(matrix.norm),'/'); | |
colnames(matrix.norm) <- colnames(matrix) | |
rownames(matrix.norm) <-rownames(matrix) | |
matrix.norm.melt<-melt(matrix.norm); | |
pp1<-ggplot(matrix.norm.melt,aes(x=Var2,y=value,fill=Var1))+geom_bar(stat="identity",position="stack")+theme(legend.position="none",panel.grid.major=element_blank(),panel.grid.minor=element_blank(),panel.border=element_blank(),panel.background=element_blank())+theme(axis.text.x = element_text(angle = 45, hjust = 1,size=4))+xlab("Sample Name")+ylab("Proportion of Observations")+ggtitle(paste("Proprotional observations at ",level," level")) | |
return(pp1) | |
} | |
rarefy_here<-function(matrix,name=NULL){ | |
#' Cached rarefication , because rarefying data is expensive a cached version is useful | |
#' This function caches both the incoming matrix AND the rarefied matrix | |
#' | |
#' @param matrix , the matrix count data to be rarefied | |
#' @param name (default=NULL i.e. optional) where to cache the data | |
#' | |
#' @return the rarefied matrix of \code{matrix} | |
#' | |
if (!is.null(name)){ | |
name = paste0(name,'.rare.txt') | |
original_table_name = paste0(name,'.no_rare.txt') | |
if (! file.exists(name)){ | |
#file doesn't exist , rarefy and cache | |
Observations<-colSums(matrix) | |
rarefied<-data.frame(rrarefy(t(matrix),min(Observations))) | |
write.table(t(matrix),file=original_table_name) | |
write.table(rarefied,file = name) | |
} | |
else{ | |
#load from disk | |
rarefied <- read.table(name,header=TRUE) | |
} | |
} | |
else{ | |
rarefied<-data.frame(rrarefy(t(matrix),min(Observations))) | |
} | |
return(rarefied) | |
} | |
log2_heatmap<-function(matrix,level, main = NULL, cexRow = 0.3 , trace = "none", keysize =1 , xlab = "Sample ID", ylab = NULL, sepwidth=c(0.05,0.05), ColumnColors = NULL,...){ | |
#' creates a heatmap of the matrix after a log2 transformation | |
#' | |
#' @param matrix the incoming matrix | |
#' @param level the taxonomic level (for title) | |
#' | |
#' @return heatmap.2 object | |
#' @example my_plot <- log2_heatmap(data) | |
#' @example my_plot() | |
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 1000) | |
logxform <-log2(matrix) | |
logxform[logxform==-Inf]<-0 | |
logxform<-logxform[,apply(logxform, 2, var, na.rm=TRUE) != 0] | |
hclustfunc <- function(x) hclust(x, method="average") | |
distfunc <- function(x) dist(x, method="canberra") | |
# perform clustering on rows and columns | |
cl.col <- hclustfunc(distfunc(matrix)) | |
col_colors<-ColumnColors[cl.col$order] | |
plot_heatmap<- function() heatmap.2(as.matrix(t(logxform)), | |
col=my_palette, | |
main=main, | |
distfun = distfunc, | |
hclustfun = hclustfunc, | |
cexRow = cexRow, | |
trace = trace, | |
keysize = keysize, | |
xlab = xlab, | |
ylab = ylab, | |
sepwidth = sepwidth, | |
ColSideColors = col_colors , | |
... | |
) | |
} | |
draw_circle<- function(center = c(0,0),r = 1, npoints = 100){ | |
#' Draws a circle and places data into a data-frame for later use | |
#' @param center , where should I center on | |
#' @param diameter , how wide | |
#' @param npoints , how many points to draw | |
#' | |
#' @return dataframe of x, y co-ordinates | |
tt <- seq(0,2*pi,length.out = npoints) | |
xx <- center[1] + r * cos(tt) | |
yy <- center[2] + r * sin(tt) | |
return(data.frame(x = xx, y = yy , r = r)) | |
} | |
``` | |
```{r , echo=FALSE,message=FALSE} | |
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { | |
require(grid) | |
plots <- c(list(...), plotlist) | |
numPlots = length(plots) | |
if (is.null(layout)) { | |
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), | |
ncol = cols, nrow = ceiling(numPlots/cols)) | |
} | |
if (numPlots==1) { | |
print(plots[[1]]) | |
} else { | |
grid.newpage() | |
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) | |
for (i in 1:numPlots) { | |
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) | |
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, | |
layout.pos.col = matchidx$col)) | |
} | |
} | |
} | |
``` | |
```{r, echo=FALSE,message=FALSE} | |
better_bi_plot <- function(matrix , logtransform = TRUE , PC_selection = c(1,2) , disp_load = 10 , color_by = NULL, vector_scale = 25 , scree = TRUE){ | |
#' Calculates PCA and build a bi-plot using ggrepel for a prettier output | |
#' @param matrix , the matrix of data | |
#' @param logtransform , should the data be transformed | |
#' @param PC_selection , a list of PCs to retain for plotting defaults to c(1,2) | |
#' @param disp_load , which loadings should be displayed (defaults to the strongest 10) | |
#' | |
#' @returns bi_plot data frame , all plots are in bi_plot$plot , screeplot in bi_plot$screeplot , pca in bi_plot$pca | |
#build circle | |
r= 1 | |
tt <- seq(0,2*pi,length.out = 1000) | |
xx <- 0 + r * cos(tt) | |
yy <- 0 + r * sin(tt) | |
circle<- (data.frame(x = xx, y = yy , r = r)) | |
#Transform if wished | |
if (logtransform == TRUE){ | |
matrix <- log2(matrix) | |
matrix[matrix==-Inf]<-0 | |
matrix<-matrix[,apply(matrix, 2, var, na.rm=TRUE) != 0] | |
} | |
#PCA perform pca | |
pca <- prcomp(matrix, center = TRUE, scale.=TRUE) | |
#Scree Plot | |
std_deviation <- pca$sdev | |
variance<-std_deviation^2 | |
variance<-variance / sum(variance) | |
variance<-as.data.frame(variance) | |
variance <-cbind(PC = as.numeric(row.names(variance)),variance) | |
if (scree == TRUE){ | |
screeplot <- ggplot(variance, aes(y=variance,x=PC))+geom_bar(stat="identity")+ylab("Proportion of Variance")+xlab("Principle Component") | |
plot(screeplot) | |
} | |
loadings <-pca$rotation | |
loadings <- sweep(loadings , 2,pca | |
$sdev , FUN="*") | |
scores <- pca$x | |
#start building bi-plots for ALL the PC pairs | |
for( i in 1:length(PC_selection)){ | |
if (i != length(PC_selection)){ | |
j = i+1 | |
vecs <- data.frame(loadings[,c(i,j)]) | |
#get the top and bottom vectors | |
tmp_vecs1a<-vecs[order(vecs[,1]),][1:disp_load,] | |
tmp_vecs1b<-vecs[rev(order(vecs[,1])),][1:disp_load,] | |
tmp_vecs2a<-vecs[order(vecs[,2]),][1:disp_load,] | |
tmp_vecs2b<-vecs[rev(order(vecs[,2])),][1:disp_load,] | |
vecs <-rbind(tmp_vecs1a,tmp_vecs1b,tmp_vecs2a,tmp_vecs2b) | |
vecs <-unique(vecs) | |
pca.df <- as.data.frame(pca$x[,c(i,j)]) | |
pca.df <- cbind(ID = row.names(pca.df),pca.df) | |
pca.df <- merge(pca.df , color_by, by=0 , all=TRUE) | |
pca.df$Row.names<-NULL | |
pca_plot <- ggplot(as.data.frame(pca.df), aes_string(x=colnames(pca.df)[2] , y=colnames(pca.df)[3], colour='factor'))+ | |
geom_point(size = 2) + | |
geom_text_repel( | |
size=2.5, | |
aes(label=ID), | |
segment.color = 'gray', | |
point.padding = unit(1.0,"lines"), | |
box.padding = unit(0.1,"lines"), | |
force = 1, | |
)+ | |
geom_point( | |
size=5, | |
shape = 3, | |
aes(x=0,y=0), | |
colour = 'black' | |
)+ | |
scale_color_hue(l=40) | |
loadings_plot <- ggplot()+ | |
geom_path( | |
data=circle, | |
aes(x , y), | |
color="black")+ | |
coord_fixed()+ | |
geom_point( | |
data=vecs, | |
aes_string(x=colnames(vecs)[1] , y=colnames(vecs)[2]), | |
color="black", | |
size=2 | |
)+ | |
geom_text_repel( | |
data=vecs, | |
aes_string(x=colnames(vecs)[1] , y=colnames(vecs)[2], label='rownames(vecs)'), | |
color = "black", | |
size=2.5, | |
segment.color = 'gray', | |
point.padding = unit(1.0,"lines"), | |
box.padding = unit(0.1,"lines"), | |
force = 1 | |
) + | |
geom_point( | |
size=5, | |
shape = 3, | |
aes(x=0,y=0), | |
colour = 'black' | |
) | |
multiplot(pca_plot , loadings_plot, cols = 2) | |
} | |
} | |
} | |
``` | |
```{r load-mapping-rates , echo = FALSE , fig.width=5} | |
map_rates = as.data.frame(read.table(file="kraken_stats.tbl",header=TRUE,sep="\t")) | |
map_rates.proportion_melt <- melt(map_rates,id.vars = 'sample' , measure.vars=c('unclassified.proportion','classified.proportion')) | |
map_rates.count_melt <- melt(map_rates,id.vars = 'sample' , measure.vars=c('unclassified.count','classified.count')) | |
row.names(map_rates) <- map_rates$sample | |
map_rates$sample <- NULL | |
``` | |
## Sequencing reads per sample: | |
```{r, mapping-bargraph , echo=FALSE, fig.cap="Read counts for all samples"} | |
ggplot(data=map_rates.count_melt, aes(x=sample,y=value,fill=variable))+geom_bar(stat="identity")+theme(axis.text.x=element_text(angle=45,hjust=1, size=4))+labs(x= "Sample",y="Number Classified",title="Number of Classified and Unclassified Reads \n Kraken")+guides(fill=guide_legend(title="Read Type")) | |
``` | |
```{r, mapping-rates , echo = FALSE , fig.cap="Proportion of classified and un-classified reads"} | |
ggplot(data=map_rates.proportion_melt, aes(x=sample,y=value,fill=variable))+geom_bar(stat="identity")+theme(axis.text.x=element_text(angle=45,hjust=1,size=4))+labs(x= "Sample",y="Proportion Classified",title="Proportion of Classified and Unclassified Reads \n Kraken")+guides(fill=guide_legend(title="Read Type")) | |
``` | |
```{r,data-prep, echo=FALSE} | |
#Sample Groups | |
sample_groups = read.table(file="sample_info.csv",sep = ",",header=TRUE) | |
sample_groups <- subset(sample_groups, sample_groups$sample_name != '30-03-R') | |
sample_groups <- subset(sample_groups, sample_groups$sample_name != '30-05-I') | |
sample_groups <- subset(sample_groups, sample_groups$sample_name != '31-13-R') | |
row.names(sample_groups)<-sample_groups$sample_name | |
#getting our groupings together | |
normal_feed <- c('06-10-R','07-13-R','08-11-R','09-03-R','30-11-R','33-12-R') | |
malnourished <- c('06-13-R','07-09-R','08-06-R','09-05-R','30-10-R','33-10-R') | |
malnourished_gm <- c('29-13-R','31-02-R','31-03-R','33-05-R','33-08-R') | |
malnourished_hlz <- c('29-02-R','29-12-R','31-08-R','31-11-R','33-04-R','33-06-R') | |
malnourished_etec <- c('29-15-R','29-16-R','30-01-R','31-07-R') | |
malnourished_gm_etec <- c('29-01-R','29-11-R','31-09-R') | |
malnourished_hlz_etec <- c('29-03-R','29-08-R','30-05-R','31-10-R') | |
ileum_normal_feed <- c('06-10-I','08-11-I','33-12-I') | |
ileum_malnourished <- c('07-09-I','09-05-I','30-10-I') | |
ileum_malnourished_gm <- c('29-13-I','31-02-I','31-03-I','31-13-I') | |
ileum_malnourished_hlz <- c('29-12-I','31-08-I','33-04-I') | |
ileum_malnourished_etec <- c('29-15-I','29-16-I') | |
ileum_malnourished_hlz_etec <- c('29-03-I','31-10-I') | |
ileum_malnourished_gm_etec <- c('29-11-I','30-03-I','31-09-I') | |
names = c(normal_feed ,malnourished ,malnourished_gm ,malnourished_hlz,malnourished_etec ,malnourished_gm_etec ,malnourished_hlz_etec ,ileum_normal_feed ,ileum_malnourished ,ileum_malnourished_gm ,ileum_malnourished_hlz,ileum_malnourished_etec ,ileum_malnourished_hlz_etec ,ileum_malnourished_gm_etec) | |
kraken_mpa <- read.csv("swine-merged.mpa",sep="\t",header=TRUE,check.names = FALSE) | |
kraken_mpa$ID<-as.character(kraken_mpa$ID) | |
kraken_mpa$'30-03-R' <- NULL | |
kraken_mpa$'30-05-I'<-NULL | |
kraken_mpa$'31-13-R'<-NULL | |
kraken_mpa <- kraken_mpa[c("ID",names)] | |
c_normal_feed <- rep( 'green' , length(normal_feed)) | |
c_ileum_normal_feed <- rep( 'green', length(ileum_normal_feed)) | |
c_malnourished <- rep( 'gold',length(malnourished)) | |
c_ileum_malnourished <- rep( 'gold', length(ileum_malnourished)) | |
c_malnourished_gm <- rep( 'cornflowerblue',length(malnourished_gm)) | |
c_ileum_malnourished_gm <- rep( 'cornflowerblue', length(ileum_malnourished_gm)) | |
c_ileum_malnourished_hlz <- rep( 'dodgerblue', length(ileum_malnourished_hlz)) | |
c_malnourished_hlz <- rep( 'dodgerblue',length(malnourished_hlz)) | |
c_malnourished_etec <- rep( 'red4',length(malnourished_etec)) | |
c_ileum_malnourished_etec <- rep( 'red4', length(ileum_malnourished_etec)) | |
c_malnourished_gm_etec <- rep( 'tomato',length(malnourished_gm_etec)) | |
c_ileum_malnourished_gm_etec <- rep( 'tomato', length(ileum_malnourished_gm_etec)) | |
c_ileum_malnourished_hlz_etec <- rep( 'firebrick', length(ileum_malnourished_hlz_etec)) | |
c_malnourished_hlz_etec <- rep( 'firebrick',length(malnourished_hlz_etec)) | |
column_colors <- c(c_normal_feed ,c_malnourished ,c_malnourished_gm ,c_malnourished_hlz,c_malnourished_etec ,c_malnourished_gm_etec ,c_malnourished_hlz_etec ,c_ileum_normal_feed ,c_ileum_malnourished ,c_ileum_malnourished_gm ,c_ileum_malnourished_hlz,c_ileum_malnourished_etec ,c_ileum_malnourished_hlz_etec ,c_ileum_malnourished_gm_etec) | |
dirlist <- c('all_taxa', 'bacteria','archaea','viruses') | |
for( i in dirlist){ | |
if (! dir.exists(i)){ | |
dir.create(i) | |
} | |
} | |
``` | |
```{r} | |
setwd('all_taxa') | |
kraken_mpa.rarefy.family <-rarefy_here(get_family(kraken_mpa) ,'all_taxa_family') | |
kraken_mpa.rarefy.genus <-rarefy_here(get_genus(kraken_mpa) ,'all_taxa_genus') | |
kraken_mpa.rarefy.species <-rarefy_here(get_species(kraken_mpa),'all_taxa_species') | |
setwd('..') | |
``` | |
```{r} | |
indices<-grep("Bacteria",kraken_mpa$ID) | |
bacteria_mpa <-kraken_mpa[indices,] | |
setwd('bacteria') | |
bacteria_mpa.rarefy.family <-rarefy_here(get_family(bacteria_mpa) ,'bacteria_family') | |
bacteria_mpa.rarefy.genus <-rarefy_here(get_genus(bacteria_mpa) ,'bacteria_genus') | |
bacteria_mpa.rarefy.species <-rarefy_here(get_species(bacteria_mpa),'bacteria_species') | |
setwd('..') | |
indices<-grep("Viruses",kraken_mpa$ID) | |
virus_mpa <-kraken_mpa[indices,] | |
setwd('viruses') | |
virus_mpa.rarefy.family <-rarefy_here(get_family(virus_mpa) ,'viruses_family') | |
virus_mpa.rarefy.genus <-rarefy_here(get_genus(virus_mpa) ,'viruses_genus') | |
virus_mpa.rarefy.species <-rarefy_here(get_species(virus_mpa),'viruses_species') | |
setwd('..') | |
indices<-grep("Archaea",kraken_mpa$ID) | |
archaea_mpa <-kraken_mpa[indices,] | |
setwd('archaea') | |
archaea_mpa.rarefy.family <-rarefy_here(get_family(kraken_mpa) ,'archeaa_family') | |
archaea_mpa.rarefy.genus <-rarefy_here(get_genus(kraken_mpa) ,'archeaa_genus') | |
archaea_mpa.rarefy.species <-rarefy_here(get_species(kraken_mpa),'archeaa_species') | |
setwd('..') | |
``` | |
#Proportional Measures | |
##All Taxa | |
```{r, echo=FALSE , fig.cap = "Proportion of observations for all taxa at Family level "} | |
p1<-barplot_proportional_data(get_family(kraken_mpa),'Family') | |
plot(p1) | |
``` | |
```{r, echo=FALSE , fig.cap = "Proportion of observations for all taxa at Genus level "} | |
p1<-barplot_proportional_data(get_genus(kraken_mpa),'Genus') | |
plot(p1) | |
``` | |
##Bacteria | |
```{r, echo=FALSE , fig.cap = "Proportion of observations for Bacteria at Family level "} | |
p1<-barplot_proportional_data(get_family(bacteria_mpa),'Family') | |
plot(p1) | |
``` | |
```{r, echo=FALSE , fig.cap = "Proportion of observations for Bacteria at Genus level ",fig.height=11, fig.width=8} | |
p1<-barplot_proportional_data(get_genus(bacteria_mpa),'Genus') | |
plot(p1) | |
``` | |
## Viral | |
```{r, echo=FALSE , fig.cap = "Proportion of observations for all taxa at Family level "} | |
p1<-barplot_proportional_data(get_family(virus_mpa),'Family') | |
plot(p1) | |
``` | |
```{r, echo=FALSE , fig.cap = "Proportion of observations for all taxa at Genus level "} | |
p1<-barplot_proportional_data(get_genus(virus_mpa),'Genus') | |
plot(p1) | |
``` | |
#Clustering | |
##All Taxa | |
```{r, echo=FALSE , fig.cap = "Clustered heat map of all taxa at Family level ",fig.height=12 , fig.width=12} | |
p2<-log2_heatmap(kraken_mpa.rarefy.family , ColumnColors = column_colors ) | |
p2() | |
``` | |
```{r, echo=FALSE , fig.cap = "Clustered heat map of all taxa at Genus level ",fig.height=12 , fig.width=12} | |
p2<-log2_heatmap(kraken_mpa.rarefy.genus, ColumnColors = column_colors) | |
p2() | |
``` | |
##Bacteria | |
```{r, echo=FALSE , fig.cap = "Clustered heat map of Bacteria at Family level ",fig.height=12 , fig.width=12} | |
p2<-log2_heatmap(bacteria_mpa.rarefy.family, ColumnColors = column_colors) | |
p2() | |
``` | |
```{r, echo=FALSE , fig.cap = "Clustered heat map of Bacteria at Genus level ",fig.height=12 , fig.width=12} | |
p2<-log2_heatmap(bacteria_mpa.rarefy.genus, ColumnColors = column_colors) | |
p2() | |
``` | |
##Viral Ecology | |
```{r, echo=FALSE , fig.cap = "Clustered heat map of Viruses at Family level ",fig.height=12 , fig.width=12} | |
p2<-log2_heatmap(virus_mpa.rarefy.genus, ColumnColors = column_colors) | |
p2() | |
``` | |
```{r, echo=FALSE , fig.cap = "Clustered heat map of Viruses at Family level ",fig.height=12 , fig.width=12} | |
p2<-log2_heatmap(virus_mpa.rarefy.family, ColumnColors = column_colors) | |
p2() | |
``` | |
#PCA and Loadings | |
## All Groups | |
```{r , build-coloring-factors} | |
groups<-as.data.frame(sample_groups$group) | |
row.names(groups) <- row.names(sample_groups) | |
colnames(groups) <-'factor' | |
nutrition<-as.data.frame(sample_groups$nutrition) | |
row.names(nutrition) <- row.names(sample_groups) | |
colnames(nutrition) <-'factor' | |
collection<-as.data.frame(sample_groups$collection_type) | |
row.names(collection)<- row.names(sample_groups) | |
colnames(collection) <- 'factor' | |
supplement<-as.data.frame(sample_groups$supplement) | |
row.names(supplement) <- row.names(sample_groups) | |
colnames(supplement)<-'factor' | |
etec_challenge <- as.data.frame(sample_groups$etec_challenge) | |
row.names(etec_challenge) <- row.names(sample_groups) | |
colnames(etec_challenge)<-'factor' | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and Loading plots of based on all measurements, samples colored by treatment"} | |
better_bi_plot( kraken_mpa.rarefy.genus , color_by = groups) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and Loading plots of based on all measurements, samples colored by collection location"} | |
better_bi_plot( kraken_mpa.rarefy.genus , color_by = collection , scree = FALSE) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and Loading plots of based on all measurements, samples colored by collection nutrition"} | |
better_bi_plot( kraken_mpa.rarefy.genus , color_by = nutrition , scree = FALSE) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and Loading plots of based on all measurements, samples colored by supplement"} | |
better_bi_plot( kraken_mpa.rarefy.genus , color_by = supplement , scree = FALSE) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and Loading plots of based on all measurements, samples colored by challenge"} | |
better_bi_plot( kraken_mpa.rarefy.genus , color_by = etec_challenge, scree = FALSE) | |
``` | |
```{r} | |
fecal<-subset(sample_groups, collection_type == 'fecal', select = sample_name) | |
illial <-subset(sample_groups, collection_type == 'ileum', select = sample_name) | |
fecal.rarefy.genus <- kraken_mpa.rarefy.genus[row.names(fecal),] | |
fecal.groups <- as.data.frame(groups[row.names(fecal),]) | |
row.names(fecal.groups)<-row.names(fecal) | |
colnames(fecal.groups)<-'factor' | |
fecal.nutrition <- as.data.frame( nutrition[row.names(fecal),]) | |
fecal.collection <- as.data.frame( collection[row.names(fecal),]) | |
fecal.supplement <- as.data.frame( supplement[row.names(fecal),]) | |
fecal.etec_challenge <- as.data.frame( etec_challenge[row.names(fecal),]) | |
row.names(fecal.nutrition ) <- row.names(fecal) | |
row.names(fecal.collection ) <- row.names(fecal) | |
row.names(fecal.supplement ) <- row.names(fecal) | |
row.names(fecal.etec_challenge) <- row.names(fecal) | |
colnames(fecal.nutrition )<- 'factor' | |
colnames(fecal.collection )<- 'factor' | |
colnames(fecal.supplement )<- 'factor' | |
colnames(fecal.etec_challenge)<- 'factor' | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and Loading plots of fecal samples based on all measurements, samples colored by group"} | |
better_bi_plot(fecal.rarefy.genus, color_by = fecal.groups) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and loading plots of fecal samples , colored by nutrition"} | |
better_bi_plot( fecal.rarefy.genus, color_by = fecal.nutrition , scree = FALSE) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and loading plots of fecal samples , colored by collection location"} | |
better_bi_plot( fecal.rarefy.genus, color_by = fecal.collection , scree = FALSE) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and loading plots of fecal samples , colored by supplement"} | |
better_bi_plot( fecal.rarefy.genus, color_by = fecal.supplement , scree = FALSE) | |
``` | |
```{r , fig.height = 12 , fig.width = 18 , fig.caption="PCA and loading plots of fecal samples , colored by challenge"} | |
better_bi_plot( fecal.rarefy.genus, color_by = fecal.etec_challenge, scree = FALSE) | |
``` | |
```{r , warning=FALSE} | |
fecal.rarefy.genus.t <- t(kraken_mpa.rarefy.genus) | |
fecal.rarefy.genus.t <- fecal.rarefy.genus.t[rowSums(fecal.rarefy.genus.t) != 0, ] | |
pvalues = c() | |
bacteria = c() | |
for( row in row.names(fecal.rarefy.genus.t)){ | |
x<- fecal.rarefy.genus.t[row,row.names(fecal)] | |
y<- fecal.rarefy.genus.t[row,row.names(illial)] | |
test <- wilcox.test(x, y , exact = TRUE , conf.int = TRUE ) | |
if(test$p.value <= 0.05){ | |
pvalues <- c (pvalues , test$p.value) | |
bacteria <- c (bacteria, row) | |
} | |
# if(test$p.value < 0.5){ | |
# boxplot(x,y,main = row) | |
# } | |
} | |
pander(data.frame(bacteria , pvalues)) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment