Skip to content

Instantly share code, notes, and snippets.

@jnpaulson
Last active August 29, 2015 14:09
Show Gist options
  • Save jnpaulson/e2a0afd677a426b42e08 to your computer and use it in GitHub Desktop.
Save jnpaulson/e2a0afd677a426b42e08 to your computer and use it in GitHub Desktop.
pca/pcoa plots of the american gut consortium data
require(vegan)
require(biom)
require(metagenomeSeq)
files = grep(".biom$",list.files(),value=TRUE)
files2= grep(".txt",list.files(),value=TRUE)
files3 = sprintf("%s_MRexperiment.rdata",files)
# This generates the data - only run once.
for(i in 1:length(files)){
obj = load_biom(files[i])
pd = read.csv(files2[i],sep="\t",stringsAsFactors=FALSE)
mc = match(colnames(obj),pd[,1])
rownames(pd) = pd[,1]
pd = AnnotatedDataFrame(pd[mc,])
fd = AnnotatedDataFrame(fData(obj))
obj = newMRexperiment(MRcounts(obj),featureData = fd,phenoData = pd)
obj = cumNorm(obj,p=cumNormStat(obj))
save(obj,file=files3[i])
rm(list=c(obj,pd,fd))
}
# this actually plots the data
for(i in files3){
load(i)
show(i)
#genus
genus = log2(aggTax(obj,lvl="X6",norm=TRUE,out='matrix')+1)
#species
species = log2(aggTax(obj,lvl="X7",norm=TRUE,out='matrix')+1)
sampleID = factor(sapply(strsplit(colnames(obj),"\\."),function(i){i[1]}))
pdf(sprintf("~/Desktop/%s.pdf",i))
plotOrd(obj,main=files[i],pch=21,bg=sampleID) # check options for pcoa / various distances
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l")
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID))
dev.off()
pdf(sprintf("~/Desktop/%s_genus.pdf",i))
plotOrd(genus,main=files[i],pch=21,bg=sampleID) # check options for pcoa / various distances
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l")
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID))
dev.off()
pdf(sprintf("~/Desktop/%s_species.pdf",i))
plotOrd(species,main=files[i],pch=21,bg=sampleID) # check options for pcoa / various distances
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l")
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID))
dev.off()
}
## Placing names / separating body sites etc
location = sampleID = factor(sapply(strsplit(colnames(obj),"\\."),function(i){i[2]}))
locals = lapply(unique(location),function(i){
which(location==i)
})
names(locals) = unique(location)
locals = locals[which(sapply(locals,length)>1)]
for(j in locals){
obj2= obj[,j]
genus2=genus[,j]
species2 = species[,j]
sampleID2 = sampleID[j]
pdf(sprintf("~/Desktop/forjustin/%s.pdf",paste(i,j,sep=":")))
vals = plotOrd(obj2,main=files[i],pch=21,bg=sampleID2,ret=TRUE)
text(x=vals,labels=sampleID2,cex=.5)
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l")
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID))
dev.off()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment