Last active
May 17, 2016 21:41
-
-
Save dylanbstorey/0a5ca747365b80e088ce7edeff0c3ea4 to your computer and use it in GitHub Desktop.
Better Bi Plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(reshape2) | |
library(vegan) | |
library(gplots) | |
library(ggrepel) | |
library(fossil) | |
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { | |
require(grid) | |
plots <- c(list(...), plotlist) | |
numPlots = length(plots) | |
if (is.null(layout)) { | |
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), | |
ncol = cols, nrow = ceiling(numPlots/cols)) | |
} | |
if (numPlots==1) { | |
print(plots[[1]]) | |
} else { | |
grid.newpage() | |
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) | |
for (i in 1:numPlots) { | |
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) | |
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, | |
layout.pos.col = matchidx$col)) | |
} | |
} | |
} | |
better_bi_plot <- function(matrix , logtransform = TRUE , PC_selection = c(1,2) , disp_load = 10 , color_by = NULL, vector_scale = 25){ | |
#' Calculates PCA and build a bi-plot using ggrepel for a prettier output | |
#' @param matrix , the matrix of data | |
#' @param logtransform , should the data be transformed | |
#' @param PC_selection , a list of PCs to retain for plotting defaults to c(1,2) | |
#' @param disp_load , which loadings should be displayed (defaults to the strongest 10) | |
#' | |
#' @returns bi_plot data frame , all plots are in bi_plot$plot , screeplot in bi_plot$screeplot , pca in bi_plot$pca | |
#build circle | |
r= 1 | |
tt <- seq(0,2*pi,length.out = 1000) | |
xx <- 0 + r * cos(tt) | |
yy <- 0 + r * sin(tt) | |
circle<- (data.frame(x = xx, y = yy , r = r)) | |
#Transform if wished | |
if (logtransform == TRUE){ | |
matrix <- log2(matrix) | |
matrix[matrix==-Inf]<-0 | |
matrix<-matrix[,apply(matrix, 2, var, na.rm=TRUE) != 0] | |
} | |
#PCA perform pca | |
pca <- prcomp(matrix, center = TRUE, scale.=TRUE) | |
#Scree Plot | |
std_deviation <- pca$sdev | |
variance<-std_deviation^2 | |
variance<-variance / sum(variance) | |
variance<-as.data.frame(variance) | |
variance <-cbind(PC = as.numeric(row.names(variance)),variance) | |
screeplot <- ggplot(variance, aes(y=variance,x=PC))+geom_bar(stat="identity")+ylab("Proportion of Variance")+xlab("Principle Component") | |
plot(screeplot) | |
#calculate loadings | |
loadings <-pca$rotation | |
loadings <- sweep(loadings , 2,pca | |
$sdev , FUN="*") | |
scores <- pca$x | |
#start building bi-plots for ALL the PC pairs | |
for( i in 1:length(PC_selection)){ | |
if (i != length(PC_selection)){ | |
j = i+1 | |
vecs <- data.frame(loadings[,c(i,j)]) | |
#get the top and bottom vectors | |
tmp_vecs1a<-vecs[order(vecs[,1]),][1:disp_load,] | |
tmp_vecs1b<-vecs[rev(order(vecs[,1])),][1:disp_load,] | |
tmp_vecs2a<-vecs[order(vecs[,2]),][1:disp_load,] | |
tmp_vecs2b<-vecs[rev(order(vecs[,2])),][1:disp_load,] | |
vecs <-rbind(tmp_vecs1a,tmp_vecs1b,tmp_vecs2a,tmp_vecs2b) | |
vecs <-unique(vecs) | |
pca.df <- as.data.frame(pca$x[,c(i,j)]) | |
pca.df <- cbind(ID = row.names(pca.df),pca.df) | |
if ( is.null(color_by)){ | |
pca.df[,'factor'] <- 'sample' | |
} | |
else{ | |
pca.df <- merge(pca.df , color_by, by=0 , all=TRUE) | |
} | |
pca.df$Row.names<-NULL | |
pca_plot <- ggplot(as.data.frame(pca.df), aes_string(x=colnames(pca.df)[2] , y=colnames(pca.df)[3], colour='factor'))+ | |
geom_point(size = 2) + | |
geom_text_repel( | |
size=2.5, | |
aes(label=ID), | |
segment.color = 'gray', | |
point.padding = unit(1.0,"lines"), | |
box.padding = unit(0.1,"lines"), | |
force = 1, | |
)+ | |
geom_point( | |
size=5, | |
shape = 3, | |
aes(x=0,y=0), | |
colour = 'black' | |
)+ | |
scale_color_hue(l=40) | |
loadings_plot <- ggplot()+ | |
geom_path( | |
data=circle, | |
aes(x , y), | |
color="black")+ | |
coord_fixed()+ | |
geom_point( | |
data=vecs, | |
aes_string(x=colnames(vecs)[1] , y=colnames(vecs)[2]), | |
color="black", | |
size=2 | |
)+ | |
geom_text_repel( | |
data=vecs, | |
aes_string(x=colnames(vecs)[1] , y=colnames(vecs)[2], label='rownames(vecs)'), | |
color = "black", | |
size=2.5, | |
segment.color = 'gray', | |
point.padding = unit(1.0,"lines"), | |
box.padding = unit(0.1,"lines"), | |
force = 1 | |
) + | |
geom_point( | |
size=5, | |
shape = 3, | |
aes(x=0,y=0), | |
colour = 'black' | |
) | |
multiplot(pca_plot , loadings_plot, cols = 2) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment