Last active
April 5, 2016 21:38
-
-
Save jasdumas/0fc8ffa8826ebe7ee9bc874a15a5cf76 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load required packages ## | |
library(GEOquery) | |
library(reshape2) | |
library(survival) | |
library(ggplot2) | |
library(GGally) | |
library(plotly) | |
## Download data from GEO ## | |
GSE = "GSE19915" | |
GPL = "GPL3883" | |
data.series = getGEO(GEO = GSE, AnnotGPL = FALSE, getGPL = FALSE) | |
data.platform = getGEO(GPL) | |
data.index = match(GPL, sapply(data.series, annotation)) | |
data.p = pData(data.series[[data.index]]) | |
data.expr = exprs(data.series[[data.index]]) | |
common = intersect(colnames(data.expr), rownames(data.p)) | |
m1 = match(common, colnames(data.expr)) | |
m2 = match(common, rownames(data.p)) | |
data.expr = data.expr[,m1] | |
data.p = data.p[m2,] | |
## generate boxplot of expression profiles ## | |
title = "samples" | |
s.num = 1:ncol(data.expr) | |
n = ncol(data.expr) | |
if (n > 30) { | |
s.num = sample(1:n, 30) | |
title = "selected samples" | |
} | |
title = paste0(GSE, "/", GPL, " ", title) | |
fixed.df <- as.data.frame(x=data.expr[,s.num], stringsAsFactors = FALSE) | |
x1 <- reshape2::melt(fixed.df, na.rm = TRUE, id.vars = NULL, | |
variable.name = "variable", value.name = "value") | |
exp.prof.plot <- ggplot(x1, aes(variable, value)) + | |
geom_boxplot(outlier.colour = "green") + | |
labs(title = title, y = "log2 expression", x = "") + | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
ggplotly(exp.prof.plot) | |
################################################################################ | |
# generates dotplot evaluating differential expression for expression | |
# vector 'x' and group labels 'y'. The 'GroupNames' are levels of 'y' in | |
# in presentation order, and 'group.names' are optional names for these levels | |
################################################################################ | |
stripchart2 <- function(x,y, GroupNames, group.names = NULL, col = NULL, | |
main = "", ...) { | |
s = split(x,y) | |
num.groups = sum(sapply(s, function(x)!all(is.na(x)))) | |
stats = !(all(is.na(x) | length(s) == 0 | num.groups < 2)) | |
if (is.null(group.names)) group.names = GroupNames | |
if (is.null(col)) col = 1:length(s) | |
add = NULL | |
if (stats & length(s) == 2) { | |
m = lapply(s,mean, na.rm=TRUE) | |
fc = 2**(m[[2]] - m[[1]]) | |
if (group.names[1] > group.names[2]) { | |
fc = 1/fc | |
} | |
fc = round(fc, 2) | |
count.na <-function(x) sum(!is.na(x)) | |
n = sapply(s, count.na) | |
if (min(n) > 1) { | |
t = t.test(s[[1]], s[[2]]) | |
p = round(t$p.value, 3) | |
if (p < 0.001) { | |
p = "(P < 0.001)" | |
} else { | |
p = paste0("(P = ", p, ")") | |
} | |
add = paste("\nFC = ", fc, p, collapse = "") | |
} | |
} else if (stats) { | |
l = lm(x~y); l = summary(l) | |
if (any(l$df == 0)) { | |
add = "\nP = NA" | |
} else { | |
p = 1-pf(l$fstatistic[1], l$fstatistic[2], l$fstatistic[3]) | |
p = round(p, 3) | |
if (p < 0.001) { | |
add = "\nP < 0.001" | |
} else { | |
add = paste0("\nP = ", p) | |
} | |
} | |
} | |
if (is.null(main)) { | |
main = "" | |
} else { | |
main = paste(main, add) | |
} | |
m = melt(s, na.rm=FALSE) | |
# re-order levels based on input order according to GroupNames | |
# this must be done here, as 'melt' reorders alphabetically | |
f <-function(x,l) { | |
w = which(l%in%x) | |
if (length(w) == 0) return(NA) | |
w | |
} | |
m$L1 = reorder(m$L1, sapply(m$L1,f,GroupNames)) | |
s2 = split(m$value, m$L1) | |
n.groups = sapply(s2, function(x)sum(!is.na(x))) | |
n.groups = paste0("(n=", n.groups, ")") | |
group.names = paste0(group.names, "\n", n.groups) | |
mean.no.na <<- function(x) mean(x,na.rm=TRUE) | |
stripchart3 <- ggplot(m, aes(x = as.factor(L1), y = value, color=L1)) | |
return(ggplotly(stripchart3 + | |
labs(title = main, y = "log2 expression", x="") + | |
theme(legend.position="none", | |
axis.text.x = element_text(face = "bold", color = "black")) + | |
scale_x_discrete(labels=group.names) + | |
geom_point(position = position_jitter(h=0,w=NULL), aes(colour = L1), na.rm = TRUE) + | |
scale_colour_manual(values = col) + | |
geom_errorbar(stat = "summary", fun.y = "mean.no.na", width=0.8, | |
aes(ymax=..y..,ymin=..y..))) | |
) | |
} | |
## Differential Expression Analysis ## | |
probe = "11058" | |
column = "characteristics_ch1.2" | |
groups = c("tumor grade: G1","tumor grade: G2","tumor grade: G3","tumor grade: Gx") | |
x = data.expr[probe,] | |
y = as.character(data.p[,column]) | |
k = y%in% groups | |
y[!k] = NA | |
y = factor(y) | |
group.names = c("tumor grade: G1","tumor grade: G2","tumor grade: G3","tumor grade: Gx") | |
main = paste(GSE, "ABR|N/A (11058)", sep = ": ") | |
col = c("black","red","green3","blue") | |
print(stripchart2(x,y, groups, group.names = group.names, main = main, col=col)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment