-
-
Save linglingtingfei/b0fcef682418e610238ce674c587c2fe to your computer and use it in GitHub Desktop.
Gene co-expression network analysis in R (WGCNA package)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(WGCNA) | |
library(DESeq2) | |
library(cluster) | |
options(stringsAsFactors = FALSE); | |
setwd(dir = '/home/nlong/Public/clc') | |
x=read.csv("ccllcc", sep='\t', header=T) | |
datExpr = as.data.frame(x[, -c(1)]); | |
#names(datExpr) = names(x)[, -c(1)] | |
rownames(datExpr) = x$gene | |
datExpr=t(datExpr) | |
gsg = goodSamplesGenes(datExpr, verbose = 3); | |
gsg$allOK | |
if (!gsg$allOK) | |
{ | |
# Optionally, print the gene and sample names that were removed: | |
if (sum(!gsg$goodGenes)>0) | |
printFlush(paste("Removing genes:", paste(names(datExpr)[!gsg$goodGenes], collapse = ", "))); | |
if (sum(!gsg$goodSamples)>0) | |
printFlush(paste("Removing samples:", paste(rownames(datExpr)[!gsg$goodSamples], collapse = ", "))); | |
# Remove the offending genes and samples from the data: | |
datExpr = datExpr[gsg$goodSamples, gsg$goodGenes] | |
} | |
datExpr<-as.matrix(t(datExpr)) | |
condition<-(c(rep("norm",4), rep("still",4), rep("norm",4), rep("still",4))) | |
coldata<-data.frame(row.names=colnames(datExpr), condition) | |
dds=DESeqDataSetFromMatrix(datExpr, colData=coldata, design=~condition) | |
dds2=DESeq(dds,betaPrior=F) | |
vsd=getVarianceStabilizedData(dds2) | |
vsd2=t(vsd) | |
tree=flashClust(dist(vsd2), method="average") | |
sizeGrWindow(12,9) | |
par(cex = 0.6) | |
par(mar = c(0,4,2,0)) | |
plot(tree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2) | |
powers = c(c(1:10), seq(from = 12, to=40, by=2)) | |
options(stringsAsFactors = FALSE) | |
ALLOW_WGCNA_THREADS=20 | |
#sft = pickSoftThreshold(vsd2, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid') | |
sft = pickSoftThreshold(vsd3, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid') | |
sizeGrWindow(9,5) | |
par(mfrow=c(1,2)) | |
cex1=0.9 | |
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], | |
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", | |
main = paste("Scale independence")); | |
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], | |
labels=powers,cex=cex1,col="red"); | |
# this line corresponds to using an R^2 cut-off of h | |
abline(h=0.90,col="red") | |
# Mean connectivity as a function of the soft-thresholding power | |
plot(sft$fitIndices[,1], sft$fitIndices[,5], | |
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", | |
main = paste("Mean connectivity")) | |
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") | |
abline(h=0.90, col="red") | |
softPower=16 | |
adjacency=adjacency(vsd2,power=softPower, type= 'unsigned') | |
TOM=TOMsimilarity(adjacency, TOMType="unsigned", TOMDenom='mean') | |
dissTOM=1-TOM | |
geneTree=flashClust(as.dist(dissTOM), method="average") | |
sizeGrWindow(12,9) | |
plot(geneTree,xlab="", sub="", main="Gene clustering on TOM-based dissimilarity", labels=FALSE, hang=0.04) | |
minModuleSize=20 | |
dynamicMods=cutreeDynamic(dendro=geneTree, cutHeight = "hybrid", distM=dissTOM, | |
deepSplit=2, pamRespectsDendro=FALSE, minAbsGap=0.05, | |
minClusterSize=minModuleSize) | |
table(dynamicMods) | |
dynamicColors=labels2colors(dynamicMods) | |
table(dynamicColors) | |
sizeGrWindow(8,6) | |
plotDendroAndColors(geneTree,dynamicColors, "Dynamic Tree Cut", | |
dendroLabels=FALSE, hang=0.03, | |
addGuide=TRUE, guideHang=0.05, | |
main= "Gene dendrogram") | |
MEList=moduleEigengenes(vsd3, colors=dynamicColors) | |
MEs = MEList$eigengenes | |
MEDiss=1-cor(MEs) | |
METree = flashClust(as.dist(MEDiss), method='average') | |
sizeGrWindow(7,6) | |
plot(METree, main="Clustering of module eigengenes", xlab='', sub='') | |
MEDissThres = 0.1 | |
abline(h=MEDissThres, col="red") | |
merge=mergeCloseModules(vsd2, dynamicColors, useAbs=T, cutHeight = MEDissThres, verbose=3) | |
mergedColors=merge$colors | |
mergedMEs=merge$newMEs | |
sizeGrWindow(12,9) | |
plotDendroAndColors(geneTree, cbind(mergedColors, mergedColors), | |
c("Dynamic Tree Cut", "Merged dynamic"), | |
dendroLabels=FALSE, hang=0.03, | |
addGuide=TRUE, guideHang = 0.05) | |
dev.off() | |
moduleColors=mergedColors | |
colorOrder=c("grey", standardColors(50)) | |
moduleLabels=match(moduleColors, colorOrder)-1 | |
MEs=mergedMEs | |
save(MEs, moduleLabels, moduleColors, geneTree, file="intermediate.RData") | |
MEList | |
nGenes=ncol(vsd2) | |
nSamples=nrow(vsd2) | |
MEs0=moduleEigengenes(vsd2,moduleColors)$eigengenes | |
MEs=orderMEs(MEs0) | |
###signedKME | |
hobag=signedKME(vsd2, mergedMEs, outputColumnName="kME", corFnc="cor", corOptions= "use = 'p'") | |
write.table(hobag, file='hobag.csv', sep='\t') | |
ADJ1=abs(cor(vsd2,use="p"))^6 | |
Alldegrees1=intramodularConnectivity(ADJ1, colorh1) | |
write.table(Alldegrees1, file='connectivity.csv', sep='\t') | |
###EXTRACT MODULES### | |
gene.names=colnames(vsd2) | |
module_colors= setdiff(unique(moduleColors), "grey") | |
for (color in module_colors){ | |
module=gene.names[which(dynamicColors==color)] | |
write.table(module, paste("module_",color, ".txt",sep=""), sep="\t", row.names=FALSE, col.names=FALSE,quote=FALSE) | |
} | |
###INTRAMODULAR CONNECTIVITY | |
datME=mergedMEs | |
signif(cor(datME, use="p"),2) | |
dissimME=(1-t(cor(datME,method='p')))/2 | |
hclustdatME=hclust(as.dist(dissimME),method="average") | |
par(mfrow=c(1,1)) | |
plot(hclustdatME, main="Clustering tree based of the module eigengenes") | |
sizeGrWindow(8,9) | |
y=(c(1,1,1,1,2,2,2,2)) | |
plotMEpairs(datME,y) | |
sizeGrWindow(8,7); | |
which.module="pink" | |
ME=datME[, paste("ME",which.module, sep="")] | |
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2)) | |
plotMat(t(scale(vsd2[,colorh1==which.module ]) ), | |
nrgcols=30,rlabels=F,rcols=which.module, | |
main=which.module, cex.main=2) | |
par(mar=c(5, 4.2, 0, 0.7)) | |
barplot(ME, col=which.module, main="", cex.main=2, | |
ylab="eigengene expression",xlab="array sample") | |
signif(cor(y,datME, use="p"),2) | |
###FROM TREE-CUTTING COMPARISON | |
vsd3=vsd2[, rank(-k, ties.method="first")<=3600] | |
dissADJ=1-ADJ1 | |
hierADJ=hclust(as.dist(dissADJ), method="average") | |
###INTRAMODULAR CONNECTIVITY | |
ADJ1=abs(cor(vsd3,use="p"))^6 ##vsd3 = vsd2 with subset of 3600 connected genes | |
dissTOM=TOMdist(ADJ1) | |
hierTOM=hclust(as.dist(dissTOM), method="average") | |
colorStaticTOM = as.character(cutreeStaticColor(hierTOM, cutHeight=.99, minSize=20)) | |
colorDynamicTOM = labels2colors (cutreeDynamic(hierTOM,method="tree")) | |
colorDynamicHybridTOM = labels2colors(cutreeDynamic(hierTOM, distM= dissTOM , cutHeight = 0.998, | |
deepSplit=2, pamRespectsDendro = FALSE)) | |
colorh1= colorDynamicHybridTOM | |
Alldegrees1=intramodularConnectivity(ADJ1, colorh1) | |
#head(Alldegrees1) | |
colorlevels=unique(colorh1) | |
sizeGrWindow(9,6) | |
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2))) | |
par(mar = c(4,5,3,1)) | |
for (i in c(1:length(colorlevels))) | |
{ | |
whichmodule=colorlevels[[i]]; | |
restrict1 = (colorh1==whichmodule); | |
verboseScatterplot(Alldegrees1$kWithin[restrict1], | |
GeneSignificance[restrict1], col=colorh1[restrict1], | |
main=whichmodule, | |
xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE) | |
} | |
######MODULE MEMBERSHIP -- "EIGENGENE" | |
modNames = substring(names(MEs), 3) | |
module = 'turquoise' | |
column = match(module, modNames); | |
moduleGenes=moduleColors==module; | |
geneModuleMembership=as.data.frame(cor(vsd3,MEs0, use='p')); | |
MMPvalue=as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)); | |
names(geneModuleMembership)=paste("MM", modNames, sep=""); | |
names(MMPvalue)=paste("p.MM", modNames, sep="") | |
sizeGrWindow(7,7); | |
par(mfrow=c(1,1)); | |
verboseScatterplot(abs(geneModuleMembership[moduleGenes,column]), | |
abs() | |
#####NETWORK CONNECTIVITY GRAPHS | |
dissTOM = 1-TOMsimilarityFromExpr(vsd3, power = 12, TOMType= 'signed hybrid'); | |
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap | |
plotTOM = dissTOM^7; | |
# Set diagonal to NA for a nicer plot | |
diag(plotTOM) = NA; | |
# Call the plot function | |
sizeGrWindow(9,9) | |
TOMplot(plotTOM, geneTree, mergedColors, main = "Network heatmap plot, all genes") | |
nSelect = 400 | |
# For reproducibility, we set the random seed | |
set.seed(10); | |
select = sample(nGenes, size = nSelect); | |
selectTOM = dissTOM[select, select]; | |
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster. | |
selectTree = flashClust(as.dist(selectTOM), method = "average") | |
selectColors = moduleColors[select]; | |
# Open a graphical window | |
sizeGrWindow(9,9) | |
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing | |
# the color palette; setting the diagonal to NA also improves the clarity of the plot | |
plotDiss = selectTOM^7; | |
diag(plotDiss) = NA; | |
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes") | |
MEs = moduleEigengenes(vsd2, moduleColors)$eigengenes |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment