Skip to content

Instantly share code, notes, and snippets.

@linglingtingfei
Forked from n-long/wgcna.R
Created September 12, 2018 14:21
Show Gist options
  • Save linglingtingfei/b0fcef682418e610238ce674c587c2fe to your computer and use it in GitHub Desktop.
Save linglingtingfei/b0fcef682418e610238ce674c587c2fe to your computer and use it in GitHub Desktop.
Gene co-expression network analysis in R (WGCNA package)
library(WGCNA)
library(DESeq2)
library(cluster)
options(stringsAsFactors = FALSE);
setwd(dir = '/home/nlong/Public/clc')
x=read.csv("ccllcc", sep='\t', header=T)
datExpr = as.data.frame(x[, -c(1)]);
#names(datExpr) = names(x)[, -c(1)]
rownames(datExpr) = x$gene
datExpr=t(datExpr)
gsg = goodSamplesGenes(datExpr, verbose = 3);
gsg$allOK
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr = datExpr[gsg$goodSamples, gsg$goodGenes]
}
datExpr<-as.matrix(t(datExpr))
condition<-(c(rep("norm",4), rep("still",4), rep("norm",4), rep("still",4)))
coldata<-data.frame(row.names=colnames(datExpr), condition)
dds=DESeqDataSetFromMatrix(datExpr, colData=coldata, design=~condition)
dds2=DESeq(dds,betaPrior=F)
vsd=getVarianceStabilizedData(dds2)
vsd2=t(vsd)
tree=flashClust(dist(vsd2), method="average")
sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(tree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
powers = c(c(1:10), seq(from = 12, to=40, by=2))
options(stringsAsFactors = FALSE)
ALLOW_WGCNA_THREADS=20
#sft = pickSoftThreshold(vsd2, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid')
sft = pickSoftThreshold(vsd3, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid')
sizeGrWindow(9,5)
par(mfrow=c(1,2))
cex1=0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
abline(h=0.90, col="red")
softPower=16
adjacency=adjacency(vsd2,power=softPower, type= 'unsigned')
TOM=TOMsimilarity(adjacency, TOMType="unsigned", TOMDenom='mean')
dissTOM=1-TOM
geneTree=flashClust(as.dist(dissTOM), method="average")
sizeGrWindow(12,9)
plot(geneTree,xlab="", sub="", main="Gene clustering on TOM-based dissimilarity", labels=FALSE, hang=0.04)
minModuleSize=20
dynamicMods=cutreeDynamic(dendro=geneTree, cutHeight = "hybrid", distM=dissTOM,
deepSplit=2, pamRespectsDendro=FALSE, minAbsGap=0.05,
minClusterSize=minModuleSize)
table(dynamicMods)
dynamicColors=labels2colors(dynamicMods)
table(dynamicColors)
sizeGrWindow(8,6)
plotDendroAndColors(geneTree,dynamicColors, "Dynamic Tree Cut",
dendroLabels=FALSE, hang=0.03,
addGuide=TRUE, guideHang=0.05,
main= "Gene dendrogram")
MEList=moduleEigengenes(vsd3, colors=dynamicColors)
MEs = MEList$eigengenes
MEDiss=1-cor(MEs)
METree = flashClust(as.dist(MEDiss), method='average')
sizeGrWindow(7,6)
plot(METree, main="Clustering of module eigengenes", xlab='', sub='')
MEDissThres = 0.1
abline(h=MEDissThres, col="red")
merge=mergeCloseModules(vsd2, dynamicColors, useAbs=T, cutHeight = MEDissThres, verbose=3)
mergedColors=merge$colors
mergedMEs=merge$newMEs
sizeGrWindow(12,9)
plotDendroAndColors(geneTree, cbind(mergedColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels=FALSE, hang=0.03,
addGuide=TRUE, guideHang = 0.05)
dev.off()
moduleColors=mergedColors
colorOrder=c("grey", standardColors(50))
moduleLabels=match(moduleColors, colorOrder)-1
MEs=mergedMEs
save(MEs, moduleLabels, moduleColors, geneTree, file="intermediate.RData")
MEList
nGenes=ncol(vsd2)
nSamples=nrow(vsd2)
MEs0=moduleEigengenes(vsd2,moduleColors)$eigengenes
MEs=orderMEs(MEs0)
###signedKME
hobag=signedKME(vsd2, mergedMEs, outputColumnName="kME", corFnc="cor", corOptions= "use = 'p'")
write.table(hobag, file='hobag.csv', sep='\t')
ADJ1=abs(cor(vsd2,use="p"))^6
Alldegrees1=intramodularConnectivity(ADJ1, colorh1)
write.table(Alldegrees1, file='connectivity.csv', sep='\t')
###EXTRACT MODULES###
gene.names=colnames(vsd2)
module_colors= setdiff(unique(moduleColors), "grey")
for (color in module_colors){
module=gene.names[which(dynamicColors==color)]
write.table(module, paste("module_",color, ".txt",sep=""), sep="\t", row.names=FALSE, col.names=FALSE,quote=FALSE)
}
###INTRAMODULAR CONNECTIVITY
datME=mergedMEs
signif(cor(datME, use="p"),2)
dissimME=(1-t(cor(datME,method='p')))/2
hclustdatME=hclust(as.dist(dissimME),method="average")
par(mfrow=c(1,1))
plot(hclustdatME, main="Clustering tree based of the module eigengenes")
sizeGrWindow(8,9)
y=(c(1,1,1,1,2,2,2,2))
plotMEpairs(datME,y)
sizeGrWindow(8,7);
which.module="pink"
ME=datME[, paste("ME",which.module, sep="")]
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
plotMat(t(scale(vsd2[,colorh1==which.module ]) ),
nrgcols=30,rlabels=F,rcols=which.module,
main=which.module, cex.main=2)
par(mar=c(5, 4.2, 0, 0.7))
barplot(ME, col=which.module, main="", cex.main=2,
ylab="eigengene expression",xlab="array sample")
signif(cor(y,datME, use="p"),2)
###FROM TREE-CUTTING COMPARISON
vsd3=vsd2[, rank(-k, ties.method="first")<=3600]
dissADJ=1-ADJ1
hierADJ=hclust(as.dist(dissADJ), method="average")
###INTRAMODULAR CONNECTIVITY
ADJ1=abs(cor(vsd3,use="p"))^6 ##vsd3 = vsd2 with subset of 3600 connected genes
dissTOM=TOMdist(ADJ1)
hierTOM=hclust(as.dist(dissTOM), method="average")
colorStaticTOM = as.character(cutreeStaticColor(hierTOM, cutHeight=.99, minSize=20))
colorDynamicTOM = labels2colors (cutreeDynamic(hierTOM,method="tree"))
colorDynamicHybridTOM = labels2colors(cutreeDynamic(hierTOM, distM= dissTOM , cutHeight = 0.998,
deepSplit=2, pamRespectsDendro = FALSE))
colorh1= colorDynamicHybridTOM
Alldegrees1=intramodularConnectivity(ADJ1, colorh1)
#head(Alldegrees1)
colorlevels=unique(colorh1)
sizeGrWindow(9,6)
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
par(mar = c(4,5,3,1))
for (i in c(1:length(colorlevels)))
{
whichmodule=colorlevels[[i]];
restrict1 = (colorh1==whichmodule);
verboseScatterplot(Alldegrees1$kWithin[restrict1],
GeneSignificance[restrict1], col=colorh1[restrict1],
main=whichmodule,
xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
}
######MODULE MEMBERSHIP -- "EIGENGENE"
modNames = substring(names(MEs), 3)
module = 'turquoise'
column = match(module, modNames);
moduleGenes=moduleColors==module;
geneModuleMembership=as.data.frame(cor(vsd3,MEs0, use='p'));
MMPvalue=as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership)=paste("MM", modNames, sep="");
names(MMPvalue)=paste("p.MM", modNames, sep="")
sizeGrWindow(7,7);
par(mfrow=c(1,1));
verboseScatterplot(abs(geneModuleMembership[moduleGenes,column]),
abs()
#####NETWORK CONNECTIVITY GRAPHS
dissTOM = 1-TOMsimilarityFromExpr(vsd3, power = 12, TOMType= 'signed hybrid');
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
sizeGrWindow(9,9)
TOMplot(plotTOM, geneTree, mergedColors, main = "Network heatmap plot, all genes")
nSelect = 400
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = flashClust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];
# Open a graphical window
sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
MEs = moduleEigengenes(vsd2, moduleColors)$eigengenes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment