Skip to content

Instantly share code, notes, and snippets.

@johnlees
Last active April 24, 2018 15:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnlees/64013095026ccd77ff751fc2985b6fe2 to your computer and use it in GitHub Desktop.
Save johnlees/64013095026ccd77ff751fc2985b6fe2 to your computer and use it in GitHub Desktop.
Gene expression modules for S. pneumoniae from PneumoExpress co-expression matrix
require(WGCNA)
# See:
# https://labs.genetics.ucla.edu/horvath/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-man.pdf
SPV_2_name = read.delim("SPV_2_name.txt", header=FALSE)
colnames(SPV_2_name) = c("SPV", "gene")
# co-expression matrix from Veening lab
# https://www.biorxiv.org/content/early/2018/03/22/283739
# data: https://zenodo.org/record/1157923
# once
`18_matrix_long_5` <- read.csv("18_matrix_long_5.csv", stringsAsFactors=FALSE)
`18_matrix_long_4` <- read.csv("18_matrix_long_4.csv", stringsAsFactors=FALSE)
`18_matrix_long_3` <- read.csv("18_matrix_long_3.csv", stringsAsFactors=FALSE)
`18_matrix_long_2` <- read.csv("18_matrix_long_2.csv", stringsAsFactors=FALSE)
`18_matrix_long_1` <- read.csv("18_matrix_long_1.csv", stringsAsFactors=FALSE)
all = rbind(`18_matrix_long_1`, `18_matrix_long_2`, `18_matrix_long_3`, `18_matrix_long_4`, `18_matrix_long_5`)
all_M = matrix(nrow = 2152, ncol = 2152, dimnames=list(origin=unique(all$gene1), dev=unique(all$gene2)))
all_M[cbind(all$gene1, all$gene2)] = all$correlation
saveRDS(all_M, "coexpression_matrix.Rds")
# subsequently
all_M = readRDS("coexpression_matrix.Rds")
# Run WGCNA as in tutorial
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold.fromSimilarity(all_M, powerVector = powers, verbose = 5)
sizeGrWindow(9, 5)
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
softPower = 5
adjacency = adjacency.fromSimilarity(all_M, power = softPower)
# Turn adjacency into topological overlap
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04)
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
table(dynamicMods)
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
colorOrder = c("grey", standardColors(50))
moduleLabels = match(dynamicColors, colorOrder)-1
modules = data.frame(SPV=rownames(all_M), module=moduleLabels)
labelled_modules = merge(modules, SPV_2_name, by.x = "SPV", by.y = "SPV", all.x = TRUE)
write.table(labelled_modules, "WGCNA_modules.tsv", quote = F, sep = "\t", row.names = F, col.names = F)
#### !
# Can't do this with similarity
#### !
# Not run
# Calculate eigengenes
MEList = moduleEigengenes(all_M, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment