Created
October 31, 2015 20:15
-
-
Save sauljackman/d8ee176ba6d40cbc45ae to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Install packages if you don't already have them
# Only the packages that are actually missing get installed, so re-running the
# script does not reinstall stringr and igraph every time.
required_packages <- c("stringr", "igraph")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages, dependencies = TRUE)
}

## Load the packages
library(stringr)
library(igraph)

## Read in the data
# stringsAsFactors = FALSE keeps text columns as character vectors on R < 4.0
# as well (it is already the default from R 4.0 onwards).
queries <- read.csv("~/Downloads/queries.csv", stringsAsFactors = FALSE)
tables <- read.csv("~/Downloads/tables.csv", stringsAsFactors = FALSE)

# See what tables look like
head(queries, 1)
head(tables)
## Build the query-by-table indicator data frame
# In the data frame x, each row is a query and each column is the name of a
# table from your database; each cell is 1 when that table name appears in
# that query and 0 otherwise.
table_names <- as.character(tables$tables)
query_text <- as.character(queries$query)

# Preallocate a numeric matrix of the final size, already named by table, so
# nothing is grown inside the loop and the logical results of str_detect()
# are stored directly as 0/1.
x <- matrix(
  0,
  nrow = length(query_text),
  ncol = length(table_names),
  dimnames = list(NULL, table_names)
)

## Populate the indicators
# seq_along() skips the loop entirely when there are no tables, whereas
# 1:length() would iterate over c(1, 0).
for (i in seq_along(table_names)) {
  # fixed() matches the table name literally; without it the name is treated
  # as a regular expression, so the "." in "schema.table" matches any
  # character.
  x[, i] <- str_detect(query_text, fixed(table_names[i]))
}

# Later steps expect x to be a data frame with one column per table.
x <- as.data.frame(x)
## Create term-document matrix
# Transposing x gives one row per table and one column per query.
termDocMatrix <- t(as.matrix(x))

## Create term-term matrix
# Entry [i, j] counts the queries in which table i and table j both appear;
# the diagonal counts the queries that mention each table on its own.
termMatrix <- termDocMatrix %*% t(termDocMatrix)

## Create graph from the adjacency matrix
# graph_from_adjacency_matrix() replaces the deprecated graph.adjacency();
# weighted = TRUE stores the co-occurrence counts as the edge attribute
# "weight". TRUE is spelled out because T is an ordinary, reassignable variable.
g <- graph_from_adjacency_matrix(
  termMatrix,
  weighted = TRUE,
  mode = "undirected"
)

## Remove loops
# The non-zero diagonal of termMatrix produces self-edges on each table.
g <- simplify(g)

## Set labels and degrees of vertices
V(g)$label <- V(g)$name
V(g)$degree <- degree(g)
## Plot a Graph
# The circular layout is computed once and reused for both plots.
circle_layout <- layout_in_circle(g)
plot(g, layout = circle_layout)

## Plot a prettier graph
# Colour scale shared by edges and vertices: blue (low) -> yellow -> red (high).
heat_ramp <- colorRamp(c("blue", "yellow", "red"))

# Vertex styling: label size, label opacity and frame opacity all grow with a
# vertex's degree relative to the largest degree in the graph.
vertex_degree <- V(g)$degree
max_degree <- max(vertex_degree)
V(g)$label.cex <- 3 * (0.06125 * vertex_degree / max_degree + 0.2)
V(g)$label.color <- rgb(0, 0, 0.2, 0.49 * vertex_degree / max_degree + 0.5)
V(g)$frame.color <- rgb(0, 0, 0.2, 0.39 * vertex_degree / max_degree + 0.6)

# Edge styling: egam is the log-scaled edge weight normalised so that the
# heaviest edge gets 1; it is used as the edge colour's alpha and as the
# stored width attribute.
edge_weight <- E(g)$weight
log_weight <- log(edge_weight) + 0.4
egam <- log_weight / max(log_weight)
max_weight <- max(edge_weight)
E(g)$color <- rgb(heat_ramp(edge_weight / max_weight) / 255, alpha = egam)
E(g)$width <- egam

# Final plot: vertex fill and size follow degree. Note that this call passes
# its own edge.width (linear in weight) in addition to the E(g)$width set above.
current_degree <- degree(g)
plot(
  g,
  layout = circle_layout,
  vertex.color = rgb(heat_ramp(current_degree / max(current_degree)) / 255),
  vertex.size = vertex_degree * 2 / 3 + 2,
  edge.width = 6 * edge_weight / max_weight
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment