Skip to content

Instantly share code, notes, and snippets.

@sauljackman
Created October 31, 2015 20:15
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sauljackman/d8ee176ba6d40cbc45ae to your computer and use it in GitHub Desktop.
Save sauljackman/d8ee176ba6d40cbc45ae to your computer and use it in GitHub Desktop.
## Install packages if you don't already have them
install.packages(c("stringr", "igraph"), dependencies = TRUE)
## Load the packages
library(stringr)
library(igraph)
## Read in the data
queries <- read.csv("~/Downloads/queries.csv")
tables <- read.csv("~/Downloads/tables.csv")
# See what tables look like
head(queries,1)
head(tables)
## Create an empty data frame, with rows equal to number of queries being analyzed
x <- data.frame(rep(0,length(queries$query)))
## Populate data frame
# In the data frame x, each row is a query and each column is the name of a table from your database; each cell is an indicator for whether a particular table name appears in a particular query
for (i in 1:length(tables$tables)) {
x[,i] <- str_detect(queries$query, as.character(tables$tables[i]))
}
## Name columns, replace TRUE with 1 and FALSE with 0
colnames(x) <- tables$tables
x[x==FALSE] <- 0
x[x==TRUE] <- 1
## Create term-document matrix
termDocMatrix <- t(as.matrix(x))
## Create term-term matrix
termMatrix <- termDocMatrix %*% t(termDocMatrix)
## Create graph-adjacency matrix
g <- graph.adjacency(termMatrix, weighted=T, mode = "undirected")
## Remove loops
g <- simplify(g)
## Set labels and degrees of vertices
V(g)$label <- V(g)$name
V(g)$degree <- degree(g)
## Plot a Graph
plot(g, layout=layout_in_circle(g))
## Plot a prettier graph
V(g)$label.cex <- 3 * (0.06125 * V(g)$degree / max(V(g)$degree) + .2)
V(g)$label.color <- rgb(0, 0, .2, .49 * V(g)$degree / max(V(g)$degree) + .5)
V(g)$frame.color <- rgb(0, 0, .2, .39 * V(g)$degree / max(V(g)$degree) + .6)
egam <- (log(E(g)$weight)+.4) / max(log(E(g)$weight)+.4)
E(g)$color <- rgb((colorRamp(c("blue", "yellow", "red"))(E(g)$weight/max(E(g)$weight)))/255, alpha=egam)
E(g)$width <- egam
plot(g, layout=layout_in_circle(g), vertex.color = rgb((colorRamp(c("blue", "yellow", "red"))(degree(g)/max(degree(g))))/255), vertex.size = ((V(g)$degree)*2/3)+2, edge.width = 6 * E(g)$weight / max(E(g)$weight))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment