Skip to content

Instantly share code, notes, and snippets.

@bohdanszymanik
Created November 27, 2022 01:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bohdanszymanik/847f505db17ea0198f3e37b6bbc53f13 to your computer and use it in GitHub Desktop.
Save bohdanszymanik/847f505db17ea0198f3e37b6bbc53f13 to your computer and use it in GitHub Desktop.
remove.packages("rlang")
remove.packages("tibble")
rlangUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/rlang_1.0.2.zip"
install.packages(rlangUrl, repos=NULL, type="binary")
tcltkUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/tcltk2_1.2-11.zip"
install.packages(tcltkUrl, repos=NULL, type="binary")
knitrUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/knitr_1.38.zip"
install.packages(knitrUrl, repos=NULL, type="binary")
rglUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/rgl_0.108.3.zip"
install.packages(rglUrl, repos=NULL, type="binary")
fastmapUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/fastmap_1.1.0.zip"
install.packages(fastmapUrl, repos=NULL, type="binary")
htmltoolsUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/htmltools_0.5.2.zip"
install.packages(htmltoolsUrl, repos=NULL, type="binary")
htmlwidgetsUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/htmlwidgets_1.5.4.zip"
install.packages(htmlwidgetsUrl, repos=NULL, type="binary")
igraphUrl <- "https://cran.r-project.org/bin/windows/contrib/4.0/igraph_1.3.0.zip"
install.packages(igraphUrl, repos=NULL, type="binary")
DBIurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/DBI_1.1.2.zip"
install.packages(DBIurl, repos=NULL, type="binary")
rJavaurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/rJava_1.0-6.zip"
install.packages(rJavaurl, repos=NULL, type="binary")
rjdbcurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/RJDBC_0.2-10.zip"
install.packages(rjdbcurl, repos=NULL, type="binary")
tweenr <- "https://cran.r-project.org/bin/windows/contrib/4.0/tweenr_1.0.2.zip"
install.packages(tweenr, repos=NULL, type="binary")
polyclipurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/polyclip_1.10-0.zip"
install.packages(polyclipurl, repos=NULL, type="binary")
ggforceurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/ggforce_0.3.3.zip"
install.packages(ggforceurl, repos=NULL, type="binary")
ggraphurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/ggraph_2.0.5.zip"
install.packages(ggraphurl, repos=NULL, type="binary")
ggrepelurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/ggrepel_0.9.1.zip"
install.packages(ggrepelurl, repos=NULL, type="binary")
graphlayoutsurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/graphlayouts_0.8.0.zip"
install.packages(graphlayoutsurl, repos=NULL, type="binary")
pillarurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/pillar_1.7.0.zip"
install.packages(pillarurl, repos=NULL, type="binary")
tidyselecturl <- "https://cran.r-project.org/bin/windows/contrib/4.0/tidyselect_1.1.2.zip"
install.packages(tidyselecturl, repos=NULL, type="binary")
ggplot2url <- "https://cran.r-project.org/bin/windows/contrib/4.0/ggplot2_3.3.5.zip"
install.packages(ggplot2url, repos=NULL, type="binary")
ggfunurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/ggfun_0.0.6.zip"
install.packages(ggfunurl, repos=NULL, type="binary")
scalesurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/scales_1.2.0.zip"
install.packages(scalesurl, repos=NULL, type="binary")
ggimageurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/ggimage_0.3.0.zip"
install.packages(ggimageurl, repos=NULL, type="binary")
tibbleurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/tibble_3.1.6.zip"
install.packages(tibbleurl, repos=NULL, type="binary")
getpassurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/getPass_0.2-2.zip"
install.packages(getpassurl, repos=NULL, type="binary")
dplyrurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/dplyr_1.0.8.zip"
install.packages(dplyrurl, repos=NULL, type="binary")
tibbleurl <- "https://cran.r-project.org/bin/windows/contrib/4.0/tibble_3.1.6.zip"
install.packages(tibbleurl, repos=NULL, type="binary")
install.packages("tidygraph")
install.packages("tidyr")
install.packages("tictoc")
library(rlang)
library(tcltk)
library(igraph)
library(htmlwidgets)
library(htmltools)
library(fastmap)
library(rgl)
library(knitr)
library(DBI)
library(rJava)
library(RJDBC)
library(tibble)
library(getPass)
library(dplyr)
library(tidyr)
library(purrr)
library(pillar)
library(tidygraph)
library(graphlayouts)
library(ggrepel)
library(tweenr)
library(polyclip)
library(ggforce)
library(ggraph)
library(ggplot2)
library(tictoc)
library(ggimage)
# useful references
# https://jeremydfoote.com/Communication-and-Social-Networks/resources/ggraph_walkthrough.html
# https://www.r-bloggers.com/2019/03/graph-analysis-using-the-tidyverse/
# https://www.jessesadler.com/post/network-analysis-with-r/
# OK, so far so good, now let's connect to DB and start querying
# tuples
jcc = JDBC("com.ibm.db2.jcc.DB2Driver",
"C:/../java/db2jcc4.jar")
# this is not the best way to get the password - displays it as a value in RStudio
# pwd <- getPass::getPass()
# instead just put a pwd dialog box into the connection string dbConnect function parameters
# more info in https://solutions.rstudio.com/db/best-practices/managing-credentials/
#Connection String - either one works
# conn = dbConnect(jcc,"jdbc:db2://10.113.91.195:50000/DBNIAR02",<uid here>,getPass::getPass())
conn = dbConnect(jcc,"jdbc:db2://some db2 database host:50000/some database",rstudioapi::askForPassword("Enter username"),rstudioapi::askForPassword("Enter password"))
# I can do two years worth of links with a graph made up from numeric node ids, but with character ids then I
# need to drop back to less eg maybe 1 year because performance drops
q <- " select
sourceid as Source
,targetid as Target
,sourceType
,targetType
,linkType
from someschema.someLinkTable
where months_between(current date, creationDate) < 6
"
# depending upon implementation this might give us a single link
# or 2 records for any source/target pair, one for the
# source - target, and another reciprocal but otherwise identical link with source
# and target reversed
rs <- dbSendQuery(conn, q)
dft <- tibble::as_tibble(fetch(rs, -1))
# dft is effectively an edge list giving source target
# + attributes of source and target
# + attributes of the edge
# to get tidygraph to load attributes for nodes we need to generate a node list
nodes <-
dplyr::bind_rows(
dft %>% select(id=SOURCE, entityType=SOURCETYPE),
dft %>% select(id=TARGET, entityType=TARGETTYPE)) %>%
distinct()
edges <-
dft %>%
mutate(linkType=stringr::str_trim(LINKTYPE)) %>%
select(from=SOURCE, to=TARGET, linkType)
nodes
edges
nodes %>% distinct(entityType)
# # A tibble: with some nodes of some entity or another
nodes %>%
dplyr::group_by(entityType) %>%
dplyr::summarise(n=n())
# same same as
nodes %>%
dplyr::count(entityType)
edges %>%
dplyr::group_by(linkType) %>%
dplyr::summarise(n=n())
# or
edges %>%
dplyr::count(linkType)
# # A tibble: of edges of differing link types
# strange... this doesn't work but...
graph <- tbl_graph(nodes=nodes,
edges=edges,
directed=F)
# this does work - I wonder if connected to this issues somehow https://github.com/thomasp85/tidygraph/issues/89
# needs to be undirected for the shortest_paths function to work
graph <- igraph::graph_from_data_frame(edges, vertices = nodes, directed=F) %>% as_tbl_graph()
graph
# Actually, I think it's more about this issue https://stackoverflow.com/questions/50457926/tidygraph-and-igraph-build-graph-from-dataframe-discrepancy
nodesChar <-
dplyr::bind_rows(
dft %>% select(id=SOURCE, entityType=SOURCETYPE),
dft %>% select(id=TARGET, entityType=TARGETTYPE)) %>%
distinct() %>%
mutate(id = as.character(id))
nodesChar
# # A tibble of nodes using chr to identify the ids
edgesChar <-
dft %>%
mutate(linkType=stringr::str_trim(LINKTYPE)) %>%
select(from=SOURCE, to=TARGET, linkType) %>%
mutate_at(vars(from, to), as.character)
edgesChar
# # A tibble of edges and edge types with chrs as the id types
graphChar <- tbl_graph(nodes=nodesChar,
edges=edgesChar,
directed=F)
# yes, that fixed it - now tbl_graph works
# let's now join up with some other known ids
pois <- tribble(
~pid, ~pidChar,
1, "1",
2,"2"
3,"3"
)
# lets check to see if we can find these people
# below works for numeric ids in nodes
pois %>% dplyr::inner_join(nodes, by = c("pid" = "id"))
# but we now have character node ids
pois %>% dplyr::inner_join(nodesChar, by = c("pidChar" = "id"))
# all two element combinations of POIs
combs <- combn(pois$pid,2)
combsChar <- combn(pois$pidChar,2)
# quick check on one of the nodes we know exists that we can find it
V(graph)[name==5]
V(graphChar)[id=="8"]
myplots <- vector('list', ncol(combs))
sps <-
for (col in 1:ncol(combs)) {
print(paste(as.character(combs[1,col]), as.character(combs[2,col])))
path <- igraph::shortest_paths(graph,
from = V(graph)[id==combs[1,col]],
to = V(graph)[id==combs[2,col]])
if (length(path$vpath[[1]]) > 1) {
print(paste("Found a path, plotting a graph over", length(path$vpath[[1]]), "nodes"))
myplots[[col]] <- igraph::induced_subgraph(graph, vids = unlist(path$vpath[1])) %>%
tidygraph::as_tbl_graph() %>%
ggraph(layout = "nicely") +
geom_edge_link() +
# geom_node_point(size = 10, fill = "white", shape = 21) +
geom_image(aes(x = x, y = y, image=
case_when(
entityType == 'Person' ~ 'C:\\..\\Person.png',
entityType == 'Organisation' ~ 'C:\\..\\Organization.png',
entityType == 'Location' ~ 'C:\\..\\Place.png',
)
), size = 0.05) +
geom_label(aes(x = x, y = y, label = id, colour=factor(entityType)), nudge_y = 0.2, nudge_x = 0.2, label.size = NA) +
# geom_node_text(aes(label = name), repel = TRUE) +
theme_graph()
}
}
Filter(Negate(is.null),myplots)
# why am I often only getting paths involving people and not other entity types?
# figuring out how these functions to find shortest paths work
t <- igraph::shortest_paths(graph, "2", "6")
length(t$vpath[[1]])
for (n in t$vpath[1]) {
print(paste("n", n))
}
allT <- igraph::all_shortest_paths(graph, "3", "16")
length(allT$res)
length(allT$res[[1]])
length(allT$res[1])
length(allT$res[2])
length(allT$res[3])
length(allT$res[[2]])
length(allT$res[[3]])
unlist(allT$res)
for (n in allT$vpath[1]) {
print(paste("n", n))
}
allT1 <- igraph::all_shortest_paths(graph, "17", "15")
# here's the question - how would I construct a new graph made up of the resulting paths?
graph
# graph %>%
# activate(nodes) %>%
# filter(selected_node==80282)
sub_graph <- graph %>%
morph(to_subgraph, id %in% t$vpath[1])
sub_graph <- to_subgraph(graph, id %in% t$vpath[1], subset_by = "nodes")$subgraph
sub_graph <- to_subgraph(graph, id %in% c(80282), subset_by = "nodes")$subgraph
# well this works!
igraph::induced_subgraph(graph, vids = unlist(t$vpath[1])) %>%
tidygraph::as_tbl_graph() %>%
ggraph(layout = "nicely") +
geom_edge_link() +
geom_node_point(size = 10, fill = "white", shape = 21) +
geom_label(aes(x = x, y = y, label = name, colour=factor(entityType)), nudge_y = 0.2, nudge_x = 0.2, label.size = NA) +
# geom_node_text(aes(label = name), repel = TRUE) +
theme_graph()
ggraph(sub_graph) +
geom_edge_link() +
geom_node_point() +
theme_graph()
# to plot with icons how about the following icons?
# https://ionic.io/ionicons
# geom_icon()
# body-outline -> a person
# map-outline -> a map
# people-outline -> org
# lots of emoji/dingbat icons we could also use https://apps.timwhitlock.info/emoji/tables/unicode
# geom_emoji()
# ggimage::geom_image() enables use of an arbitrary image file so we could use icons we have on disk
# https://stackoverflow.com/questions/16300344/how-to-flatten-a-list-of-lists
# in following c is applied to the list items to create a vector, do I need a vector here or just the list?
# do.call(c, unlist(foolist, recursive=FALSE))
# this time we use all_shortest_paths and plot them all
# drats ... this doesn't work with nodes/edges having a large dataset size and character ids - runs out of memory
myplots <- vector('list', ncol(combs))
spsAll <-
for (col in 1:ncol(combs)) {
print(paste(as.character(combs[1,col]), as.character(combs[2,col])))
paths <- igraph::all_shortest_paths(graph,
from = V(graph)[name==combs[1,col]],
to = V(graph)[name==combs[2,col]])
if (length(paths$res) > 1) {
print(paste("Found a path, plotting a graph over", length(unlist(path$res)), "nodes"))
myplots[[col]] <- igraph::induced_subgraph(graph, vids = unlist(paths$res)) %>%
tidygraph::as_tbl_graph() %>%
ggraph(layout = "nicely") +
geom_edge_link() +
# geom_node_point(size = 10, fill = "white", shape = 21) +
geom_image(aes(x = x, y = y, image=
case_when(
entityType == 'Person' ~ 'C:\\..\\Person.png',
entityType == 'Organisation' ~ 'C:\\..\\Organization.png',
entityType == 'Location' ~ 'C:\\..\\Place.png',
)
), size = 0.05) +
geom_label(aes(x = x, y = y, label = name, colour=factor(entityType)), nudge_y = 0.2, nudge_x = 0.2, label.size = NA) +
# geom_node_text(aes(label = name), repel = TRUE) +
theme_graph()
}
}
Filter(Negate(is.null),myplots)
# some of the nodes are massively connected - I suspect locations like major cities etc
# I think I can usefully simplify the graph if I get rid of them
# in the hope this addresses some of my memory errors
vertexEdges <- degree(graph)
sort(vertexEdges, decreasing=T)
sort(vertexEdges[vertexEdges<100], decreasing=T)
vertexEdges[1]
# turns out that there aren't so many highly connected nodes
# and all vertices have at least one edge
length(vertexEdges)
length(vertexEdges[vertexEdges<10])
degree_distribution(graph)
vertexEdges[vertexEdges==9]
length(V(graph)) #2421707
length(V(graph)[degree(graph)<10]) #2313383
neighbors(graph, "73")
neighbors(graph, "6") # returns x neighbour vertices which is what we expect
neighbors(graph, "306") # returns y neighbour vertices which is what we expect
neighbors(graph, "973") # returns z neighbours which is what we expect
# this works but saves only roughly 1/3rd memory
graphSmaller <- igraph::induced_subgraph(graph, vids=V(graph)[degree(graph)<10], impl="auto") %>% tidygraph::as_tbl_graph()
# little confused at how these graphs are represented in the RStudio Data viewer
# when small it's just 'List of some number of nodes
# when large it's 'Large tbl_graph (some number of elements, some MB)
t1 <- igraph::make_ring(100) %>% tidygraph::as_tbl_graph() # List of 100
t2 <- igraph::make_ring(100000) %>% tidygraph::as_tbl_graph() # Large tbl_graph (100000 elements, 4.8 MB)
t3 <- igraph::make_ring(100000) # Large igraph (100000 elements, 4.8 MB)
# reclaim our resources
rm("t1")
rm("t2")
rm("t3")
gc()
# so let's try with our smaller graph - drats <10 connections makes for no discovered paths
# let's increase
graphSmaller <- igraph::induced_subgraph(graph, vids=V(graph)[degree(graph)<20], impl="auto") %>% tidygraph::as_tbl_graph()
myplots <- vector('list', ncol(combs))
spsAll <-
for (col in 1:ncol(combs)) {
print(paste(as.character(combs[1,col]), as.character(combs[2,col])))
paths <- igraph::all_shortest_paths(graphSmaller,
from = V(graphSmaller)[name==combs[1,col]],
to = V(graphSmaller)[name==combs[2,col]])
if (length(paths$res) > 1) {
print(paste("Found a path, plotting a graph over", length(unlist(path$res)), "nodes"))
myplots[[col]] <- igraph::induced_subgraph(graphSmaller, vids = unlist(paths$res)) %>%
tidygraph::as_tbl_graph() %>%
ggraph(layout = "nicely") +
geom_edge_link() +
# geom_node_point(size = 10, fill = "white", shape = 21) +
geom_image(aes(x = x, y = y, image=
case_when(
entityType == 'Person' ~ 'C:\\..\\Person.png',
entityType == 'Organisation' ~ 'C:\\..\\Organization.png',
entityType == 'Location' ~ 'C:\\..\\Place.png',
)
), size = 0.05) +
geom_label(aes(x = x, y = y, label = name, colour=factor(entityType)), nudge_y = 0.2, nudge_x = 0.2, label.size = NA) +
# geom_node_text(aes(label = name), repel = TRUE) +
theme_graph()
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment