## Code to create your collaboration network in R
## (GitHub page notice: this file contains bidirectional Unicode text that may
## be interpreted or compiled differently than what appears below. To review,
## open the file in an editor that reveals hidden Unicode characters.)
## Visualizing authorship networks: code for this blog post:
## https://mathewkiang.com/2018/06/17/my-collaboration-network/
## Packages used below: scholar for the Google Scholar pulls, visNetwork for
## the interactive graph, tidyverse for the data-frame verbs and `%>%`.
library(scholar) # devtools::install_github("jkeirstead/scholar")
library(visNetwork)
library(tidyverse)
## Constants
## Bounds (in seconds) of the random pause between Google Scholar requests:
## 5 minutes to 30 minutes.
MIN_TIME <- 60 * 5
MAX_TIME <- 60 * 30
## `value` given to paper / software / data nodes in the network.
PROJ_WEIGHT <- 0.8
## `value` given to co-author nodes in the network.
NODE_WEIGHT <- 1
## Google Scholar profile id whose publications are pulled.
my_scholar_id <- "eD9_J3wAAAAJ"
## If we've already pulled from Google Scholar, don't do it again. The sleep
## timer makes this an unreasonably long process, so the results are cached in
## ./scholar_pulls.rda and the scrape only runs when that file is missing.
if (!file.exists("./scholar_pulls.rda")) {
  ## Pull my own publication history, drop the Opioid Trends preprint so
  ## we're not double-counting, coerce the text-like columns to character,
  ## and fix the UTF-8 encoding (once here and once later on the full author
  ## strings).
  mvk_df <- get_publications(my_scholar_id) %>%
    filter(pubid != "-yGd096yOn8C") %>%
    mutate_at(
      vars(one_of("title", "author", "journal", "number", "cid", "pubid")),
      as.character
    ) %>%
    mutate(author = gsub("é|\xe9", "e", author))

  ## The summary df does not have a full author list so we need to request
  ## it per publication. Each request is followed by a random pause of
  ## MIN_TIME to MAX_TIME seconds to stay below the rate limit (the original
  ## note reports one pull every 15 to 30 minutes as fairly safe).
  ## NOTE(review): newer releases of scholar appear to expose this helper as
  ## get_complete_authors() -- confirm against the installed version.
  ## NOTE(review): an error part-way through the loop loses every pull made
  ## so far, because nothing is saved until the loop finishes.
  pubid <- mvk_df$pubid
  author_pulls <- vector("list", length(pubid))
  for (i in seq_along(pubid)) {
    print(pubid[[i]])
    ## `[i] <- list(...)` keeps the slot even if the helper returns NULL.
    author_pulls[i] <- list(getCompleteAuthors(my_scholar_id, pubid[[i]]))
    ## No need to wait after the final request.
    if (i < length(pubid)) {
      Sys.sleep(runif(1, min = MIN_TIME, max = MAX_TIME))
    }
  }
  authors <- unlist(author_pulls)

  ## Save it so we don't have to pull again.
  save(authors, pubid, mvk_df, file = "./scholar_pulls.rda")
}
## Now munging data
## Restore `authors`, `pubid`, and `mvk_df` from the cached pull.
load("./scholar_pulls.rda")

## One row per publication with its full, comma-separated author string,
## cleaned up before it is split.
full_authors <- tibble(pubid, f_authors = authors) %>%
  ## Fix encoding
  mutate(f_authors = gsub("é|\xe9", "e", f_authors)) %>%
  ## Fix Rob's middle initial
  mutate(f_authors = gsub("W Moeller", "M Moeller", f_authors)) %>%
  ## Fix Jen's middle initial
  mutate(f_authors = gsub("Jennifer Hayes", "Jennifer E Hayes", f_authors))

## Size the author columns from the data rather than a fixed 30, so that a
## paper with a longer author list is not truncated by separate().
max_authors <- max(
  1L,
  lengths(strsplit(full_authors$f_authors, ", ", fixed = TRUE))
)

## Split out every author into their own column (author_01, author_02, ...);
## shorter author lists are padded with NA on the right.
full_authors <- full_authors %>%
  separate(f_authors,
           into = sprintf("author_%02i", seq_len(max_authors)),
           remove = FALSE, sep = ", ", fill = "right")

## Remove all the completely empty columns
full_authors <- full_authors[, colSums(!is.na(full_authors)) > 0]

## Merge back to original dataframe. `pubid` is the shared key; naming it
## explicitly avoids relying on whichever columns happen to match.
mvk_df <- mvk_df %>%
  left_join(full_authors, by = "pubid")
## Get a list of unique co-authors for the node list: every name that appears
## in any full author string, excluding myself. Missing names are dropped as
## well, matching the edge list, which filters out NA authors.
all_author_names <- unique(
  unlist(strsplit(full_authors$f_authors, ", ", fixed = TRUE))
)
distinct_authors <- all_author_names[
  !is.na(all_author_names) & all_author_names != "Mathew V Kiang"
]
## Now let's make a node list. Every node has a display `title`, a `value`
## (its weight), a `group` (used for colouring), and an `id`, which is simply
## the title so that edges can refer to nodes by name.
paper_nodes <- mvk_df %>%
  transmute(title,
            value = PROJ_WEIGHT,
            group = "paper",
            id = title)

coauthor_nodes <- tibble(
  title = distinct_authors,
  value = NODE_WEIGHT,
  group = "coauthor",
  id = title
)

## Add some github side projects
side_project_nodes <- tibble(
  title = c("narcan", "metabeiwe", "beiwe_data_sample"),
  value = PROJ_WEIGHT,
  group = c("software", "software", "data"),
  id = title
)

## Node ids are expected to be unique, so if two rows share an id (e.g. two
## publications with the same title) only the first one is kept.
nodes <- bind_rows(paper_nodes, coauthor_nodes, side_project_nodes) %>%
  distinct(id, .keep_all = TRUE)
## Now the edge list -- reshape our merged dataframe into an edgelist with
## one row per (co-author, paper) pair: `from` is the co-author's name and
## `to` is the paper title. Empty author slots and my own name are dropped.
paper_edges <- mvk_df %>%
  select(title, starts_with("author_")) %>%
  gather(key = "author", value = "from", starts_with("author_")) %>%
  filter(!is.na(from), from != "Mathew V Kiang") %>%
  select(to = title, from)

## Add cases for the non-papers.
## NOTE(review): these collaborators only show up in the graph if they are
## also present in the node list -- confirm the names match exactly.
side_project_edges <- tibble(
  to = c("narcan", "beiwe_data_sample", "beiwe_data_sample"),
  from = c("Monica J Alexander", "Jukka-Pekka Onnela", "Jeanette Lorme")
)

edges <- bind_rows(paper_edges, side_project_edges)
## Build the interactive network widget and write it to a standalone HTML
## file.
author_network <- visNetwork(nodes, edges) %>%
  ## Fixed seed so the layout is the same on every run.
  visLayout(randomSeed = 123456) %>%
  ## Turn off zooming and dragging of the view.
  visInteraction(zoomView = FALSE, dragView = FALSE) %>%
  ## One colour per node group.
  visGroups(groupname = "coauthor", color = "#3288bd") %>%
  visGroups(groupname = "paper", color = "#d53e4f") %>%
  visGroups(groupname = "data", color = "#abdda4") %>%
  visGroups(groupname = "software", color = "#fdae61") %>%
  ## Bounds for the node scaling.
  visNodes(scaling = list(min = 15, max = 25)) %>%
  visEdges(width = 3, color = "grey")

visSave(author_network, "./author_network.html")
## (GitHub page footer: Sign up for free to join this conversation on GitHub.
## Already have an account? Sign in to comment.)