Instantly share code, notes, and snippets.
Created
February 17, 2015 00:22
-
Save Pakillo/7f07b9479fe720212612 to your computer and use it in GitHub Desktop.
Coauthorsip networks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author network mapping for journal publications. | |
# By: Simon Goring (final code: 10/2/2013) | |
# https://downwithtime.wordpress.com/2013/02/10/on-blogging-and-collaboration/ | |
library(bibtex) | |
library(network) | |
library(RColorBrewer) | |
# Read in the bibtex file | |
# You can export this file from your google scholar network. | |
# Ultimately, this code can be used with any bibtex file though. | |
citations <- read.bib('SJGcitations.bib') | |
# Process the author list, first create an n x n matrix with all the authors | |
# then go paper by paper and increment when authors co-occur. | |
authors <- lapply(citations, function(x) x$author) | |
unique.authors <- unique((unlist(authors))[names(unlist(authors)) == 'family']) | |
coauth.table <- matrix(nrow = length(unique.authors), ncol=length(unique.authors), | |
dimnames = list(unique.authors, unique.authors), 0) | |
for(i in 1:length(citations)){ | |
paper.auth <- unlist(authors[[i]])[names(unlist(authors[[i]])) == 'family'] | |
coauth.table[paper.auth,paper.auth] <- coauth.table[paper.auth,paper.auth] + 1 | |
} | |
# Build the network diagram: | |
author.net <- network(coauth.table) | |
network.vertex.names(author.net) <- rownames(coauth.table) | |
col.set <- brewer.pal(6, 'Dark2') | |
par(family='serif') | |
# ?plotnetwork will explain these commands | |
aa <- plot(author.net, | |
label = rownames(coauth.table), | |
usearrows = FALSE, | |
jitter = TRUE, | |
displayisolates = FALSE, | |
boxed.labels=FALSE, | |
displaylabels=FALSE, | |
pad=5, | |
vertex.cex = log((colSums(coauth.table))), | |
edge.lwd=coauth.table*3, | |
edge.col = rgb(0.1, 0.1, 0.1, 0.1), | |
label.cex = 0.7, | |
label.pad = 0.7) | |
########################### | |
## https://downwithtime.wordpress.com/2015/02/12/building-your-network-using-orcid-and-ropensci/ | |
library(rorcid) | |
library(igraph) | |
# The idea is to go into a user and get all their papers, | |
# and all the papers of people they've published with: | |
simon.record <- orcid_id(orcid = '0000-0002-2700-4605', | |
profile="works") | |
get_doi <- function(x){ | |
# This pulls the DOIs out of the ORCiD record: | |
list.x <- x$'work-external-identifiers.work-external-identifier' | |
# We have to catch a few objects with NULL DOI information: | |
do.call(rbind.data.frame,lapply(list.x, function(x){ | |
if(length(x) == 0 | (!'DOI' %in% x[,1])){ | |
data.frame(value=NA) | |
} else{ | |
data.frame(value = x[which(x[,1] %in% 'DOI'),2]) | |
} | |
})) | |
} | |
get_papers <- function(x){ | |
all.papers <- x[[1]]$works # this is where the papers are. | |
papers <- data.frame(title = all.papers$'work-title.title.value', | |
doi = get_doi(all.papers)) | |
paper.doi <- lapply(1:nrow(papers), function(x){ | |
if(!is.na(papers[x,2]))return(orcid_doi(dois = papers[x,2], fuzzy = FALSE)) | |
# sometimes there's no DOI | |
# if that's the case then just return NA: | |
return(NA) | |
}) | |
your.papers <- lapply(1:length(paper.doi), function(x){ | |
if(is.na(paper.doi[[x]])){ | |
data.frame(doi=NA, orcid=NA, name=NA) | |
} else { | |
data.frame(doi = papers[x,2], | |
orcid = paper.doi[[x]][[1]]$data$'orcid-identifier.path', | |
name = paste(paper.doi[[x]][[1]]$data$'personal-details.given-names.value', | |
paper.doi[[x]][[1]]$data$'personal-details.family-name.value', | |
sep = ' '), | |
stringsAsFactors = FALSE) | |
}}) | |
do.call(rbind.data.frame, your.papers) | |
} | |
simons <- get_papers(simon.record) | |
unique.orcids <- unique(simons$orcid) | |
all.colleagues <- list() | |
for(i in 1:length(unique.orcids)){ | |
all.colleagues[[i]] <- get_papers(orcid_id(orcid = unique.orcids[i], profile="works")) | |
} | |
all.df <- do.call(rbind.data.frame, all.colleagues) | |
all.df <- na.omit(all.df[!duplicated(all.df),]) | |
all.pairs <- matrix(ncol = length(unique(all.df$name)), | |
nrow = length(unique(all.df$name)), | |
dimnames = list(unique(all.df$name),unique(all.df$name)), 0) | |
unique.dois <- unique(as.character(all.df$doi)) | |
for(i in 1:length(unique.dois)){ | |
doi <- unique.dois[i] | |
all.pairs[all.df$name[all.df$doi %in% doi],all.df$name[all.df$doi %in% doi]] <- | |
all.pairs[all.df$name[all.df$doi %in% doi],all.df$name[all.df$doi %in% doi]] + 1 | |
} | |
all.pairs <- all.pairs[rowSums(all.pairs)>0, colSums(all.pairs)>0] | |
diag(all.pairs) <- 0 | |
author.adj <- graph.adjacency(all.pairs, mode = 'undirected', weighted = TRUE) | |
# Plot so that the width of the lines connecting the nodes reflects the | |
# number of papers co-authored by both individuals. | |
# This is Figure 1 of this blog post. | |
plot(author.adj, vertex.label.cex = 0.8, edge.width = E(author.adj)$weight) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment