Pakillo/coauthor-networks.R

## coauthor-networks.R
#  Author network mapping for journal publications.
#  By: Simon Goring (final code: 10/2/2013)
# https://downwithtime.wordpress.com/2013/02/10/on-blogging-and-collaboration/

library(bibtex)
library(network)
library(RColorBrewer)

#  Read in the bibtex file
#  You can export this file from your google scholar network.
#  Ultimately, this code can be used with any bibtex file though.
citations <- read.bib('SJGcitations.bib')

#  Process the author list, first create an n x n matrix with all the authors
#  then go paper by paper and increment when authors co-occur.
authors <- lapply(citations, function(x) x$author)

unique.authors <- unique((unlist(authors))[names(unlist(authors)) == 'family'])

coauth.table <- matrix(nrow = length(unique.authors), ncol=length(unique.authors),
                       dimnames = list(unique.authors, unique.authors), 0)

for(i in 1:length(citations)){
  paper.auth <- unlist(authors[[i]])[names(unlist(authors[[i]])) == 'family']
  coauth.table[paper.auth,paper.auth] <- coauth.table[paper.auth,paper.auth] + 1
}

#  Build the network diagram:
author.net <- network(coauth.table)
network.vertex.names(author.net) <- rownames(coauth.table)

col.set <- brewer.pal(6, 'Dark2')

par(family='serif')

#  ?plotnetwork will explain these commands
aa <- plot(author.net,
           label = rownames(coauth.table),
           usearrows = FALSE,
           jitter = TRUE,
           displayisolates = FALSE,
           boxed.labels=FALSE,
           displaylabels=FALSE,
           pad=5,
           vertex.cex = log((colSums(coauth.table))),
           edge.lwd=coauth.table*3,
           edge.col = rgb(0.1, 0.1, 0.1, 0.1),
           label.cex = 0.7,
           label.pad = 0.7)


###########################

## https://downwithtime.wordpress.com/2015/02/12/building-your-network-using-orcid-and-ropensci/

library(rorcid)
library(igraph)

# The idea is to go into a user and get all their papers,
# and all the papers of people they've published with:

simon.record <- orcid_id(orcid = '0000-0002-2700-4605',
                         profile="works")

get_doi <- function(x){
  #  This pulls the DOIs out of the ORCiD record:
  list.x <- x$'work-external-identifiers.work-external-identifier'

  #  We have to catch a few objects with NULL DOI information:
  do.call(rbind.data.frame,lapply(list.x, function(x){
      if(length(x) == 0 | (!'DOI' %in% x[,1])){
        data.frame(value=NA)
      } else{
        data.frame(value = x[which(x[,1] %in% 'DOI'),2])
      }
    }))
}

get_papers <- function(x){
  all.papers <- x[[1]]$works # this is where the papers are.
  papers <- data.frame(title = all.papers$'work-title.title.value',
                       doi   = get_doi(all.papers))

  paper.doi <- lapply(1:nrow(papers), function(x){
    if(!is.na(papers[x,2]))return(orcid_doi(dois = papers[x,2], fuzzy = FALSE))
    # sometimes there's no DOI
    # if that's the case then just return NA:
    return(NA)
  })

  your.papers <- lapply(1:length(paper.doi), function(x){
      if(is.na(paper.doi[[x]])){
        data.frame(doi=NA, orcid=NA, name=NA)
      } else {
        data.frame(doi = papers[x,2],
                   orcid = paper.doi[[x]][[1]]$data$'orcid-identifier.path',
                   name = paste(paper.doi[[x]][[1]]$data$'personal-details.given-names.value',
                                paper.doi[[x]][[1]]$data$'personal-details.family-name.value',
                                sep = ' '),
                   stringsAsFactors = FALSE)
      }})
  do.call(rbind.data.frame, your.papers)

}

simons <- get_papers(simon.record)

unique.orcids <- unique(simons$orcid)

all.colleagues <- list()

for(i in 1:length(unique.orcids)){
  all.colleagues[[i]] <- get_papers(orcid_id(orcid = unique.orcids[i], profile="works"))
}


all.df <- do.call(rbind.data.frame, all.colleagues)
all.df <- na.omit(all.df[!duplicated(all.df),])

all.pairs <- matrix(ncol = length(unique(all.df$name)),
                    nrow = length(unique(all.df$name)),
                    dimnames = list(unique(all.df$name),unique(all.df$name)), 0)

unique.dois <- unique(as.character(all.df$doi))

for(i in 1:length(unique.dois)){
  doi <- unique.dois[i]

  all.pairs[all.df$name[all.df$doi %in% doi],all.df$name[all.df$doi %in% doi]] <-
    all.pairs[all.df$name[all.df$doi %in% doi],all.df$name[all.df$doi %in% doi]] + 1

}

all.pairs <- all.pairs[rowSums(all.pairs)>0, colSums(all.pairs)>0]

diag(all.pairs) <- 0


author.adj <- graph.adjacency(all.pairs, mode = 'undirected', weighted = TRUE)
#  Plot so that the width of the lines connecting the nodes reflects the
#  number of papers co-authored by both individuals.
#  This is Figure 1 of this blog post.
plot(author.adj, vertex.label.cex = 0.8, edge.width = E(author.adj)$weight)
	# Author network mapping for journal publications.
	# By: Simon Goring (final code: 10/2/2013)
	# https://downwithtime.wordpress.com/2013/02/10/on-blogging-and-collaboration/

	library(bibtex)
	library(network)
	library(RColorBrewer)

	# Read in the bibtex file
	# You can export this file from your google scholar network.
	# Ultimately, this code can be used with any bibtex file though.
	citations <- read.bib('SJGcitations.bib')

	# Process the author list, first create an n x n matrix with all the authors
	# then go paper by paper and increment when authors co-occur.
	authors <- lapply(citations, function(x) x$author)

	unique.authors <- unique((unlist(authors))[names(unlist(authors)) == 'family'])

	coauth.table <- matrix(nrow = length(unique.authors), ncol=length(unique.authors),
	dimnames = list(unique.authors, unique.authors), 0)

	for(i in 1:length(citations)){
	paper.auth <- unlist(authors[[i]])[names(unlist(authors[[i]])) == 'family']
	coauth.table[paper.auth,paper.auth] <- coauth.table[paper.auth,paper.auth] + 1
	}

	# Build the network diagram:
	author.net <- network(coauth.table)
	network.vertex.names(author.net) <- rownames(coauth.table)

	col.set <- brewer.pal(6, 'Dark2')

	par(family='serif')

	# ?plotnetwork will explain these commands
	aa <- plot(author.net,
	label = rownames(coauth.table),
	usearrows = FALSE,
	jitter = TRUE,
	displayisolates = FALSE,
	boxed.labels=FALSE,
	displaylabels=FALSE,
	pad=5,
	vertex.cex = log((colSums(coauth.table))),
	edge.lwd=coauth.table*3,
	edge.col = rgb(0.1, 0.1, 0.1, 0.1),
	label.cex = 0.7,
	label.pad = 0.7)



	###########################

	## https://downwithtime.wordpress.com/2015/02/12/building-your-network-using-orcid-and-ropensci/

	library(rorcid)
	library(igraph)

	# The idea is to go into a user and get all their papers,
	# and all the papers of people they've published with:

	simon.record <- orcid_id(orcid = '0000-0002-2700-4605',
	profile="works")

	get_doi <- function(x){
	# This pulls the DOIs out of the ORCiD record:
	list.x <- x$'work-external-identifiers.work-external-identifier'

	# We have to catch a few objects with NULL DOI information:
	do.call(rbind.data.frame,lapply(list.x, function(x){
	if(length(x) == 0 \| (!'DOI' %in% x[,1])){
	data.frame(value=NA)
	} else{
	data.frame(value = x[which(x[,1] %in% 'DOI'),2])
	}
	}))
	}

	get_papers <- function(x){
	all.papers <- x[[1]]$works # this is where the papers are.
	papers <- data.frame(title = all.papers$'work-title.title.value',
	doi = get_doi(all.papers))

	paper.doi <- lapply(1:nrow(papers), function(x){
	if(!is.na(papers[x,2]))return(orcid_doi(dois = papers[x,2], fuzzy = FALSE))
	# sometimes there's no DOI
	# if that's the case then just return NA:
	return(NA)
	})

	your.papers <- lapply(1:length(paper.doi), function(x){
	if(is.na(paper.doi[[x]])){
	data.frame(doi=NA, orcid=NA, name=NA)
	} else {
	data.frame(doi = papers[x,2],
	orcid = paper.doi[[x]][[1]]$data$'orcid-identifier.path',
	name = paste(paper.doi[[x]][[1]]$data$'personal-details.given-names.value',
	paper.doi[[x]][[1]]$data$'personal-details.family-name.value',
	sep = ' '),
	stringsAsFactors = FALSE)
	}})
	do.call(rbind.data.frame, your.papers)

	}

	simons <- get_papers(simon.record)

	unique.orcids <- unique(simons$orcid)

	all.colleagues <- list()

	for(i in 1:length(unique.orcids)){
	all.colleagues[[i]] <- get_papers(orcid_id(orcid = unique.orcids[i], profile="works"))
	}



	all.df <- do.call(rbind.data.frame, all.colleagues)
	all.df <- na.omit(all.df[!duplicated(all.df),])

	all.pairs <- matrix(ncol = length(unique(all.df$name)),
	nrow = length(unique(all.df$name)),
	dimnames = list(unique(all.df$name),unique(all.df$name)), 0)

	unique.dois <- unique(as.character(all.df$doi))

	for(i in 1:length(unique.dois)){
	doi <- unique.dois[i]

	all.pairs[all.df$name[all.df$doi %in% doi],all.df$name[all.df$doi %in% doi]] <-
	all.pairs[all.df$name[all.df$doi %in% doi],all.df$name[all.df$doi %in% doi]] + 1

	}

	all.pairs <- all.pairs[rowSums(all.pairs)>0, colSums(all.pairs)>0]

	diag(all.pairs) <- 0


	author.adj <- graph.adjacency(all.pairs, mode = 'undirected', weighted = TRUE)
	# Plot so that the width of the lines connecting the nodes reflects the
	# number of papers co-authored by both individuals.
	# This is Figure 1 of this blog post.
	plot(author.adj, vertex.label.cex = 0.8, edge.width = E(author.adj)$weight)