Skip to content

Instantly share code, notes, and snippets.

@tts
Last active December 11, 2015 10:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tts/4584793 to your computer and use it in GitHub Desktop.
Save tts/4584793 to your computer and use it in GitHub Desktop.
Altmetrics scores by Altmetric and traditional citation metrics (WoS) of Aalto University publications published between 2007 and 2012 and with a DOI known by WoS.
###########################################################
#
# Altmetrics about Aalto University publications
# 2007-2012 with a DOI
#
# Altmetrics data provided by altmetric.com
# Web of Science data provided by Thomson Reuters
#
##########################################################
library(rAltmetric)
library(plyr)
library(reshape)
# DOIs, WoS (Thomson Reuters Web of Science) citation counts and
# publication years come from a local source, one data frame per school.
#
# Combine the per-school data frames into one
aalto.doi <- merge_all(list(arts.doi, biz.doi, chem.doi, elec.doi, eng.doi, sci.doi))
# Prefix every DOI with "doi/" for the rAltmetric run
aalto.doi$di <- paste0("doi/", aalto.doi$di)
# Keep only the prefixed DOIs, in a one-column data frame named "doi"
dois <- data.frame(doi = aalto.doi$di, stringsAsFactors = FALSE)
# Below, I first run DOIs against the API of Altmetric
# with the altmetrics function from the rAltmetric package.
#
# The result (raw_metrics) is a nested list element. From this list, with
# the altmetric_data function, a number of altmetrics scores are
# saved as variables to a data frame (metric_data). The DOIs have a
# column but for some reason the value in all is '1'.
#
# However, my goal is to merge the traditional WoS citations
# (along with the publication year) with Altmetric scores.
# The common denominator is the DOI. To get this done, I need to pick up
# DOIs from raw_metrics, merge them with metric_data, and again, merge
# this with metrics from WoS.
# Query Altmetric once per DOI; the result is a list with one element per DOI
raw_metrics <- llply(.data = dois$doi, .fun = altmetrics, .progress = "text")
# Convert every list element with altmetric_data and bind the rows
# into one data frame
metric_data <- ldply(.data = raw_metrics, .fun = altmetric_data)
# Number of root list elements. Each one either holds metrics or is NULL
N <- length(raw_metrics)
# From every result take the second list element, which this script treats
# as the DOI. Results without one are recorded as the string "NA".
# vapply also copes with an empty raw_metrics (zero rows), where a
# 1:N loop would iterate over c(1, 0) and fail.
adoi <- data.frame(
  doi = vapply(raw_metrics, function(res) {
    doi <- res[[2]]
    if (is.null(doi)) "NA" else as.character(doi)[1]
  }, character(1), USE.NAMES = FALSE),
  stringsAsFactors = FALSE
)
# Exclude the "NA" rows. drop = FALSE keeps the one-column data frame
# intact, so the column keeps its name and stays character instead of
# being rebuilt (and possibly turned into a factor) from a bare vector.
adoi.full <- adoi[adoi$doi != "NA", , drop = FALSE]
# Renumber the rows 1..k
rownames(adoi.full) <- NULL
# Merge adoi.full with metric_data. The two are paired by position:
# row k of metric_data is assumed to belong to row k of adoi.full.
# A running ID is added to both and used as the merge key.
# seq_len() yields an empty sequence for an empty data frame, whereas
# seq(from = 1, to = 0) would count down and break the assignment.
metric_data$id <- seq_len(nrow(metric_data))
adoi.full$id <- seq_len(nrow(adoi.full))
# The positional pairing only makes sense when both sides have the same
# number of rows; merge() would otherwise silently drop the surplus.
if (nrow(metric_data) != nrow(adoi.full)) {
  warning("metric_data has ", nrow(metric_data), " rows but adoi.full has ",
          nrow(adoi.full), "; DOIs and metrics may be misaligned",
          call. = FALSE)
}
aalto.alt.all <- merge(metric_data, adoi.full, by = "id")
# Then, merge aalto.alt.all with aalto.doi by DOI.
# First strip the leading "doi/" prefix that was added for the API run
aalto.doi$di <- sub("^doi/", "", aalto.doi$di)
# doi.y is the DOI column contributed by adoi.full (merge() suffixes
# column names that occur on both sides)
aalto.all <- merge(aalto.doi, aalto.alt.all,
                   by.x = "di", by.y = "doi.y")
# Columns kept for the analysis (di = DOI, py = publ year, tc = WoS);
# the remaining ones are Altmetric counts
keep.cols <- c(
  "di", "url", "py", "tc",
  "mendeley", "connotea", "citeulike", "readers_count",
  "cited_by_gplus_count", "cited_by_fbwalls_count",
  "cited_by_posts_count", "cited_by_tweeters_count",
  "cited_by_accounts_count", "cited_by_feeds_count"
)
aalto.all.m <- aalto.all[, keep.cols]
# Store the DOI as a factor
aalto.all.m[["di"]] <- factor(aalto.all.m[["di"]])
# Top5 stats: Twitter, Google+, Facebook, news feed.
# top5.by returns the five rows of aalto.all.m with the highest value
# in the given count column.
top5.by <- function(count.col) {
  ranking <- order(aalto.all.m[[count.col]], decreasing = TRUE)
  head(aalto.all.m[ranking, ], n = 5)
}
top5.by("cited_by_tweeters_count")
top5.by("cited_by_gplus_count")
top5.by("cited_by_fbwalls_count")
top5.by("cited_by_feeds_count")
# Top 10 by Mendeley count: rank first, then pick the ten leading rows
top10m <- aalto.all.m[head(order(aalto.all.m$mendeley, decreasing = TRUE), n = 10), ]
# Missing values become 0
top10m <- replace(top10m, is.na(top10m), 0)
# Top 10 by WoS citations, treated the same way
top10w <- aalto.all.m[head(order(aalto.all.m$tc, decreasing = TRUE), n = 10), ]
top10w <- replace(top10w, is.na(top10w), 0)
# Reshape: one row per metric, one column per publication.
# The row order matches the legend used for the barplots.
metric.cols <- c("mendeley", "tc", "connotea", "citeulike",
                 "cited_by_fbwalls_count", "cited_by_posts_count",
                 "cited_by_tweeters_count", "cited_by_feeds_count")
# Transpose the metric columns of a Top10 table into a matrix whose rows
# are named "<prefix>.<metric>" and whose columns are unnamed.
metric.matrix <- function(top10, prefix) {
  counts <- t(as.matrix(top10[metric.cols]))
  dimnames(counts) <- list(paste(prefix, metric.cols, sep = "."), NULL)
  counts
}
subsetm <- metric.matrix(top10m, "top10m")
subsetw <- metric.matrix(top10w, "top10w")
# Eight bar colours, one per plotted metric, given as 0-255 RGB triplets
# and converted to hex colour strings
color.scheme <- vapply(
  list(
    c(126, 0, 27),
    c(200, 64, 94),
    c(255, 71, 90),
    c(255, 149, 131),
    c(169, 124, 82),
    c(215, 124, 26),
    c(233, 172, 131),
    c(199, 174, 36)
  ),
  function(channels) {
    rgb(channels[1], channels[2], channels[3], maxColorValue = 255)
  },
  character(1)
)
# Plot
#
# render.top10.barplot writes a horizontal, grouped barplot to a PNG file.
#   file   - path of the PNG file to create
#   counts - matrix with one row per metric and one column per publication
#   top10  - data frame providing di (DOI) and py (publication year),
#            used to label the bar groups as "DOI (year)"
#   title  - main title of the plot
#   col    - fill colours, one per metric row
# Returns the file path invisibly.
render.top10.barplot <- function(file, counts, top10, title,
                                 col = color.scheme) {
  png(file, width = 1024, height = 768, res = 72)
  # Close the device even when plotting fails, so an error does not
  # leave the PNG device open
  on.exit(dev.off(), add = TRUE)
  # Wide left margin: the DOI labels are drawn horizontally (las = 1)
  par(mar = c(4, 16, 7, 3), cex = 0.80)
  barplot(counts,
          legend.text = c("Mendeley", "WoS", "Connotea", "CiteULike",
                          "Facebook", "Posts", "Twitter", "Feeds"),
          names.arg = paste0(top10$di, " (", top10$py, ")"),
          horiz = TRUE,
          las = 1,
          main = title,
          col = col,
          beside = TRUE)
  invisible(file)
}
# Plot Top10 Mendeley
render.top10.barplot("aalto.top10.mendeley.png", subsetm, top10m,
                     "Top10 Aalto articles in Mendeley saves 2007-2012")
# Plot Top10 WoS
render.top10.barplot("aalto.top10.wos.png", subsetw, top10w,
                     "Top10 Aalto articles (DOI) in WoS citations with altmetrics 2007-2012")
##################################################
#
# Spearman correlations of Mendeley and WoS.
# Publications published between 2009 and 2011
#
##################################################
# 2009-2011
# which() keeps only rows whose year is known and inside the range; a
# plain logical index would add an all-NA entry for every NA year.
in.20092011 <- which(aalto.all.m$py >= 2009 & aalto.all.m$py <= 2011)
m20092011 <- aalto.all.m[in.20092011, "mendeley"]
w20092011 <- aalto.all.m[in.20092011, "tc"]
#################################################################
#
# Scatterplotting. Is there any relationship between these variables?
#
#################################################################
plot(m20092011, w20092011,
     xlab = "Mendeley",
     ylab = "WoS",
     main = "Mendeley saves and WoS citations in Aalto publications 2009-2011")
# Least-squares line for the plot above: WoS (y axis) regressed on
# Mendeley (x axis), using the vectors that were actually plotted
abline(lm(w20092011 ~ m20092011))
mw <- data.frame(m20092011, w20092011)
# Spearman rank correlation over the pairs where both counts are present;
# without `use`, a single missing value makes the result NA
cor(mw$m20092011, mw$w20092011, method = "spearman", use = "na.or.complete")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment