Skip to content

Instantly share code, notes, and snippets.

@tts tts/gist:4584793
Last active Dec 11, 2015

Embed
What would you like to do?
Altmetrics scores by Altmetric and traditional citation metrics (WoS) of Aalto University publications published between 2007 and 2012 and with a DOI known by WoS.
###########################################################
#
# Altmetrics about Aalto University publications
# 2007-2012 with a DOI
#
# Altmetrics data provided by altmetric.com
# Web of Science data provided by Thomson Reuters
#
##########################################################
library(rAltmetric)
library(plyr)
library(reshape)
# DOIs are gathered from a local source along with citation counts
# by Thomson Reuters Web of Science (WoS), and publication year.
#
# Merge DOIs (and other variables) by school
# Combine the per-school tables into a single data frame
aalto.doi <- merge_all(
  list(arts.doi, biz.doi, chem.doi, elec.doi, eng.doi, sci.doi)
)
# Prepend "doi/" to every DOI for the rAltmetric run
aalto.doi[["di"]] <- paste0("doi/", aalto.doi[["di"]])
# One-column data frame (column "doi") holding just the prefixed DOIs
dois <- data.frame(doi = aalto.doi[["di"]], stringsAsFactors = FALSE)
# Below, I first run DOIs against the API of Altmetric
# with the altmetrics function from the rAltmetric package.
#
# The result (raw_metrics) is a nested list. From this list, the
# altmetric_data function extracts a number of altmetrics scores and
# saves them as variables in a data frame (metric_data). metric_data has
# a DOI column, but for some reason its value is '1' in every row.
#
# However, my goal is to merge the traditional WoS citations
# (along with the publication year) with the Altmetric scores.
# The common denominator is the DOI. To get this done, I need to pick up
# the DOIs from raw_metrics, merge them with metric_data, and then merge
# the result with the metrics from WoS.
# Run every prefixed DOI through altmetrics(); the result is a list with
# one element per DOI, and an element is NULL when nothing was returned.
raw_metrics <- llply(dois$doi, altmetrics, .progress = 'text')
# Collect the altmetric_data() output of all elements into one data frame.
# NOTE(review): presumably one row per non-NULL element -- the positional
# merge further down in this script relies on that; confirm.
metric_data <- ldply(raw_metrics, altmetric_data)
# Number of root list elements. They include metrics or are NULL
N <- length(raw_metrics)
# From all items, take the second list element (the DOI). Items without
# one get a real NA instead of the string 'NA', so the filter below can
# use is.na(). vapply() also copes with an empty raw_metrics, where a
# `1:N` loop would run over c(1, 0) and fail.
adoi <- data.frame(
  doi = vapply(
    raw_metrics,
    function(item) {
      doi <- item[[2]]  # NULL[[2]] is NULL, so NULL items fall through
      if (is.null(doi)) NA_character_ else as.character(doi)
    },
    character(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)
# Exclude NA rows; keep a one-column data frame named "doi" with fresh
# 1..k row names.
adoi.full <- adoi[!is.na(adoi$doi), , drop = FALSE]
rownames(adoi.full) <- NULL
# Merge adoi.full with metric_data. The two tables are paired purely by
# row position: both get a running ID and are merged on it. That is only
# correct when they have the same number of rows in the same order, so
# warn instead of silently pairing scores with the wrong DOI.
if (nrow(metric_data) != nrow(adoi.full)) {
  warning("metric_data has ", nrow(metric_data), " rows but adoi.full has ",
          nrow(adoi.full), "; positional merge may pair wrong DOIs",
          call. = FALSE)
}
# seq_len() yields an empty sequence for an empty table, where
# seq(from = 1, to = 0) would yield c(1, 0) and fail on assignment.
metric_data$id <- seq_len(nrow(metric_data))
adoi.full$id <- seq_len(nrow(adoi.full))
# Both inputs carry a "doi" column, so merge() suffixes them: doi.x comes
# from metric_data, doi.y from adoi.full.
aalto.alt.all <- merge(metric_data, adoi.full, by = "id")
# Then, merge aalto.alt.all with aalto.doi by DOI. Strip the leading
# "doi/" prefix first so the keys are comparable.
aalto.doi$di <- sub("^doi/", "", aalto.doi$di)
aalto.all <- merge(aalto.doi, aalto.alt.all,
                   by.x = "di", by.y = "doi.y")
# Narrow the merged table to the columns of interest
# (di = DOI, py = publication year, tc = WoS citations)
aalto.all.m <- aalto.all[c(
  "di", "url", "py", "tc",
  "mendeley", "connotea", "citeulike", "readers_count",
  "cited_by_gplus_count", "cited_by_fbwalls_count",
  "cited_by_posts_count", "cited_by_tweeters_count",
  "cited_by_accounts_count", "cited_by_feeds_count"
)]
# Store the DOI as a factor
aalto.all.m[["di"]] <- factor(aalto.all.m[["di"]])
# Five highest-ranked rows per channel, in this order:
# Twitter, Google+, Facebook walls, news feeds
head(aalto.all.m[order(aalto.all.m[["cited_by_tweeters_count"]],
                       decreasing = TRUE), ], n = 5)
head(aalto.all.m[order(aalto.all.m[["cited_by_gplus_count"]],
                       decreasing = TRUE), ], n = 5)
head(aalto.all.m[order(aalto.all.m[["cited_by_fbwalls_count"]],
                       decreasing = TRUE), ], n = 5)
head(aalto.all.m[order(aalto.all.m[["cited_by_feeds_count"]],
                       decreasing = TRUE), ], n = 5)
# Ten rows with the most Mendeley saves: sort descending, keep the head
top10m <- aalto.all.m[order(aalto.all.m[["mendeley"]], decreasing = TRUE), ]
top10m <- head(top10m, n = 10)
# Replace every missing value in the table with 0
top10m[is.na(top10m)] <- 0
# Ten rows with the most WoS citations, treated the same way
top10w <- aalto.all.m[order(aalto.all.m[["tc"]], decreasing = TRUE), ]
top10w <- head(top10w, n = 10)
top10w[is.na(top10w)] <- 0
# Reshape: one row per metric and one column per article, the layout
# barplot() needs to draw grouped bars.
# Columns to plot, and the legend labels in the same order.
metric.cols <- c("mendeley", "tc", "connotea", "citeulike",
                 "cited_by_fbwalls_count", "cited_by_posts_count",
                 "cited_by_tweeters_count", "cited_by_feeds_count")
metric.labels <- c("Mendeley", "WoS", "Connotea", "CiteULike", "Facebook",
                   "Posts", "Twitter", "Feeds")
subsetm <- t(as.matrix(top10m[, metric.cols]))
subsetw <- t(as.matrix(top10w[, metric.cols]))
# One colour per metric (same order as metric.cols); rgb() is vectorised
# over the red, green and blue components.
color.scheme <- rgb(
  red   = c(126, 200, 255, 255, 169, 215, 233, 199),
  green = c(0, 64, 71, 149, 124, 124, 172, 174),
  blue  = c(27, 94, 90, 131, 82, 26, 131, 36),
  maxColorValue = 255
)

# Draw one horizontal grouped bar chart into a PNG file.
#   heights: metric-by-article matrix; rows follow the order of `labels`
#   top10:   data frame whose di (DOI) and py (year) columns label the
#            bar groups
#   file:    path of the PNG file to write
#   title:   main title of the plot
#   labels:  legend entries, one per row of `heights`
#   colours: bar colours, one per row of `heights`
# Returns `file` invisibly.
plot_top10 <- function(heights, top10, file, title,
                       labels = metric.labels, colours = color.scheme) {
  png(file, width = 1024, height = 768, res = 72)
  # Close the device even when plotting fails, so it is never left open.
  on.exit(dev.off(), add = TRUE)
  # Wide left margin: the bar labels are full DOIs.
  par(mar = c(4, 16, 7, 3), cex = 0.80)
  barplot(heights,
          legend.text = labels,
          names.arg = paste0(top10$di, " (", top10$py, ")"),
          horiz = TRUE,
          las = 1,
          main = title,
          col = colours,
          beside = TRUE)
  invisible(file)
}

# Plot Top10 Mendeley
plot_top10(subsetm, top10m, "aalto.top10.mendeley.png",
           "Top10 Aalto articles in Mendeley saves 2007-2012")
# Plot Top10 WoS
plot_top10(subsetw, top10w, "aalto.top10.wos.png",
           "Top10 Aalto articles (DOI) in WoS citations with altmetrics 2007-2012")
##################################################
#
# Spearman correlations of Mendeley and WoS.
# Publications published between 2009 and 2011
#
##################################################
# 2009-2011
# Rows published 2009-2011. Rows with a missing year are excluded
# explicitly; an NA in a logical index would otherwise add an all-NA row.
in.2009.2011 <- !is.na(aalto.all.m$py) &
  aalto.all.m$py >= 2009 & aalto.all.m$py <= 2011
m20092011 <- aalto.all.m[in.2009.2011, "mendeley"]
w20092011 <- aalto.all.m[in.2009.2011, "tc"]
#################################################################
#
# Scatterplotting. Is there any relationship between these variables?
#
#################################################################
plot(m20092011, w20092011,
     xlab = "Mendeley",
     ylab = "WoS",
     main = "Mendeley saves and WoS citations in Aalto publications 2009-2011")
# Regress y (WoS) on x (Mendeley) so the fitted line matches the plot axes.
# The former `lm(m ~ w)` referred to objects that are never defined.
abline(lm(w20092011 ~ m20092011))
mw <- data.frame(m20092011, w20092011)
# Use only pairs where both counts are present; with the default handling
# a single NA makes the whole correlation NA.
cor(mw$m20092011, mw$w20092011, method = "spearman",
    use = "pairwise.complete.obs")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.