Last active
December 27, 2015 04:09
-
-
Save tts/7264601 to your computer and use it in GitHub Desktop.
Gather altmetrics about Aalto University publications from Altmetric, ImpactStory and PLoS ALM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################################## | |
# | |
# Altmetrics about Aalto University publications | |
# 2007-2012 with a DOI in WoS | |
# | |
# 2.11.2013 Tuija Sonkkila | |
# | |
# Altmetric data provided by Altmetric.com | |
# ImpactStory data provided by ImpactStory | |
# PLoS ALM data provided by PLoS | |
# Web of Science data provided by Thomson Reuters | |
# | |
# 1. fetch Aalto DOI's, publ years, and cites from WoS (aaltodata.csv) | |
# 2. query Altmetric.com with these DOIs, and merge subset of the result with the rest of data | |
# 3. query ImpactStory and PLoS ALM with the DOIs found in Altmetric.com, and merge as in 2. | |
# 4. save as CSV and use as import to a Shiny web app | |
# | |
# Here, first Altmetric.com because with some of the Aalto DOIs the ImpactStory API throws an error | |
# and stops (when ~18% is queried) | |
# | |
# About building an R Shiny web app from this data, see https://gist.github.com/tts/6990101 | |
# | |
################################################################################################################# | |
library(rAltmetric) | |
library(rImpactStory) | |
library(rplos) | |
library(alm) | |
library(plyr) | |
# File with DOIs, year of publ and WoS cites.
# quote and comment.char are set the way read.csv() sets them: a DOI may
# contain "#" or "'", which read.table() would otherwise read as the start
# of a comment or of a quoted string and so truncate or mis-split the row.
aaltoData <- read.table("aaltodata.csv",
                        header = TRUE,
                        sep = ",",
                        quote = "\"",
                        comment.char = "",
                        col.names = c("id", "year", "doi", "wos"),
                        stringsAsFactors = FALSE)

# Just the DOIs, each prefixed with "doi/" as a preamble for the rAltmetric run
dois <- data.frame(doi = paste0("doi/", aaltoData$doi),
                   stringsAsFactors = FALSE)
########################################### | |
# | |
# Query Altmetric.com API | |
# | |
# Store your key in .Rprofile | |
# | |
# Install rAltmetric from GitHub, | |
# because the CRAN version is older | |
# (at least for the moment): | |
# | |
# library("devtools") | |
# install_github("rAltmetric", "ropensci") | |
# | |
############################################ | |
# Query Altmetric.com once per DOI. Every element of raw_metrics either
# holds the metrics of one DOI or is NULL.
raw_metrics <- llply(dois$doi, altmetrics, .progress = "text")
metric_data <- ldply(raw_metrics, altmetric_data)

# From every item, take the second list element (the DOI). Items without one
# get the placeholder string "NA", which matches no real DOI when the result
# is later merged with aaltoData.
# The data frame is named adoi.full because that is the name the merge steps
# that follow refer to.
adoi.full <- data.frame(
  doi = vapply(
    raw_metrics,
    function(item) {
      found <- item[[2]]
      if (is.null(found)) "NA" else as.character(found)
    },
    character(1)
  ),
  stringsAsFactors = FALSE
)
# Merge adoi.full with metric_data. Both data frames get a running row
# number as ID and are merged by that ID.
# seq_len() keeps the numbering valid when a data frame has zero rows
# (seq(from = 1, to = 0) would give c(1, 0) and fail on assignment).
metric_data$id <- seq_len(nrow(metric_data))
adoi.full$id <- seq_len(nrow(adoi.full))
aalto.alt.all <- merge(metric_data, adoi.full, by = "id")

# Then, merge with aaltoData by DOI.
# NOTE(review): "doi.y" presumably is the adoi.full DOI, suffixed by merge()
# because metric_data carries a doi column of its own -- confirm.
aalto.all <- merge(aaltoData, aalto.alt.all, by.x = "doi", by.y = "doi.y")

# Choose only some relevant columns (score = Altmetric score)
aalto.all.m <- aalto.all[, c(
  "doi", "url", "details_url", "year",
  "wos", "score",
  "mendeley", "connotea", "citeulike", "readers_count",
  "cited_by_gplus_count", "cited_by_fbwalls_count",
  "cited_by_posts_count", "cited_by_tweeters_count",
  "cited_by_accounts_count", "cited_by_feeds_count",
  "cited_by_videos_count",
  "cited_by_msm_count"
)]
#######################################################################################
#
# Query ImpactStory API
#
# Store your key in .Rprofile
#
# The result list is stored in a variable called `metrics`, which is also the
# name of the function that is applied to every DOI. The function is therefore
# referenced with its namespace (rImpactStory::metrics); otherwise re-running
# this step while the list exists fails with
# "Error: attempt to apply non-function".
#
#######################################################################################
metrics <- llply(as.list(aalto.all.m$doi), rImpactStory::metrics, .progress = "text")
# Flatten one ImpactStory item into a one-row data frame of selected metrics.
# Adapted from https://gist.github.com/SChamberlain/6136591
#
# input: one element of the ImpactStory result list; only its `metrics`
#        element is read.
# Returns a data.frame with the columns delicious_bookmarks,
# mendeley_readers, plosalm_html_views, plosalm_pdf_views, plosalm_scopus,
# pmc_citations and topsy_tweets. A metric that is absent is NA.
parse_is <- function(input) {
  metrics <- input$metrics

  # Raw value of one metric, or NULL when it is absent or cannot be
  # extracted, so that no error text ends up in the data.
  raw_value <- function(key) {
    tryCatch(metrics[[key]]$values[["raw"]], error = function(e) NULL)
  }

  # Alternative source, not used: `pubmed:pmc_citations`
  pmc_full_text <- raw_value("plosalm:pmc_full-text")
  pmc_pdf <- raw_value("plosalm:pmc_pdf")
  # pmc_citations = full-text + PDF counts. It is missing only when both
  # parts are missing; sum() skips a NULL operand, so a single available
  # part is still counted.
  pmc_citations <- if (is.null(pmc_full_text) && is.null(pmc_pdf)) {
    NULL
  } else {
    sum(pmc_full_text, pmc_pdf)
  }

  results <- list(
    delicious_bookmarks = raw_value("delicious:bookmarks"),
    mendeley_readers = raw_value("mendeley:readers"),
    plosalm_html_views = raw_value("plosalm:html_views"),
    plosalm_pdf_views = raw_value("plosalm:pdf_views"),
    plosalm_scopus = raw_value("plosalm:scopus"),
    pmc_citations = pmc_citations,
    topsy_tweets = raw_value("topsy:tweets")
  )

  # data.frame() cannot hold NULL, so absent metrics become NA
  results[vapply(results, is.null, logical(1))] <- NA
  # A date column could be added with: date_modified = input$last_modified
  data.frame(results)
}
# One row of parsed ImpactStory metrics per queried item
is.df <- ldply(.data = metrics, .fun = parse_is)
# Parse the DOI of one ImpactStory item into a one-row data frame.
#
# input: one element of the ImpactStory result list; its aliases[["doi"]]
#        entry is read.
# Returns a data.frame with the single column `thisdoi`, which is NA when
# the item has no DOI alias or the alias cannot be read.
add_doi <- function(input) {
  # A lookup failure is treated like a missing DOI instead of leaving an
  # error string in the column
  thisdoi <- tryCatch(input$aliases[["doi"]], error = function(e) NULL)
  if (is.null(thisdoi)) {
    thisdoi <- NA
  }
  data.frame(list(thisdoi = thisdoi), stringsAsFactors = FALSE)
}
# One row per ImpactStory item, holding its DOI alias
is_doi.df <- ldply(metrics, add_doi)

# Merge is_doi.df and the parsed metrics by ID (a running row number).
# seq_len() keeps the numbering valid when a data frame has zero rows.
is_doi.df$id <- seq_len(nrow(is_doi.df))
is.df$id <- seq_len(nrow(is.df))
is.m <- merge(is_doi.df, is.df, by = "id")

# The ImpactStory columns carried forward
impactstory_cols <- c(
  "delicious_bookmarks",
  "plosalm_html_views",
  "plosalm_pdf_views",
  "plosalm_scopus",
  "pmc_citations",
  "topsy_tweets"
)
is.m.sub <- is.m[, c("id", impactstory_cols)]

# and merge with aalto.all.m. You cannot merge with DOI because it can be
# missing from is.m.sub, so the row number is used here as well
aalto.all.m$id <- seq_len(nrow(aalto.all.m))
final.data <- merge(is.m.sub, aalto.all.m, by = "id")

# Choose some columns: the Altmetric ones followed by the ImpactStory ones
aalto.altm.is <- final.data[, c(
  "doi", "url", "details_url", "year",
  "wos", "score",
  "mendeley", "connotea", "citeulike", "readers_count",
  "cited_by_gplus_count", "cited_by_fbwalls_count",
  "cited_by_posts_count", "cited_by_tweeters_count",
  "cited_by_accounts_count", "cited_by_feeds_count",
  "cited_by_videos_count",
  "cited_by_msm_count",
  impactstory_cols
)]
########################################## | |
# | |
# Query PLoS ALM API | |
# | |
# From every DOI not found, the API returns an | |
# "Error : client error: (404) Not Found" | |
# | |
########################################## | |
# Fetch the PLoS article-level metrics of one DOI as a data frame.
#
# doi: a single DOI, passed to alm() with total_details = TRUE.
# Returns data.frame(list(plos_dat = <alm result>)). When alm() returns NULL
# or signals an error, the result is one row with a single column plos_dat
# holding NA, so that every DOI still contributes a row.
parse_plos <- function(doi) {
  plos_dat <- tryCatch(
    alm(doi = doi, total_details = TRUE),
    error = function(e) {
      # Report the failure and carry on with the remaining DOIs
      message("PLoS ALM query failed for ", doi, ": ", conditionMessage(e))
      NULL
    }
  )
  if (is.null(plos_dat)) {
    plos_dat <- NA
  }
  data.frame(list(plos_dat = plos_dat))
}
plos_results <- ldply(aalto.all.m$doi, parse_plos)

# Choose some metrics; the names are the short column names used from here on
plos_cols <- c(
  CrossRef = "plos_dat.crossref_total",
  Nature = "plos_dat.nature_total",
  PubMed = "plos_dat.pubmed_total",
  F1000 = "plos_dat.f1000_total",
  Figshare = "plos_dat.figshare_total"
)
# A metric column that is absent from the results (e.g. when no DOI was found
# in PLoS) is added as NA instead of failing with "undefined columns selected"
for (missing_col in setdiff(plos_cols, names(plos_results))) {
  plos_results[[missing_col]] <- rep(NA, nrow(plos_results))
}
plos_df <- plos_results[, unname(plos_cols)]
names(plos_df) <- names(plos_cols)

# Merge with the rest of data by a running row number.
# seq_len() keeps the numbering valid when a data frame has zero rows.
plos_df$id <- seq_len(nrow(plos_df))
aalto.altm.is$id <- seq_len(nrow(aalto.altm.is))
all.data <- merge(aalto.altm.is, plos_df, by = "id")

# Everything except the helper ID, in the order written to the CSV
all.data.save <- all.data[, c(
  "doi", "url", "details_url", "year", "wos", "score",
  "mendeley", "connotea", "citeulike", "readers_count",
  "cited_by_gplus_count", "cited_by_fbwalls_count",
  "cited_by_posts_count", "cited_by_tweeters_count",
  "cited_by_accounts_count", "cited_by_feeds_count",
  "cited_by_videos_count", "cited_by_msm_count",
  "delicious_bookmarks", "plosalm_html_views", "plosalm_pdf_views",
  "plosalm_scopus", "pmc_citations", "topsy_tweets",
  names(plos_cols)
)]

##########################################
#
# Save for Shiny use
#
##########################################
write.csv(all.data.save, file = "aalto.altm.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment