# tts/altmapi.R

## altmapi.R
##################################################################################################################
#
# Altmetrics about Aalto University publications
# 2007-2012 with a DOI in WoS
#
# 2.11.2013 Tuija Sonkkila
#
# Altmetric data provided by Altmetric.com
# ImpactStory data provided by ImpactStory
# PLoS ALM data provided by PLoS
# Web of Science data provided by Thomson Reuters
#
# 1. fetch Aalto DOI's, publ years, and cites from WoS (aaltodata.csv)
# 2. query Altmetric.com with these DOIs, and merge subset of the result with the rest of data
# 3. query ImpactStory and PLoS ALM with the DOIs found in Altmetric.com, and merge as in 2.
# 4. save as CSV and use as import to a Shiny web app
#
# Here, first Altmetric.com because with some of the Aalto DOIs the ImpactStory API throws an error
# and stops (when ~18% is queried)
#
# About building an R Shiny web app from this data, see https://gist.github.com/tts/6990101
#
#################################################################################################################
library(rAltmetric)
library(rImpactStory)
library(rplos)
library(alm)
library(plyr)

# Load the WoS export. header = TRUE skips the file's first line; the four
# columns are named here: record id, publication year, DOI, WoS cites.
aaltoData <- read.table(
  file = "aaltodata.csv",
  header = TRUE,
  sep = ",",
  col.names = c("id", "year", "doi", "wos"),
  stringsAsFactors = FALSE
)

# One-column data frame holding just the DOIs ...
dois <- data.frame(doi = aaltoData$doi, stringsAsFactors = FALSE)
# ... each prefixed with "doi/" (preamble for the rAltmetric run)
dois$doi <- paste("doi/", dois$doi, sep = "")

###########################################
#
#  Query Altmetric.com API
#
#  Store your key in .Rprofile
#
#  Install rAltmetric from GitHub,
#  because the CRAN version is older
#  (at least for the moment):
#
#  library("devtools")
#  install_github("rAltmetric", "ropensci")
#
############################################

# Query Altmetric.com once per DOI and flatten the answers into one data
# frame. Root list elements include metrics or are NULL.
raw_metrics <- llply(dois$doi, altmetrics, .progress = 'text')
metric_data <- ldply(raw_metrics, altmetric_data)

# Number of root list elements
N <- length(raw_metrics)

# From all items, take the second list element. If it is not NULL it is
# stored as the DOI, otherwise the placeholder string 'NA' is stored.
adoi <- data.frame(
  doi = vapply(
    raw_metrics,
    function(item) {
      found <- item[[2]]
      if (is.null(found)) "NA" else as.character(found)
    },
    character(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)

# FIX: the script used `adoi.full` without ever creating it, so it stopped
# here with "object 'adoi.full' not found". Keep only the DOIs that were
# actually returned, so the rows can be paired with metric_data by position.
# NOTE(review): assumes ldply() produced exactly one metric_data row per
# non-NULL item, in query order; the check below stops if that is not so,
# because a positional merge would otherwise silently mis-pair rows.
adoi.full <- adoi[adoi$doi != "NA", , drop = FALSE]
stopifnot(nrow(metric_data) == nrow(adoi.full))

# Merge adoi.full with metric_data. First, generate an ID
# to both data frames, and then merge by the ID.
# seq_len() gives an empty id for zero rows, where seq(1, 0) gives c(1, 0).
metric_data$id <- seq_len(nrow(metric_data))
adoi.full$id <- seq_len(nrow(adoi.full))
aalto.alt.all <- merge(metric_data, adoi.full, by = "id")

# Then, merge with aaltoData by DOI ("doi.y" is the DOI column that came
# from adoi.full in the merge above)
aalto.all <- merge(aaltoData, aalto.alt.all, by.x = "doi", by.y = "doi.y")

# Choose only some relevant columns (score = Altmetric score)
aalto.all.m <- aalto.all[ , c("doi", "url", "details_url", "year",
                              "wos", "score",
                              "mendeley", "connotea", "citeulike", "readers_count",
                              "cited_by_gplus_count", "cited_by_fbwalls_count",
                              "cited_by_posts_count", "cited_by_tweeters_count",
                              "cited_by_accounts_count", "cited_by_feeds_count", "cited_by_videos_count",
                              "cited_by_msm_count")]

#######################################################################################
#
# Query ImpactStory API
#
# Store your key in .Rprofile
#
# The query function is referenced as rImpactStory::metrics because the result is
# stored in a variable that is also called `metrics`. With a bare `metrics`, a second
# run would pass that list to llply() instead of the function and fail with
# "Error: attempt to apply non-function".
#
#######################################################################################


metrics <- llply(as.list(aalto.all.m$doi), rImpactStory::metrics, .progress = 'text')

# https://gist.github.com/SChamberlain/6136591
# Flatten one ImpactStory item into a one-row data frame of selected metrics.
#
# input: a list whose `metrics` element holds one sub-list per metric name
#        (e.g. "topsy:tweets"); the count is read from its values[["raw"]].
# Returns a data frame with the columns delicious_bookmarks, mendeley_readers,
# plosalm_html_views, plosalm_pdf_views, plosalm_scopus, pmc_citations and
# topsy_tweets. A metric that is absent becomes NA.
parse_is <- function(input) {
  item_metrics <- input$metrics

  # Raw count of a single metric. Anything that cannot be read (metric
  # missing, unexpected structure) comes back as NULL and ends up as NA,
  # rather than a "try-error" message string leaking into the column.
  raw_value <- function(key) {
    tryCatch(
      item_metrics[[key]]$values[["raw"]],
      error = function(e) NULL
    )
  }

  # pmc_citations is the sum of the PMC full-text and PMC PDF counts.
  # FIX: the original tested pmc_citations_one twice, so a single missing
  # count gave `x + NULL` = numeric(0) and data.frame() failed on unequal
  # lengths. sum() skips a NULL operand, i.e. a missing count counts as 0.
  pmc_full_text <- raw_value("plosalm:pmc_full-text")
  pmc_pdf <- raw_value("plosalm:pmc_pdf")
  pmc_citations <- if (is.null(pmc_full_text) && is.null(pmc_pdf)) {
    NULL
  } else {
    sum(pmc_full_text, pmc_pdf)
  }

  results <- list(
    delicious_bookmarks = raw_value("delicious:bookmarks"),
    mendeley_readers = raw_value("mendeley:readers"),
    plosalm_html_views = raw_value("plosalm:html_views"),
    plosalm_pdf_views = raw_value("plosalm:pdf_views"),
    plosalm_scopus = raw_value("plosalm:scopus"),
    pmc_citations = pmc_citations,
    topsy_tweets = raw_value("topsy:tweets")
  )

  # data.frame() needs every column to have a value: fill the gaps with NA
  is_empty <- vapply(results, function(v) length(v) == 0L, logical(1))
  results[is_empty] <- NA
  data.frame(results)
}

# One row of parsed ImpactStory metrics per queried item
is.df <- ldply(.data = metrics, .fun = parse_is)

# Extract the DOI alias of one ImpactStory item.
#
# input: a list; the DOI is read from input$aliases[["doi"]].
# Returns a one-column data frame named `thisdoi`, holding NA when the
# item carries no DOI alias.
add_doi <- function(input) {
  doi_alias <- try(input$aliases[["doi"]])
  if (is.null(doi_alias)) {
    doi_alias <- NA
  }
  data.frame(list(thisdoi = doi_alias))
}

# DOI reported for each queried ImpactStory item
is_doi.df <- ldply(metrics, add_doi)

# Merge is_doi.df and results by ID. The ID is simply the row position.
# seq_len() is used instead of seq(from = 1, to = n): with zero rows the
# latter yields c(1, 0), which cannot be assigned to an empty data frame.
is_doi.df$id <- seq_len(nrow(is_doi.df))
is.df$id <- seq_len(nrow(is.df))
is.m <- merge(is_doi.df, is.df, by = "id")

# take a few columns
is.m.sub <- is.m[ , c("id",
                      "delicious_bookmarks",
                      "plosalm_html_views",
                      "plosalm_pdf_views",
                      "plosalm_scopus",
                      "pmc_citations",
                      "topsy_tweets")]

# and merge with aalto.all.m. You cannot merge with DOI because it can be missing from is.m.sub
aalto.all.m$id <- seq_len(nrow(aalto.all.m))
final.data <- merge(is.m.sub, aalto.all.m, by = "id")

# Choose some columns: the Altmetric ones first, then the ImpactStory ones
aalto.altm.is <- final.data[ , c("doi", "url", "details_url", "year",
                                 "wos", "score",
                                 "mendeley", "connotea", "citeulike", "readers_count",
                                 "cited_by_gplus_count", "cited_by_fbwalls_count",
                                 "cited_by_posts_count", "cited_by_tweeters_count",
                                 "cited_by_accounts_count", "cited_by_feeds_count", "cited_by_videos_count",
                                 "cited_by_msm_count",
                                 "delicious_bookmarks",
                                 "plosalm_html_views",
                                 "plosalm_pdf_views",
                                 "plosalm_scopus",
                                 "pmc_citations",
                                 "topsy_tweets")]

##########################################
#
# Query PLoS ALM API
#
# From every DOI not found, the API returns an
# "Error : client error: (404) Not Found"
#
##########################################

# Fetch the PLoS ALM record of one DOI via alm() and wrap it as a data frame.
#
# doi: the DOI handed to alm(doi = ..., total_details = TRUE).
# Returns data.frame(list(plos_dat = <alm result>)); when alm() gives NULL
# the result is a single `plos_dat` column holding NA.
parse_plos <- function(doi){
  alm_result <- alm(doi = doi, total_details = TRUE)
  if (is.null(alm_result)) {
    alm_result <- NA
  }
  data.frame(list(plos_dat = alm_result))
}

# Query PLoS ALM with every DOI kept in aalto.all.m; parse_plos() returns
# one data frame per DOI and ldply() stacks them.
plos_results <- ldply(aalto.all.m$doi, parse_plos)
# Choose some metrics
plos_df <- plos_results[ , c("plos_dat.crossref_total",
                             "plos_dat.nature_total",
                             "plos_dat.pubmed_total",
                             "plos_dat.f1000_total",
                             "plos_dat.figshare_total")]

names(plos_df) <- c("CrossRef", "Nature", "PubMed", "F1000", "Figshare")

# Merge with the rest of data by row position. seq_len() is used instead of
# seq(from = 1, to = n): with zero rows the latter yields c(1, 0), which
# cannot be assigned to an empty data frame.
plos_df$id <- seq_len(nrow(plos_df))
aalto.altm.is$id <- seq_len(nrow(aalto.altm.is))
all.data <- merge(aalto.altm.is, plos_df, by = "id")
# Drop the helper id and fix the column order of the exported file
all.data.save <- all.data[ , c("doi", "url", "details_url", "year", "wos", "score", "mendeley", "connotea", "citeulike", "readers_count",
                               "cited_by_gplus_count", "cited_by_fbwalls_count", "cited_by_posts_count", "cited_by_tweeters_count",
                               "cited_by_accounts_count", "cited_by_feeds_count", "cited_by_videos_count", "cited_by_msm_count",
                               "delicious_bookmarks", "plosalm_html_views", "plosalm_pdf_views", "plosalm_scopus", "pmc_citations",
                               "topsy_tweets", "CrossRef", "Nature", "PubMed", "F1000", "Figshare")]

##########################################
#
# Save for Shiny use
#
##########################################

write.csv(all.data.save, file = "aalto.altm.csv")
	##################################################################################################################
	#
	# Altmetrics about Aalto University publications
	# 2007-2012 with a DOI in WoS
	#
	# 2.11.2013 Tuija Sonkkila
	#
	# Altmetric data provided by Altmetric.com
	# ImpactStory data provided by ImpactStory
	# PLoS ALM data provided by PLoS
	# Web of Science data provided by Thomson Reuters
	#
	# 1. fetch Aalto DOI's, publ years, and cites from WoS (aaltodata.csv)
	# 2. query Altmetric.com with these DOIs, and merge subset of the result with the rest of data
	# 3. query ImpactStory and PLoS ALM with the DOIs found in Altmetric.com, and merge as in 2.
	# 4. save as CSV and use as import to a Shiny web app
	#
	# Here, first Altmetric.com because with some of the Aalto DOIs the ImpactStory API throws an error
	# and stops (when ~18% is queried)
	#
	# About building an R Shiny web app from this data, see https://gist.github.com/tts/6990101
	#
	#################################################################################################################
	library(rAltmetric)
	library(rImpactStory)
	library(rplos)
	library(alm)
	library(plyr)

	# Load the WoS export. header = TRUE skips the file's first line; the four
	# columns are named here: record id, publication year, DOI, WoS cites.
	aaltoData <- read.table(
	  file = "aaltodata.csv",
	  header = TRUE,
	  sep = ",",
	  col.names = c("id", "year", "doi", "wos"),
	  stringsAsFactors = FALSE
	)

	# One-column data frame holding just the DOIs ...
	dois <- data.frame(doi = aaltoData$doi, stringsAsFactors = FALSE)
	# ... each prefixed with "doi/" (preamble for the rAltmetric run)
	dois$doi <- paste("doi/", dois$doi, sep = "")

	###########################################
	#
	# Query Altmetric.com API
	#
	# Store your key in .Rprofile
	#
	# Install rAltmetric from GitHub,
	# because the CRAN version is older
	# (at least for the moment):
	#
	# library("devtools")
	# install_github("rAltmetric", "ropensci")
	#
	############################################

	# Query Altmetric.com once per DOI and flatten the answers into one data
	# frame. Root list elements include metrics or are NULL.
	raw_metrics <- llply(dois$doi, altmetrics, .progress = 'text')
	metric_data <- ldply(raw_metrics, altmetric_data)

	# Number of root list elements
	N <- length(raw_metrics)

	# From all items, take the second list element. If it is not NULL it is
	# stored as the DOI, otherwise the placeholder string 'NA' is stored.
	adoi <- data.frame(
	  doi = vapply(
	    raw_metrics,
	    function(item) {
	      found <- item[[2]]
	      if (is.null(found)) "NA" else as.character(found)
	    },
	    character(1),
	    USE.NAMES = FALSE
	  ),
	  stringsAsFactors = FALSE
	)

	# FIX: the script used `adoi.full` without ever creating it, so it stopped
	# here with "object 'adoi.full' not found". Keep only the DOIs that were
	# actually returned, so the rows can be paired with metric_data by position.
	# NOTE(review): assumes ldply() produced exactly one metric_data row per
	# non-NULL item, in query order; the check below stops if that is not so,
	# because a positional merge would otherwise silently mis-pair rows.
	adoi.full <- adoi[adoi$doi != "NA", , drop = FALSE]
	stopifnot(nrow(metric_data) == nrow(adoi.full))

	# Merge adoi.full with metric_data. First, generate an ID
	# to both data frames, and then merge by the ID.
	# seq_len() gives an empty id for zero rows, where seq(1, 0) gives c(1, 0).
	metric_data$id <- seq_len(nrow(metric_data))
	adoi.full$id <- seq_len(nrow(adoi.full))
	aalto.alt.all <- merge(metric_data, adoi.full, by = "id")

	# Then, merge with aaltoData by DOI ("doi.y" is the DOI column that came
	# from adoi.full in the merge above)
	aalto.all <- merge(aaltoData, aalto.alt.all, by.x = "doi", by.y = "doi.y")

	# Choose only some relevant columns (score = Altmetric score)
	aalto.all.m <- aalto.all[ , c("doi", "url", "details_url", "year",
	                              "wos", "score",
	                              "mendeley", "connotea", "citeulike", "readers_count",
	                              "cited_by_gplus_count", "cited_by_fbwalls_count",
	                              "cited_by_posts_count", "cited_by_tweeters_count",
	                              "cited_by_accounts_count", "cited_by_feeds_count", "cited_by_videos_count",
	                              "cited_by_msm_count")]

	#######################################################################################
	#
	# Query ImpactStory API
	#
	# Store your key in .Rprofile
	#
	# The query function is referenced as rImpactStory::metrics because the result is
	# stored in a variable that is also called `metrics`. With a bare `metrics`, a second
	# run would pass that list to llply() instead of the function and fail with
	# "Error: attempt to apply non-function".
	#
	#######################################################################################


	metrics <- llply(as.list(aalto.all.m$doi), rImpactStory::metrics, .progress = 'text')

	# https://gist.github.com/SChamberlain/6136591
	# Flatten one ImpactStory item into a one-row data frame of selected metrics.
	#
	# input: a list whose `metrics` element holds one sub-list per metric name
	#        (e.g. "topsy:tweets"); the count is read from its values[["raw"]].
	# Returns a data frame with the columns delicious_bookmarks, mendeley_readers,
	# plosalm_html_views, plosalm_pdf_views, plosalm_scopus, pmc_citations and
	# topsy_tweets. A metric that is absent becomes NA.
	parse_is <- function(input) {
	  item_metrics <- input$metrics

	  # Raw count of a single metric. Anything that cannot be read (metric
	  # missing, unexpected structure) comes back as NULL and ends up as NA,
	  # rather than a "try-error" message string leaking into the column.
	  raw_value <- function(key) {
	    tryCatch(
	      item_metrics[[key]]$values[["raw"]],
	      error = function(e) NULL
	    )
	  }

	  # pmc_citations is the sum of the PMC full-text and PMC PDF counts.
	  # FIX: the original tested pmc_citations_one twice, so a single missing
	  # count gave `x + NULL` = numeric(0) and data.frame() failed on unequal
	  # lengths. sum() skips a NULL operand, i.e. a missing count counts as 0.
	  pmc_full_text <- raw_value("plosalm:pmc_full-text")
	  pmc_pdf <- raw_value("plosalm:pmc_pdf")
	  pmc_citations <- if (is.null(pmc_full_text) && is.null(pmc_pdf)) {
	    NULL
	  } else {
	    sum(pmc_full_text, pmc_pdf)
	  }

	  results <- list(
	    delicious_bookmarks = raw_value("delicious:bookmarks"),
	    mendeley_readers = raw_value("mendeley:readers"),
	    plosalm_html_views = raw_value("plosalm:html_views"),
	    plosalm_pdf_views = raw_value("plosalm:pdf_views"),
	    plosalm_scopus = raw_value("plosalm:scopus"),
	    pmc_citations = pmc_citations,
	    topsy_tweets = raw_value("topsy:tweets")
	  )

	  # data.frame() needs every column to have a value: fill the gaps with NA
	  is_empty <- vapply(results, function(v) length(v) == 0L, logical(1))
	  results[is_empty] <- NA
	  data.frame(results)
	}

	# One row of parsed ImpactStory metrics per queried item
	is.df <- ldply(.data = metrics, .fun = parse_is)

	# Extract the DOI alias of one ImpactStory item.
	#
	# input: a list; the DOI is read from input$aliases[["doi"]].
	# Returns a one-column data frame named `thisdoi`, holding NA when the
	# item carries no DOI alias.
	add_doi <- function(input) {
	  doi_alias <- try(input$aliases[["doi"]])
	  if (is.null(doi_alias)) {
	    doi_alias <- NA
	  }
	  data.frame(list(thisdoi = doi_alias))
	}

	# DOI reported for each queried ImpactStory item
	is_doi.df <- ldply(metrics, add_doi)

	# Merge is_doi.df and results by ID. The ID is simply the row position.
	# seq_len() is used instead of seq(from = 1, to = n): with zero rows the
	# latter yields c(1, 0), which cannot be assigned to an empty data frame.
	is_doi.df$id <- seq_len(nrow(is_doi.df))
	is.df$id <- seq_len(nrow(is.df))
	is.m <- merge(is_doi.df, is.df, by = "id")

	# take a few columns
	is.m.sub <- is.m[ , c("id",
	                      "delicious_bookmarks",
	                      "plosalm_html_views",
	                      "plosalm_pdf_views",
	                      "plosalm_scopus",
	                      "pmc_citations",
	                      "topsy_tweets")]

	# and merge with aalto.all.m. You cannot merge with DOI because it can be missing from is.m.sub
	aalto.all.m$id <- seq_len(nrow(aalto.all.m))
	final.data <- merge(is.m.sub, aalto.all.m, by = "id")

	# Choose some columns: the Altmetric ones first, then the ImpactStory ones
	aalto.altm.is <- final.data[ , c("doi", "url", "details_url", "year",
	                                 "wos", "score",
	                                 "mendeley", "connotea", "citeulike", "readers_count",
	                                 "cited_by_gplus_count", "cited_by_fbwalls_count",
	                                 "cited_by_posts_count", "cited_by_tweeters_count",
	                                 "cited_by_accounts_count", "cited_by_feeds_count", "cited_by_videos_count",
	                                 "cited_by_msm_count",
	                                 "delicious_bookmarks",
	                                 "plosalm_html_views",
	                                 "plosalm_pdf_views",
	                                 "plosalm_scopus",
	                                 "pmc_citations",
	                                 "topsy_tweets")]

	##########################################
	#
	# Query PLoS ALM API
	#
	# From every DOI not found, the API returns an
	# "Error : client error: (404) Not Found"
	#
	##########################################

	# Fetch the PLoS ALM record of one DOI via alm() and wrap it as a data frame.
	#
	# doi: the DOI handed to alm(doi = ..., total_details = TRUE).
	# Returns data.frame(list(plos_dat = <alm result>)); when alm() gives NULL
	# the result is a single `plos_dat` column holding NA.
	parse_plos <- function(doi){
	  alm_result <- alm(doi = doi, total_details = TRUE)
	  if (is.null(alm_result)) {
	    alm_result <- NA
	  }
	  data.frame(list(plos_dat = alm_result))
	}

	# Query PLoS ALM with every DOI kept in aalto.all.m; parse_plos() returns
	# one data frame per DOI and ldply() stacks them.
	plos_results <- ldply(aalto.all.m$doi, parse_plos)
	# Choose some metrics
	plos_df <- plos_results[ , c("plos_dat.crossref_total",
	                             "plos_dat.nature_total",
	                             "plos_dat.pubmed_total",
	                             "plos_dat.f1000_total",
	                             "plos_dat.figshare_total")]

	names(plos_df) <- c("CrossRef", "Nature", "PubMed", "F1000", "Figshare")

	# Merge with the rest of data by row position. seq_len() is used instead of
	# seq(from = 1, to = n): with zero rows the latter yields c(1, 0), which
	# cannot be assigned to an empty data frame.
	plos_df$id <- seq_len(nrow(plos_df))
	aalto.altm.is$id <- seq_len(nrow(aalto.altm.is))
	all.data <- merge(aalto.altm.is, plos_df, by = "id")
	# Drop the helper id and fix the column order of the exported file
	all.data.save <- all.data[ , c("doi", "url", "details_url", "year", "wos", "score", "mendeley", "connotea", "citeulike", "readers_count",
	                               "cited_by_gplus_count", "cited_by_fbwalls_count", "cited_by_posts_count", "cited_by_tweeters_count",
	                               "cited_by_accounts_count", "cited_by_feeds_count", "cited_by_videos_count", "cited_by_msm_count",
	                               "delicious_bookmarks", "plosalm_html_views", "plosalm_pdf_views", "plosalm_scopus", "pmc_citations",
	                               "topsy_tweets", "CrossRef", "Nature", "PubMed", "F1000", "Figshare")]

	##########################################
	#
	# Save for Shiny use
	#
	##########################################

	write.csv(all.data.save, file = "aalto.altm.csv")