Skip to content

Instantly share code, notes, and snippets.

@tts
Created July 8, 2016 09:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tts/2e1fdacc341e663f1990bf0837bcde1a to your computer and use it in GitHub Desktop.
Save tts/2e1fdacc341e663f1990bf0837bcde1a to your computer and use it in GitHub Desktop.
Collect works and bio via ORCID Public API
# Query ACRIS for ORCID IDs. Based on these, fetch works and bio via the ORCID Public API.
#
# Run this SQL query in Pure and save the result as CSV: SELECT firstname, lastname, orcid FROM PERSON WHERE orcid IS NOT NULL
# Convert the character set to UTF-8: iconv -f ISO-8859-1 -t UTF-8 orcidinpure.csv > orcidinpure_utf8.csv
#
# Tuija Sonkkila 7.7.2016
library(rorcid)
# Load the Pure export. The file has no header row; its three columns are
# first name, last name and ORCID iD (read.csv2 expects ';' as the separator).
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
orcidinpure <- read.csv2(
  "orcidinpure_utf8.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
names(orcidinpure) <- c("first", "last", "orcid")
# Testing my own data
#
# out <- orcid_id(orcid = "0000-0002-6892-9305")
# as.POSIXct(out$`0000-0002-6892-9305`$`orcid-history`$`submission-date`$value/1000, origin="1970-01-01")
# "2013-08-26 16:47:05 EEST" > OK
#
# works <- works(orcid_id("0000-0002-6892-9305"))
# myworks <- works$data # to data frame
# All ORCID IDs as a vector
idsinpure <- orcidinpure$orcid

# Empty result with the final column layout; this is also the outcome when
# none of the IDs has any works.
worksinorcid <- data.frame(
  orcid = character(0),
  title = character(0),
  type = character(0),
  year = character(0),
  created = numeric(0),
  stringsAsFactors = FALSE
)

# Return column `name` of `df`, or a vector of `missing` (one per row) when the
# column is absent, so a record lacking e.g. publication dates does not abort
# the whole run.
workcolumn <- function(df, name, missing) {
  if (name %in% names(df)) df[[name]] else rep(missing, nrow(df))
}

# Collect works: one output row per work in each ORCID record.
# Each ID's rows are built in one vectorised step and stored in a list that is
# bound once after the loop, instead of calling rbind() for every single work.
worksperid <- vector("list", length(idsinpure))
for (k in seq_along(idsinpure)) {
  i <- idsinpure[k]
  this <- works(orcid_id(i))
  thisdata <- this$data
  # Skip records without works. The script originally tested
  # `this$data != 'None'`, which on a data frame yields a non-scalar condition
  # that if() rejects; checking for a non-empty data frame covers the 'None'
  # marker as well as NULL and zero-row results.
  if (!is.data.frame(thisdata) || nrow(thisdata) == 0) {
    next
  }
  # The ORCID iD is read from the "orcid" attribute of the result, as before;
  # fall back to the ID that was requested if the attribute is not set.
  thisorcid <- attr(this, "orcid", exact = TRUE)
  if (is.null(thisorcid)) {
    thisorcid <- i
  }
  worksperid[[k]] <- data.frame(
    orcid = as.character(thisorcid),
    title = as.character(
      workcolumn(thisdata, "work-title.title.value", NA_character_)
    ),
    type = as.character(workcolumn(thisdata, "work-type", NA_character_)),
    year = as.character(
      workcolumn(thisdata, "publication-date.year.value", NA_character_)
    ),
    created = as.numeric(workcolumn(thisdata, "created-date.value", NA_real_)),
    stringsAsFactors = FALSE
  )
}
# IDs without works left NULL slots in the list; drop them before binding.
worksinorcid <- do.call(
  rbind,
  c(list(worksinorcid), Filter(Negate(is.null), worksperid))
)

# UNIX timestamp (the value is divided by 1000 to get seconds) to date
worksinorcid$created <- as.POSIXct(
  worksinorcid$created / 1000,
  origin = "1970-01-01"
)
# Write to file
write.csv(worksinorcid, file = "worksinorcid.csv", row.names = FALSE)
# Collect bio
bioinpure <- orcid_id(orcid = idsinpure, profile = "bio")

# Empty result with the final column layout. biocreated is numeric because it
# holds a timestamp that is divided by 1000 below; a character column would
# make that division fail when no bio was collected.
bioinorcid <- data.frame(
  orcid = character(0),
  biocreated = numeric(0),
  first = character(0),
  last = character(0),
  stringsAsFactors = FALSE
)

# Return the first element of `x`, or `missing` when the field is absent
# (NULL or empty). Assigning NULL to a data frame column would drop the column
# and break the row binding, so every optional field goes through this.
fieldor <- function(x, missing) {
  if (length(x) == 0) missing else x[[1]]
}

# One row per returned record, looked up by name (the ORCID iD) as before.
# Iterating over names() is safe when nothing was returned, unlike 1:length().
biorows <- lapply(names(bioinpure), function(this) {
  record <- bioinpure[[this]]
  # Parsing name info from ORCID for testing purposes
  details <- record$`orcid-bio`$`personal-details`
  data.frame(
    orcid = this,
    biocreated = as.numeric(
      fieldor(record$`orcid-history`$`submission-date`$value, NA_real_)
    ),
    # "N/A" marks a missing name part (one record was w/o family name)
    first = as.character(fieldor(details$`given-names`$value, "N/A")),
    last = as.character(fieldor(details$`family-name`$value, "N/A")),
    stringsAsFactors = FALSE
  )
})
bioinorcid <- do.call(rbind, c(list(bioinorcid), biorows))

# UNIX timestamp (the value is divided by 1000 to get seconds) to date
bioinorcid$biocreated <- as.POSIXct(
  bioinorcid$biocreated / 1000,
  origin = "1970-01-01"
)
# Join with works data: every bio row is kept and repeated once per matching
# work; people without works get NA in the work columns.
library(dplyr)
# The key is named explicitly so the join neither prints a "Joining, by" message
# nor silently changes if the two data frames ever share another column name.
bioandworksinorcid <- left_join(bioinorcid, worksinorcid, by = "orcid")
# Write to file
write.csv(bioandworksinorcid, file = "bioandworksinorcid.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment