@cpsievert · Created March 30, 2014
Obtain all abstracts for the DOIs scraped from https://github.com/elifesciences/elife-articles
library(XML)
library(RCurl)
library(stringr)
library(elife)
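# Note: the elife package may not be available from CRAN; one likely install
# route (an assumption -- verify the repo name before running) is:
# devtools::install_github("ropensci/elife")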
# Obtain all the dois! Well, at least every one on GitHub.
# Should/could this be an option in searchelife? It doesn't take that long...
# Scrape the file listing of the elife-articles repo
con <- getURL("https://github.com/elifesciences/elife-articles")
doc <- htmlParse(con, asText = TRUE)
# Article files are linked with GitHub's 'js-directory-link' anchor class
nodes <- getNodeSet(doc, path = "//a[@class='js-directory-link']")
files <- sapply(nodes, xmlValue)
# Keep only the article XML files, e.g. "elife00003.xml" (escape the dot so
# it matches literally), then pull out the numeric id and build the DOI
files <- files[grep("elife[0-9]+\\.xml$", files)]
nums <- str_extract(files, "[0-9]+")
dois <- paste0("10.7554/eLife.", nums)
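# Quick sanity check (not part of the original workflow): DOIs should look
# like "10.7554/eLife.00003"
head(dois)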
# Passing all of these DOIs to elife_doi() at once returns an error...
# For now, query in chunks of 100
start <- seq(1, length(dois), by = 100)
# End positions: one before each subsequent start, plus the final index
end <- start - 1
end <- c(end[-1], length(dois))
idx <- cbind(start, end)
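# An equivalent one-liner for the chunk bounds (a sketch; gives the same
# result as the three lines above):
# idx <- cbind(start, pmin(start + 99, length(dois)))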
abstracts <- NULL  # avoid "abs", which would mask base::abs()
for (i in seq_len(nrow(idx))) {
  ab <- elife_doi(dois[idx[i, 1]:idx[i, 2]], ret = "abstract")
  abstracts <- c(abstracts, ab)
}
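# Inspect the result (a sketch; assumes elife_doi() returns a character
# vector of abstracts -- if it returns a list, unlist() it first)
length(abstracts)
substr(abstracts[[1]], 1, 100)  # first 100 characters of the first abstract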