Get PDFs from Sci-Hub for every entry in a .bib file
####### This presumes you have a .bib file in which every bibliography
####### entry has a legal file name as its citation key and a doi field,
####### and that you have a directory to save the downloaded PDFs in.
####### If a BibTeX entry looks like
####### @Article{Bob_1980, ..... }
####### then its PDF will be saved as Bob_1980.pdf.
####### Every entry must have a DOI.
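####### For illustration, a hypothetical entry of the form this script
####### expects (the key "Smith_2019" and the DOI below are made up):
#######   @Article{Smith_2019,
#######     author = {Smith, Jane},
#######     title  = {Some Title},
#######     doi    = {10.1000/xyz123},
#######   }
####### would be saved as Smith_2019.pdf using the DOI 10.1000/xyz123.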
bibfile = "/path/to/bib/file.bib"
savedir = "/path/to/save/dir/"
biblines = readLines(bibfile)
####### citation keys: the text between "{" and "," on each "@Entry{key," line
tags = gsub("^.*\\{(.*),", "\\1", grep("@", biblines, value = TRUE))
####### DOIs: the text between the braces on each "doi = {...}," line
doilist = gsub("^.*\\{(.*)\\},", "\\1", grep("doi.*=", biblines, value = TRUE))
filelist = paste0(savedir, tags, ".pdf")
urllist = paste0("https://sci-hub.tw/", doilist)
pdfpattern = "https://.*pdf\\?download=true"
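####### For illustration only (this example line and URL are made up): the
####### pattern above selects HTML lines carrying the download link, and the
####### gsub() used in the loop below strips everything around the URL.
exampleline = '<embed src = "https://example.org/paper.pdf?download=true">'
examplepdf = gsub("^.*(https.*true).*", "\\1", grep(pdfpattern, exampleline, value = TRUE))
####### examplepdf is now "https://example.org/paper.pdf?download=true"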
####### tags and doilist must line up one-to-one, so every entry needs a DOI
if (length(doilist) != length(tags)) stop("Error: number of DOIs does not match number of entries")
for (i in seq_along(doilist)) {
  if (!file.exists(filelist[i])) {
    scihubhtml = readLines(urllist[i])
    pdfurl = gsub("^.*(https.*true).*", "\\1", grep(pdfpattern, scihubhtml, value = TRUE))
    ####### skip this entry if no PDF link was found on the page;
    ####### download in binary mode ("wb") so the PDF is not corrupted on Windows
    if (length(pdfurl) > 0) download.file(pdfurl[1], destfile = filelist[i], method = "auto", mode = "wb")
  }
}
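####### Optional follow-up, not part of the original script: report any
####### entries that still have no PDF so they can be fetched by hand.
stillmissing = tags[!file.exists(filelist)]
if (length(stillmissing) > 0) message("No PDF downloaded for: ", paste(stillmissing, collapse = ", "))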