Created
April 16, 2019 20:02
-
-
Save Yuri-M-Dias/c9ca6a7c7de25447b7d06cf0b7470d74 to your computer and use it in GitHub Desktop.
Download all articles for a SPACEOPS conference. Handle with care: the website enforces rate limits of roughly 5 sessions per minute at most (25 per 5 minutes)!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest)
library(purrr)
library(here)
library(tictoc)
library(magrittr)
library(doParallel)
library(tidyverse)

# Downloads all articles linked from an AIAA book page.
#
# Steps:
#   1. Fetch the book page and collect the href of every node with class "pdf".
#   2. Build a table with the absolute download link and the DOI parsed from
#      each href.
#   3. Download every article into the working directory as "<doi>.pdf"
#      (with "/" replaced by "|"), pausing between requests.

domain <- "https://arc.aiaa.org"
targetPage <- paste0(domain, "/doi/book/10.2514/MSPOPS18")

# Seconds to wait between two downloads so the server does not block us.
# Same pause as the original script; raise it if requests start failing.
pauseSeconds <- 2

targetHTML <- read_html(targetPage)

# The original author believed this call was needed "to trigger the js".
# NOTE(review): read_html() parses static HTML and does not run JavaScript, so
# this is probably unnecessary -- kept for safety; confirm before removing.
# invisible() avoids dumping the whole page text to the console.
invisible(html_text(targetHTML))

pdfHrefs <- targetHTML %>%
  html_nodes(".pdf") %>%
  html_attr("href")

dois <- tibble(value = pdfHrefs) %>%
  # Nodes without an href would otherwise become the bogus URL "<domain>NA".
  filter(!is.na(value)) %>%
  transmute(
    link = paste0(domain, value),
    doi = str_extract(value, "\\d+\\.\\d+\\/\\d+\\.\\d+\\-\\d+")
  ) %>%
  # The same article may be linked more than once on the page.
  distinct()

# Links whose DOI cannot be parsed would all be written to "NA.pdf" and
# overwrite each other, so report and skip them instead.
unparsed <- filter(dois, is.na(doi))
if (nrow(unparsed) > 0) {
  warning(
    "Skipping ", nrow(unparsed), " link(s) without a recognisable DOI: ",
    paste(unparsed$link, collapse = ", "),
    call. = FALSE
  )
}
dois <- filter(dois, !is.na(doi))

walk2(dois$link, dois$doi, function(doilink, doiname) {
  # | used as placeholder in place of /, which cannot appear in a file name
  escapedDoiName <- str_replace(doiname, fixed("/"), "|")

  # A single failed download should not abort the remaining ones.
  tryCatch(
    download.file(
      url = doilink,
      destfile = paste0(escapedDoiName, ".pdf"),
      method = "wget"
    ),
    error = function(e) {
      warning(
        "Failed to download ", doilink, ": ", conditionMessage(e),
        call. = FALSE
      )
    }
  )

  Sys.sleep(pauseSeconds)
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment