Skip to content

Instantly share code, notes, and snippets.

@long39ng
Created April 27, 2020 10:25
Show Gist options
  • Save long39ng/f27dd99e5a03c5edf7d6121b43902621 to your computer and use it in GitHub Desktop.
Save long39ng/f27dd99e5a03c5edf7d6121b43902621 to your computer and use it in GitHub Desktop.
# Script to download all Springer books released for free during the 2020 COVID-19 pandemic
library(tidyverse)
library(httr)
library(fs)
library(here)
# Address of the Excel sheet that lists the titles, and the name of the folder
# the books are downloaded into.
titles_url <- "https://resource-cms.springernature.com/springer-cms/rest/v1/content/17858272/data/v4"
dl_folder <- "downloads"

# Save Excel table to tmp folder
tmp_table <- tempfile(fileext = ".xlsx")
table_response <- GET(titles_url, write_disk(tmp_table))

# Stop here with the HTTP status if the request failed; otherwise the error
# body would sit in the .xlsx file and read_xlsx() would fail on it instead.
stop_for_status(table_response, task = "download the Springer title list")

# Read the sheet and normalise its column names with janitor::clean_names().
titles <- readxl::read_xlsx(tmp_table) %>%
  janitor::clean_names()
# Add to every title the direct PDF link (pdf_url) and a file name of the form
# <author(s)>_<copyright_year>_<book_title>.pdf (file_name).
titles <- titles %>%
  mutate(
    # Request each open_url and keep the URL of the response that came back
    # (presumably open_url redirects to the book's landing page -- confirm).
    book_url = map_chr(open_url, ~ GET(.)$url),
    # Rewrite the book URL into the PDF URL: swap the "/book/" path segment,
    # decode the escaped slash and append the extension.
    pdf_url = book_url %>%
      str_replace_all(c("/book/" = "/content/pdf/", "%2F" = "/")) %>%
      str_c(".pdf")
  ) %>%
  # Keep at most the first three comma-separated authors; rows with fewer
  # authors get NA in the unused columns.
  separate(author,
    into = c("author1", "author2", "author3"), sep = ", ",
    extra = "drop", fill = "right"
  ) %>%
  mutate(
    # Reduce each author to the last space-separated word of the name.
    author1 = map_chr(str_split(author1, " "), tail, 1),
    author2 = map_chr(str_split(author2, " "), tail, 1),
    author3 = map_chr(str_split(author3, " "), tail, 1),
    file_name = case_when(
      # One author:
      is.na(author2) ~ str_c(author1, copyright_year, book_title, sep = "_"),
      # More than two authors:
      !is.na(author3) ~ str_c(author1, "et_al", copyright_year, book_title, sep = "_"),
      # Two authors:
      TRUE ~ str_c(author1, author2, copyright_year, book_title, sep = "_")
    ) %>%
      str_replace_all(c(":" = ".", "/" = "_")) %>%
      # Titles are free text: drop whatever else is not allowed in a file name
      # (e.g. ? * " < > |) so the download path can be created on any OS.
      path_sanitize() %>%
      str_c(".pdf")
  )
# Create one sub-folder of dl_folder per distinct english_package_name.
# here() builds all folder paths at once and dir_create() accepts the whole
# vector, so no explicit iteration is needed.
dir_create(here(dl_folder, unique(titles$english_package_name)))
# Download every book's PDF to <dl_folder>/<english_package_name>/<file_name>.
#
# pwalk() passes each row's pdf_url, english_package_name and file_name to the
# callback together, so a URL is always paired with the values of its own row,
# even when the same URL occurs in more than one row. Only functions attached
# by the library() calls at the top of the script are used.
titles %>%
  select(pdf_url, english_package_name, file_name) %>%
  pwalk(function(pdf_url, english_package_name, file_name) {
    dest <- here(dl_folder, english_package_name, file_name)

    # write_disk() refuses to overwrite an existing file, so skip books that
    # are already on disk; this lets an interrupted run be resumed.
    if (file_exists(dest)) {
      return(invisible(NULL))
    }

    # Capture a transport error as a value so that one bad link does not abort
    # the remaining downloads.
    response <- tryCatch(
      GET(pdf_url, write_disk(dest)),
      error = function(e) e
    )

    if (inherits(response, "error")) {
      reason <- conditionMessage(response)
    } else if (http_error(response)) {
      reason <- str_c("HTTP status ", status_code(response))
    } else {
      return(invisible(NULL))
    }

    # Do not leave a partial file or a saved error page under a .pdf name.
    if (file_exists(dest)) {
      file_delete(dest)
    }
    warning("Could not download ", pdf_url, ": ", reason, call. = FALSE)
  })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment