Skip to content

Instantly share code, notes, and snippets.

@deeplook
Created April 29, 2020 19:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deeplook/a31ecd29523b965d7957ec66308471d4 to your computer and use it in GitHub Desktop.
Save deeplook/a31ecd29523b965d7957ec66308471d4 to your computer and use it in GitHub Desktop.
Fetch free Springer ebooks.
#!/usr/bin/env python3
"""
Download free Springer ebooks.
Examples:
python fetch_springer_ebooks python
python fetch_springer_ebooks --dest my/ebooks math
python fetch_springer_ebooks
python fetch_springer_ebooks -h
Dependencies:
- Firefox
- geckodriver
- requests
- selenium
- python-slugify
"""
import argparse
import os
import requests
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from slugify import slugify
def download(args):
    """Download the free Springer textbook PDFs matching a search query.

    Starts a headless Firefox session, submits ``args.query`` to the
    link.springer.com search restricted to books of the
    ``mat-covid19_textbooks`` package, and walks through the result pages.
    For every listed title the PDF URL is derived from the book link and the
    file is stored in ``args.dest`` as ``<id>-<slugified-title>.pdf``. Files
    that already exist are not fetched again.

    Args:
        args: Object exposing ``query`` (search text) and ``dest``
            (destination folder) attributes, e.g. an ``argparse.Namespace``.
    """
    # Imported here so the block is self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import WebDriverException

    dest = args.dest
    query = args.query
    if dest:
        # Create the destination up front instead of failing on the first write.
        os.makedirs(dest, exist_ok=True)

    options = Options()
    # "-headless" as an argument works on Selenium 3 and 4; the
    # ``options.headless`` setter was removed in later Selenium 4 releases.
    options.add_argument("-headless")
    # geckodriver is looked up on PATH; the ``executable_path`` keyword is no
    # longer accepted by current Selenium releases.
    driver = webdriver.Firefox(options=options)
    try:
        # NOTE(review): the main site is visited before the search page,
        # presumably to establish session cookies -- confirm it is needed.
        driver.get("https://springer.com")
        driver.get(
            "https://link.springer.com/search"
            "?facet-content-type=%22Book%22&package=mat-covid19_textbooks"
        )

        input_el = driver.find_element("xpath", '//*[@id="query"]')
        input_el.clear()
        input_el.send_keys(query)
        driver.find_element("xpath", '//*[@id="search"]').click()

        page = 0
        i = 0  # running index over all books seen, across pages
        while True:
            print(f"page {page}")
            for book in driver.find_elements("xpath", '//h2/a[@class="title"]'):
                link = book.get_attribute("href")
                url = link.replace("springer.com/book", "springer.com/content/pdf") + ".pdf"
                text = book.text
                base, ext = os.path.splitext(os.path.basename(url))
                fname = "%s-%s%s" % (base, slugify(text), ext)
                path = os.path.join(dest, fname)
                print(i, fname)
                if not os.path.exists(path):
                    # Fetch first and open the file only on success, so a
                    # failed download never leaves an empty or bogus file
                    # that later runs would skip as "already downloaded".
                    try:
                        response = requests.get(url, timeout=60)
                        response.raise_for_status()
                    except requests.RequestException as err:
                        print(f"  skipped: {err}")
                    else:
                        with open(path, "wb") as f:
                            f.write(response.content)
                i += 1
            try:
                # A missing or unclickable "next" link marks the last page.
                driver.find_element("xpath", '//a[@class="next"]').click()
            except WebDriverException:
                break
            page += 1
    finally:
        # quit() ends the whole session (browser and geckodriver) even when
        # an error interrupted the run.
        driver.quit()
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and start the download.
    desc = "Download free Springer ebooks."
    p = argparse.ArgumentParser(description=desc)
    # nargs="?" makes the positional optional; without it argparse ignores
    # the default and rejects the documented invocation without a query.
    p.add_argument('query', metavar="TEXT", nargs="?", default="",
                   help='Query to search books, example: "python", default: "".')
    p.add_argument('--dest', metavar='PATH', default=".",
                   help='Existing destination folder, default: "."')
    args = p.parse_args()
    download(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment