Skip to content

Instantly share code, notes, and snippets.

@hailg
Created April 26, 2020 15:54
Show Gist options
  • Save hailg/cc1dd4e7296f900a5405d28a783522a5 to your computer and use it in GitHub Desktop.
Save hailg/cc1dd4e7296f900a5405d28a783522a5 to your computer and use it in GitHub Desktop.
Download Springer book
def download_book(driver, url):
    """Download the PDF and, when offered, the EPUB of the book at *url*.

    The book page is opened in ``driver``, the title is read from the
    page-title ``<h1>`` and turned into a file-system-friendly name, and
    each available format is saved under ``DOWNLOAD_FOLDER`` as
    ``<normalized title>.<extension>``.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the book page.

    A format whose download link cannot be found is skipped.
    """
    # Go to the book page
    driver.get(url)

    # Extract the book title from the h1 tag.
    # NOTE(review): if this lookup yields nothing, the `.text` access below
    # still fails -- without a title there is no file name to save under.
    title_h1 = _find_element(driver, By.XPATH, "//div[@class='page-title']/h1", timeout=30)
    book_title = title_h1.text

    # Normalize the file name: anything other than ASCII letters, digits
    # and '.' becomes '_'.
    downloading_file_name = re.sub(r'[^a-zA-Z0-9.]', '_', book_title)

    # (link text, file extension, seconds to wait for the link to appear)
    formats = (
        ('Download book PDF', 'pdf', 10),
        ('Download book EPUB', 'epub', 1),
    )
    for link_text, extension, timeout in formats:
        button = _find_element(driver, By.LINK_TEXT, link_text, timeout=timeout)
        # Presumably _find_element returns a falsy value when the link is not
        # found in time (the original EPUB check relied on that); guard both
        # formats the same way so a missing PDF link does not raise
        # AttributeError and prevent the EPUB download.
        if not button:
            continue
        target_path = os.path.join(
            DOWNLOAD_FOLDER, '%s.%s' % (downloading_file_name, extension)
        )
        _download_file(button.get_attribute('href'), target_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment