Created
May 31, 2020 15:28
-
-
Save erincar/428dd5b9f48c9b52ad0c8afe58587d54 to your computer and use it in GitHub Desktop.
[2020-04-29] Springer free e books autodownload
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import re
import urllib.request
from pathlib import Path

import requests
from lxml import html
# Command-line interface: a single optional ``--start`` flag giving the
# 1-based position in the scraped book list at which processing begins.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--start",
    type=int,
    default=1,
    nargs="?",
    # With nargs="?" a bare ``--start`` (flag given without a value) would
    # otherwise produce None and break the ``args.start - 1`` arithmetic
    # used when slicing the URL list.
    const=1,
    help="starting index for the list",
)
args = parser.parse_args()
# The index is 1-based. Zero or a negative value would turn the
# ``[args.start - 1:]`` slice into a from-the-end slice and silently select
# the wrong books, so reject it up front with a normal usage error.
if args.start < 1:
    parser.error("--start must be a positive integer (1-based index)")
# Index page whose table is scraped for the per-book links.
page_url = 'https://www.thebiomics.com/notes/springer-free-e-books-list.html'
# Browser-like User-Agent passed to the ``requests.get`` calls in this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:76.0) Gecko/20100101 Firefox/76.0',
}
# Bound the wait so an unresponsive server cannot hang the script, and fail
# loudly on an HTTP error instead of parsing an error page into zero links.
page_response = requests.get(page_url, headers=headers, timeout=30)
page_response.raise_for_status()
page_tree = html.fromstring(page_response.content)

# Prefix prepended to the PDF href found on each book page.
base_springer = "https://link.springer.com"
# Target directory: ~/Downloads/Springer.
download_path = os.path.join(str(Path.home()), "Downloads", "Springer")
# urllib.request.urlretrieve does not create missing directories; without
# this every download fails with FileNotFoundError on a fresh machine.
os.makedirs(download_path, exist_ok=True)

# href of the link in the first cell of every row of the data table,
# dropping the entries before the 1-based ``--start`` position.
urls = page_tree.xpath(
    '//table[@class="table data-table"]/tbody/tr/td[position()=1]/a/@href'
)[args.start - 1:]
# ``amount``: number of links to process; ``s``: running count of skipped ones.
amount, s = len(urls), 0
def _safe_filename(name):
    """Return *name* made safe to use as a single file-name component."""
    # Collapse all runs of whitespace (including newlines and the
    # non-breaking spaces found in scraped text) into single spaces.
    name = " ".join(name.split())
    # Path separators would make the joined path point into a non-existent
    # sub-directory; the remaining characters are not allowed on Windows.
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name)


# Visit every book page, locate its PDF link, and save the PDF as
# "<authors> - <title>.pdf" inside ``download_path``. A failure on one book
# is reported and counted in ``s``; the loop then continues with the next.
for i, url in enumerate(urls):
    try:
        # Time-box the request and treat HTTP errors as a failed book.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        tree = html.fromstring(response.content)

        # The href is site-relative with URL-encoded slashes; decode them
        # and prepend the Springer host to get the full download URL.
        pdf = tree.xpath(
            '//a[@data-track-action="Book download - pdf"]/@href'
        )[0].replace("%2F", "/")
        download_url = base_springer + pdf

        authors = tree.xpath('//div[@class="persons__list"]/ul/li/span/text()')
        bookname = tree.xpath('//div[@data-test="book-title"]/h1/text()')[0]
        # Scraped author/title text may contain characters that are invalid
        # in file names (e.g. "/"), so sanitise before building the path.
        filename = _safe_filename(", ".join(authors) + " - " + bookname) + ".pdf"

        print(download_url, "->", filename)
        urllib.request.urlretrieve(
            download_url,
            os.path.join(download_path, filename),
        )
        print(f"Downloaded {i+1-s}/{amount}, skipped {s}")
    except Exception as exc:
        # Best-effort: keep going, but say why this book was skipped so
        # failures can be diagnosed.
        print(f"Error during download, skipping: {exc!r}")
        s += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment