# tcostam/download_books.py

## download_books.py
"""
There are books available for free download from springer editor
According to the following blog, it put together a spreadsheet with
all available books:
https://marcusnunes.me/posts/livros-gratuitos-da-springer/

This script requires python3, pandas, xlrd and wget

Steps:
1. Put spreadsheet on the same folder as this script
2. Populate var file_name with the spreadsheet name
3. Populate var extension with pdf or epub
"""

import pandas as pd
import requests
import re

import wget

# User settings: spreadsheet to read and the file type to download.
file_name = "FreeEnglishTextbooks.xlsx"
extension = "epub"

# Links found on a book page are site-relative, so they are appended to this.
base_url = "http://link.springer.com"


def find_file_urls(html, file_extension):
    """Return every ``<a href="...">`` target in *html* ending in the extension.

    Args:
        html: Decoded HTML text of a book page.
        file_extension: File extension to look for, without the leading dot
            (for example ``"pdf"`` or ``"epub"``).

    Returns:
        A list of href values (possibly empty), in document order.
    """
    # Raw string so the regex escapes reach ``re`` untouched; the extension is
    # escaped and matched literally rather than having its last letter
    # repeated by a trailing "+".
    pattern = (
        r'(?<=<a\shref=")[A-Za-z0-9_@\-^!#$%&+={}./\\\[\]]+\.'
        + re.escape(file_extension)
    )
    return re.findall(pattern, html)


def safe_file_name(title, author, file_extension):
    """Build the ``"<title> - <author>.<extension>"`` output file name.

    Characters that are path separators or otherwise not allowed in file
    names on common file systems are replaced with ``_`` so that a title
    such as ``"A/B"`` does not point into a non-existent directory.
    """
    name = "{} - {}.{}".format(title, author, file_extension)
    return re.sub(r'[\\/:*?"<>|]', "_", name)


def download_books(books, file_extension):
    """Download one file per spreadsheet row and collect failure messages.

    Args:
        books: DataFrame with the columns ``Book Title``, ``Author`` and
            ``OpenURL``.
        file_extension: File extension to download, without the leading dot.

    Returns:
        A list with one error message per book that could not be downloaded.
    """
    errors = []

    for _, row in books.iterrows():
        title = row["Book Title"]
        author = row["Author"]
        url = row["OpenURL"]

        print('Downloading "{} - {}" from {}'.format(title, author, url))

        # A single unreachable page must not abort the remaining downloads.
        try:
            book_page = requests.get(url, timeout=60)
            book_page.raise_for_status()
        except requests.RequestException as exc:
            error_msg = "Can't open page for book {} - {}: {} ({})".format(
                title, author, url, exc
            )
            errors.append(error_msg)
            print(error_msg)
            continue

        # Search the decoded page text, not the repr of the raw bytes.
        file_urls = find_file_urls(book_page.text, file_extension)
        if not file_urls:
            error_msg = "Can't find a {} file for book {} - {}: {}".format(
                file_extension, title, author, url
            )
            errors.append(error_msg)
            print(error_msg)
            continue

        wget.download(
            base_url + file_urls[0],
            safe_file_name(title, author, file_extension),
        )

    return errors


def main():
    """Read the spreadsheet, download every book and print an error summary."""
    books = pd.read_excel(file_name, sheet_name="eBook list")
    errors = download_books(books, extension)

    if errors:
        # Print the heading once, followed by every collected message.
        print("The following errors occurred:")
        for error_msg in errors:
            print(error_msg)


if __name__ == "__main__":
    main()