Skip to content

Instantly share code, notes, and snippets.

@khaledkee
Last active April 14, 2021 03:47
Show Gist options
  • Save khaledkee/7bd7183842d2a84daaeb9a40461b935e to your computer and use it in GitHub Desktop.
Save khaledkee/7bd7183842d2a84daaeb9a40461b935e to your computer and use it in GitHub Desktop.
Springer Free eBooks collector
from PyPDF2 import PdfFileReader
from bs4 import BeautifulSoup
from urllib.request import urlopen
import tqdm
import os
import re
# Constants.
# NOTE(review): BOOKS_NO is not referenced anywhere in the visible script;
# presumably the expected number of catalogue entries -- confirm before
# relying on or removing it.
BOOKS_NO = 408
URL_PREFIX = "http"

# Collect the text lines of every page while the PDF file is still open.
# All pages are gathered into one list so that an entry whose index, title
# and URL are split across a page break is still parsed as a single entry.
with open('Springer Ebooks.pdf', 'rb') as PDFFile:
    PDF = PdfFileReader(PDFFile)
    pages = PDF.getNumPages()
    lines = []
    for page in range(pages):
        pageObject = PDF.getPage(page)
        lines.extend(pageObject.extractText().split('\n'))

# Dictionary of title -> url.  Entries are keyed by title, so a later entry
# with the same title replaces an earlier one.
urls = dict()

# Construct the dictionary.
# Each entry is located by its running index (1, 2, 3, ...) appearing alone
# on a line; the following line is taken as the title and the next line that
# starts with URL_PREFIX as the book's URL.
bookIndex = 1
currentLine = 0
lineCount = len(lines)
while currentLine + 1 < lineCount:
    candidate = lines[currentLine]
    # isdecimal() (unlike isnumeric()) only accepts characters that int()
    # can actually convert, so lines such as "½" cannot raise ValueError.
    if not (candidate.isdecimal() and int(candidate) == bookIndex):
        currentLine += 1
        continue
    title = lines[currentLine + 1]
    # Move to the url, stopping at the end of the text instead of running
    # past the last line.
    while currentLine < lineCount and not lines[currentLine].startswith(URL_PREFIX):
        currentLine += 1
    if currentLine >= lineCount:
        # The last index found has no URL after it; nothing more to parse.
        break
    url = lines[currentLine]
    urls[title] = url
    bookIndex += 1
BOOKS_DIR = "books"
PDF_EXT = ".pdf"
PDF_LINK_ATTRIB = {'title': 'Download this book in PDF format'}
ANCHOR_TAG = 'a'
ANCHOR_LINK_ATTRIB = 'href'
SPRINGER_PATH = 'https://link.springer.com'

# Download the books.
# For every title -> url pair in `urls`, fetch the book's page, take the
# first anchor whose title attribute matches PDF_LINK_ATTRIB and save the
# file it points to as BOOKS_DIR/<title>.pdf.  Books whose file already
# exists are skipped, so the script can be re-run to resume.
os.makedirs(BOOKS_DIR, exist_ok=True)
for title in tqdm.tqdm(urls):
    url = urls[title]
    # escape / to not make python think it's a directory
    file_name = title.replace("/", ",").strip()
    fullfilename = os.path.join(BOOKS_DIR, file_name + PDF_EXT)
    # The file name depends only on the title, so check before touching the
    # network at all.
    if os.path.exists(fullfilename):
        continue

    file_resp_bin = None
    try:
        # Responses are closed as soon as they have been consumed.
        with urlopen(url) as html_page:
            soup = BeautifulSoup(html_page, 'html.parser')
        for link in soup.findAll(ANCHOR_TAG, attrs=PDF_LINK_ATTRIB):
            href = link.get(ANCHOR_LINK_ATTRIB)
            if not href:
                # Anchor without a target; try the next matching one.
                continue
            pdfLink = SPRINGER_PATH + href
            with urlopen(pdfLink) as file_resp:
                file_resp_bin = file_resp.read()
            # Only one file is stored per title, so the first usable link
            # is enough.
            break
    except OSError as err:
        # urllib's URLError/HTTPError derive from OSError.  Report the
        # failure and carry on so one bad book does not stop the whole run.
        tqdm.tqdm.write("Skipping {!r}: {}".format(title, err))
        continue

    if file_resp_bin is None:
        # No PDF download link on the page; nothing to save for this title.
        continue

    # Write to a temporary name and rename afterwards, so an interrupted
    # write never leaves a truncated file that later runs would skip.
    partfilename = fullfilename + ".part"
    with open(partfilename, "wb") as binfile:
        binfile.write(file_resp_bin)
    os.replace(partfilename, fullfilename)
@khaledkee
Copy link
Author

This is a Python script that downloads the free Springer eBooks (the list as it stood when the script was written).
Caution: the books come from different fields

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment