Skip to content

Instantly share code, notes, and snippets.

Last active June 17, 2019 09:21
Show Gist options
  • Save patvdleer/3da45e7e24cc402fedec4f004f2975aa to your computer and use it in GitHub Desktop.
Save patvdleer/3da45e7e24cc402fedec4f004f2975aa to your computer and use it in GitHub Desktop.
packtpub daily claimer/downloader
import os
import re
import shutil
import sys

import requests

try:
    import argparse
except ImportError:
    sys.stderr.write("The Python module argparse is required")
try:
    import bs4 as bs
except ImportError:
    sys.stderr.write("The Python module BeautifulSoup 4 (bs4) is required")
class PacktSubException(Exception):
    """Root of every error raised by this script.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it and it is not lumped together with
    ``KeyboardInterrupt`` / ``SystemExit``.
    """


class ArgumentException(PacktSubException):
    """An argument supplied by the caller is invalid."""


class LoginException(PacktSubException):
    """Failed to log in."""


class ClaimException(PacktSubException):
    """Failed to claim the latest book."""


class LocalCreateException(PacktSubException):
    """A local directory or file could not be created."""
class Book(object):
    """A single e-book parsed from one product entry of the account page.

    Attributes:
        authors: list of author names (whitespace-trimmed).
        title: book title without the trailing " [eBook]" marker.
        nid: value of the entry tag's ``nid`` attribute, used in the
            download URL.
    """

    # Class-level placeholders; the real values are set per instance in
    # __init__.
    authors = None
    title = None
    nid = None

    # Marker removed from the end of the title text.
    _EBOOK_SUFFIX = " [eBook]"

    def __init__(self, entry):
        """Populate the book from a BeautifulSoup tag.

        Args:
            entry: ``bs.Tag`` carrying an ``nid`` attribute and containing a
                ``div.product-info`` with ``div.author`` and ``div.title``.
        """
        assert isinstance(entry, bs.Tag)
        info = entry.find("div", {'class': "product-info"})
        self.nid = entry.attrs['nid']
        assert isinstance(info, bs.Tag)
        self.authors = self._split_authors(
            info.find("div", {'class': "author"}).getText())
        self.title = self._clean_title(
            info.find("div", {'class': "title"}).getText())

    @staticmethod
    def _split_authors(text):
        """Split a comma-separated author string into trimmed, non-empty names."""
        names = [part.strip() for part in text.split(",")]
        return [name for name in names if name]

    @classmethod
    def _clean_title(cls, text):
        """Trim whitespace and remove one trailing " [eBook]" marker.

        ``str.rstrip(" [eBook]")`` is deliberately not used: it strips any
        trailing run of those *characters* and would eat the end of titles
        such as "Python Cookbook".
        """
        title = text.strip()
        if title.endswith(cls._EBOOK_SUFFIX):
            title = title[:-len(cls._EBOOK_SUFFIX)]
        return title.rstrip()

    def get_url(self, type):
        """Return the download URL of this book for file format *type*."""
        # NOTE(review): the URL template is an empty string in the reviewed
        # source (it looks like it was stripped during extraction). With an
        # empty template this formatting raises TypeError; restore the
        # original "...%s...%s" template before use.
        return "" % (self.nid, type, )

    def to_filename(self, type):
        """Return "<authors> - <title>.<type>" as the local file name."""
        return "%s - %s.%s" % (", ".join(self.authors), self.title, type, )
class PacktSub(object):
    """Logged-in session that can claim the free book and download e-books.

    NOTE(review): every endpoint URL literal in this class is an empty
    string in the reviewed source (they appear to have been stripped during
    extraction). They are kept as-is and must be restored before any of the
    network calls can work.
    """

    session = None
    claim_link = None  # relative claim link scraped from the home page
    path = "."         # output directory for downloads
    headers = {
        'user-agent': 'cfreebook/0.0.1'
    }
    types = ['epub', 'mobi', 'pdf']
    type = "all"       # selected format, or "all" for every entry in types

    def __init__(self, username=None, password=None):
        """Create the HTTP session and log in when both credentials are given."""
        self.session = requests.session()
        if username is not None and password is not None:
            self.login(username, password)
        self.path = os.path.abspath(self.path)

    def set_path(self, path):
        """Set the output directory, creating it if it does not exist.

        Raises:
            LocalCreateException: the directory cannot be created or is not
                writable.
            ArgumentException: *path* exists but is not a directory.
        """
        path = os.path.abspath(path)
        if not os.path.exists(path):
            # NOTE(review): the creation call was missing from the reviewed
            # source; os.makedirs is assumed from the error message below.
            try:
                os.makedirs(path)
            except OSError:
                raise LocalCreateException("Can't create directory: %s" % path)
        if not os.path.isdir(path):
            raise ArgumentException("Path is not a directory: %s" % path)
        if not os.access(path, os.W_OK):
            raise LocalCreateException("Can't create files in: %s" % path)
        self.path = path

    def set_type(self, type):
        """Select the download format; must be one of ``types`` or "all".

        Raises:
            ArgumentException: *type* is not a known format.
        """
        if type not in self.types + ['all']:
            raise ArgumentException("Unknown type: %s" % type)
        self.type = type

    def _save(self, book, file_type, path):
        """Stream one format of *book* into *path* under its generated name."""
        response = self.session.get(
            book.get_url(file_type), headers=self.headers, stream=True)
        try:
            target = os.path.join(path, book.to_filename(file_type))
            with open(target, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
        finally:
            # Streamed responses keep the connection open until closed.
            response.close()

    def _download(self, book, type=None, path=None):
        """Download *book* in the requested format(s).

        Args:
            book: the ``Book`` to fetch.
            type: a format from ``types`` or "all"; defaults to ``self.type``.
            path: target directory; defaults to ``self.path``.
        """
        assert isinstance(book, Book)
        if path is None:
            path = self.path
        if type is None:
            type = self.type
        # Compare by value: identity ("is") against a string literal is not
        # reliable. Each requested format is fetched exactly once.
        wanted = self.types if type == "all" else [type]
        for file_type in wanted:
            self._save(book, file_type, path)

    def get_product_account_list(self):
        """Return a ``Book`` for every product entry found on the account page."""
        page = self.session.get("", headers=self.headers)
        soup = bs.BeautifulSoup(page.text, 'html.parser')
        container = soup.find(id="product-account-list")
        assert isinstance(container, bs.Tag)
        books = []
        for entry in container.find_all('div', {'class': "product-line"}):
            # Only entries that carry a product-info block can be parsed.
            if entry.find("div", {'class': "product-info"}):
                # NOTE(review): this append was missing from the reviewed
                # source and is reconstructed - confirm.
                books.append(Book(entry))
        return books

    def download_latest(self, type=None, path=None):
        """Download the first book of the account list."""
        books = self.get_product_account_list()
        self._download(books[0], type, path)

    def download_all(self, type=None, path=None):
        """Download every book of the account list."""
        for book in self.get_product_account_list():
            self._download(book, type, path)

    def claim(self):
        """Claim the free book through the link found during login.

        Raises:
            ClaimException: no claim link is known, or the request did not
                end on the expected URL.
        """
        if self.claim_link is None:
            raise ClaimException("No claim link available; log in first")
        claim_book = self.session.get("" + self.claim_link, headers=self.headers)
        if claim_book.url != "":
            raise ClaimException("Failed to claim book")

    def login(self, username, password):
        """Scrape the home page for the claim link and form id, then log in.

        Raises:
            LoginException: the login form id is not found, or the request
                did not end on the expected URL.
        """
        home = self.session.get("", headers=self.headers)

        # A missing claim link does not block logging in; claim() reports it.
        claim_match = re.search(r"/freelearning-claim/\d*/\d*", home.text)
        self.claim_link = claim_match.group(0) if claim_match else None

        form_match = re.search(r'form_build_id.\s*id=.([^"]+)"', home.text)
        if form_match is None:
            raise LoginException("Login form not found")
        form_build_id = form_match.group(1)

        auth = self.session.post("", {
            "email": username,
            "password": password,
            "op": "Login",
            "form_build_id": form_build_id,
            "form_id": "packt_user_login_form"
        }, headers=self.headers)
        if auth.url != "":
            raise LoginException("Authentication failed")
if __name__ == "__main__":
    # Command-line entry point: parse options, log in, then claim and/or
    # download according to the flags.
    parser = argparse.ArgumentParser(description='Packtpub downloader', prog="PacktSub", usage='%(prog)s [options]')
    parser.add_argument("-c", "--claim", help='Claim today\'s book', action="store_true")
    parser.add_argument("-d", "--download", help='Download today\'s book', action="store_true")
    parser.add_argument("-a", "--auto", help='auto modes', action="store_true")
    parser.add_argument("--all", help='Download all the ebooks', action="store_true")
    parser.add_argument("-o", "--output", help='Output directory', default=".")
    parser.add_argument("-t", "--type", help='Type of file', default="all", choices=PacktSub.types + ['all'])
    parser.add_argument("-u", "--username", help='Username (email)', required=True)
    parser.add_argument("-p", "--password", help='password', required=True)
    args = parser.parse_args()
    username, password = args.username, args.password
    try:
        packtsub = PacktSub(username, password)
        # NOTE(review): the conditions and branch bodies below were truncated
        # or missing in the reviewed source. They are reconstructed from the
        # option definitions above and PacktSub's public methods - confirm.
        packtsub.set_path(args.output)
        packtsub.set_type(args.type)
        if args.claim or args.auto:
            packtsub.claim()
        if (args.download or args.auto) and not args.all:
            packtsub.download_latest()
        if args.all:
            packtsub.download_all()
    except PacktSubException as e:
        # Report the failure on its own line and signal it via the exit code.
        sys.stderr.write("%s: %s\n" % (e.__class__.__name__, str(e), ))
        sys.exit(1)
Copy link

nneul commented Dec 19, 2018

This no longer works due to changes in the site. I put together a quick hack here that you can look at to see the new method, which uses their new REST endpoints and is much simpler than site parsing.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment