@patvdleer
Last active June 17, 2019 09:21
packtpub daily claimer/downloader
import os
import re
import shutil
import sys

import requests

try:
    import argparse
except ImportError:
    sys.stderr.write("The Python module argparse is required")
    sys.exit(1)

try:
    import bs4 as bs
except ImportError:
    sys.stderr.write("The Python module BeautifulSoup 4 (bs4) is required")
    sys.exit(1)
class PacktSubException(BaseException):
    """Abstract Exception"""


class ArgumentException(PacktSubException):
    """Crappy argument"""


class LoginException(PacktSubException):
    """Failed to login"""


class ClaimException(PacktSubException):
    """Failed to claim latest book"""


class LocalCreateException(PacktSubException):
    """Can't create something locally"""
class Book(object):
    authors = None
    title = None
    nid = None

    def __init__(self, entry):
        assert isinstance(entry, bs.Tag)
        info = entry.find("div", {'class': "product-info"})
        self.nid = entry.attrs['nid']
        assert isinstance(info, bs.Tag)
        authors = info.find("div", {'class': "author"}).getText()
        if "," in authors:
            authors = authors.strip().split(",")
        else:
            authors = [authors.strip()]
        self.authors = authors
        # Strip the " [eBook]" suffix explicitly; str.rstrip() removes a set of
        # characters, not a suffix, and would mangle titles ending in letters
        # that appear in "[eBook] ".
        title = info.find("div", {'class': "title"}).getText().strip()
        if title.endswith(" [eBook]"):
            title = title[:-len(" [eBook]")]
        self.title = title

    def get_url(self, type):
        return "https://www.packtpub.com/ebook_download/%s/%s" % (self.nid, type, )

    def to_filename(self, type):
        return "%s - %s.%s" % (", ".join(self.authors), self.title, type, )
class PacktSub(object):
    session = None
    claim_link = None
    path = "."
    headers = {
        'user-agent': 'cfreebook/0.0.1'
    }
    types = ['epub', 'mobi', 'pdf']
    type = "all"

    def __init__(self, username=None, password=None):
        self.session = requests.session()
        if username is not None and password is not None:
            self.login(username, password)
        self.path = os.path.abspath(self.path)

    def set_path(self, path):
        path = os.path.abspath(path)
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError:
                raise LocalCreateException("Can't create directory: %s" % path)
        if not os.path.isdir(path):
            raise ArgumentException("Path is not a directory: %s" % path)
        if not os.access(path, os.W_OK):
            raise LocalCreateException("Can't create files in: %s" % path)
        self.path = path

    def set_type(self, type):
        if type not in self.types + ['all']:
            raise ArgumentException("Unknown type: %s" % type)
        self.type = type

    def _download(self, book, type=None, path=None):
        assert isinstance(book, Book)
        if path is None:
            path = self.path
        if type is None:
            type = self.type
        # Compare with ==, not "is": identity checks against string literals
        # are unreliable.
        if type == "all":
            for _type in self.types:
                response = self.session.get(book.get_url(_type), headers=self.headers, stream=True)
                with open(os.path.join(path, book.to_filename(_type)), 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
        else:
            response = self.session.get(book.get_url(type), headers=self.headers, stream=True)
            with open(os.path.join(path, book.to_filename(type)), 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
    def get_product_account_list(self):
        books = self.session.get("https://www.packtpub.com/account/my-ebooks", headers=self.headers)
        soup = bs.BeautifulSoup(books.text, 'html.parser')
        container = soup.find(id="product-account-list")
        books = []
        assert isinstance(container, bs.Tag)
        for entry in container.find_all('div', {'class': "product-line"}):
            if entry.find("div", {'class': "product-info"}):
                books.append(Book(entry))
        return books

    def download_latest(self, type=None, path=None):
        books = self.get_product_account_list()
        self._download(books[0], type, path)

    def download_all(self, type=None, path=None):
        books = self.get_product_account_list()
        for book in books:
            self._download(book, type, path)

    def claim(self):
        claim_book = self.session.get("https://www.packtpub.com" + self.claim_link, headers=self.headers)
        if claim_book.url != "https://www.packtpub.com/account/my-ebooks":
            raise ClaimException("Failed to claim book")

    def login(self, username, password):
        home = self.session.get("https://www.packtpub.com/packt/offers/free-learning", headers=self.headers)
        regxp = re.search(r"/freelearning-claim/\d*/\d*", home.text)
        self.claim_link = regxp.group(0)
        regxp = re.search(r"form_build_id.\s*id=.([^\"]+)\"", home.text)
        form_build_id = regxp.group(1)
        auth = self.session.post("https://www.packtpub.com/register", {
            "email": username,
            "password": password,
            "op": "Login",
            "form_build_id": form_build_id,
            "form_id": "packt_user_login_form"
        }, headers=self.headers)
        if auth.url != "https://www.packtpub.com/account":
            raise LoginException("Authentication failed")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Packtpub downloader', prog="PacktSub", usage='%(prog)s [options]')
    parser.add_argument("-c", "--claim", help='Claim today\'s book', action="store_true")
    parser.add_argument("-d", "--download", help='Download today\'s book', action="store_true")
    parser.add_argument("-a", "--auto", help='Auto mode (claim and download today\'s book)', action="store_true")
    parser.add_argument("--all", help='Download all the ebooks', action="store_true")
    parser.add_argument("-o", "--output", help='Output directory', default=".")
    parser.add_argument("-t", "--type", help='Type of file', default="all", choices=PacktSub.types + ['all'])
    parser.add_argument("-u", "--username", help='Username (email)', required=True)
    parser.add_argument("-p", "--password", help='Password', required=True)
    args = parser.parse_args()

    try:
        username, password = args.username, args.password
        packtsub = PacktSub(username, password)
        packtsub.set_path(args.output)
        packtsub.set_type(args.type)
        if args.claim or args.auto:
            packtsub.claim()
        if (args.download or args.auto) and not args.all:
            packtsub.download_latest()
        if args.all:
            packtsub.download_all()
    except PacktSubException as e:
        sys.stderr.write("%s: %s\n" % (e.__class__.__name__, str(e), ))
        sys.exit(1)
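
The downloader can also be driven from another script rather than through the CLI flags above. A minimal sketch, assuming valid Packt credentials and a writable output directory; the email, password, paths, and script filename are placeholders:

# CLI equivalent (filename is whatever you saved the gist as, e.g. packtsub.py):
#   python packtsub.py -u you@example.com -p secret -a -t epub -o ./packt-books
sub = PacktSub("you@example.com", "secret")  # __init__ logs in when credentials are given
sub.set_path("./packt-books")                # directory is created if it does not exist
sub.set_type("epub")                         # one of 'epub', 'mobi', 'pdf', or 'all'
sub.claim()                                  # claim today's free-learning title
sub.download_latest()                        # download the newest book in the account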
nneul commented Dec 19, 2018

No longer works due to changes in the site. I put together a quick hack that uses their new REST endpoints instead, which is much simpler than parsing the site: https://gist.github.com/nneul/6eda98fd87a58a623b857523247f3471
