Last active
June 17, 2019 09:21
-
-
Save patvdleer/3da45e7e24cc402fedec4f004f2975aa to your computer and use it in GitHub Desktop.
packtpub daily claimer/downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import shutil | |
import sys | |
import requests | |
try: | |
import argparse | |
except ImportError: | |
sys.stderr.write("The Python module argparse is required") | |
sys.exit(1) | |
try: | |
import bs4 as bs | |
except ImportError: | |
sys.stderr.write("The Python module BeautifulSoup 4 (bs4) is required") | |
sys.exit(1) | |
class PacktSubException(Exception):
    """Base class for all errors raised by this script.

    Derives from Exception rather than BaseException: BaseException is
    reserved for system-exiting events (KeyboardInterrupt, SystemExit),
    and subclassing it makes these errors slip past generic
    ``except Exception`` handlers in calling code.
    """
class ArgumentException(PacktSubException):
    """Raised when a caller supplies an invalid value (unknown download
    type, or a download path that exists but is not a directory)."""
class LoginException(PacktSubException):
    """Raised when authentication against packtpub.com fails (the login
    POST does not redirect to the account page)."""
class ClaimException(PacktSubException):
    """Raised when claiming the free daily book fails (the claim request
    does not redirect to the my-ebooks page)."""
class LocalCreateException(PacktSubException):
    """Raised when the local download directory cannot be created or is
    not writable."""
class Book(object):
    """One ebook entry scraped from the Packt account "my-ebooks" page."""

    # Set in __init__; class-level defaults document the attributes.
    authors = None  # list of author name strings
    title = None    # title with the trailing " [eBook]" marker removed
    nid = None      # Packt node id used to build download URLs

    def __init__(self, entry):
        """Parse one ``div.product-line`` element from the account page.

        :param entry: bs4.Tag with a ``nid`` attribute and a
            ``div.product-info`` child containing author and title divs.
        """
        assert isinstance(entry, bs.Tag)
        info = entry.find("div", {'class': "product-info"})
        self.nid = entry.attrs['nid']
        assert isinstance(info, bs.Tag)
        raw_authors = info.find("div", {'class': "author"}).getText()
        # Multiple authors are comma separated.  Strip each name
        # individually: splitting "A, B" alone leaves " B" with a leading
        # space, which then produced a double space in to_filename().
        self.authors = [name.strip() for name in raw_authors.split(",")]
        self.title = self._clean_title(info.find("div", {'class': "title"}).getText())

    @staticmethod
    def _clean_title(raw_title):
        """Return *raw_title* stripped of whitespace and a trailing
        " [eBook]" marker.

        The original used ``rstrip(" [eBook]")``, but rstrip removes
        *characters from a set* (" [eBok]"), not a suffix — titles ending
        in e.g. "Book" were silently corrupted.  Remove the exact suffix
        instead.
        """
        title = raw_title.strip()
        suffix = " [eBook]"
        if title.endswith(suffix):
            title = title[:-len(suffix)]
        return title

    def get_url(self, type):
        """Return the download URL for this book in format *type*."""
        return "https://www.packtpub.com/ebook_download/%s/%s" % (self.nid, type, )

    def to_filename(self, type):
        """Return "<authors> - <title>.<type>" for saving to disk."""
        return "%s - %s.%s" % (", ".join(self.authors), self.title, type, )
class PacktSub(object):
    """Minimal packtpub.com client: log in, claim the free daily ebook,
    and download ebooks listed on the account's "my-ebooks" page."""

    session = None      # requests session carrying the login cookies
    claim_link = None   # relative URL of today's claim link, set by login()
    path = "."          # download directory, made absolute in __init__
    headers = {
        'user-agent': 'cfreebook/0.0.1'
    }
    types = ['epub', 'mobi', 'pdf']  # formats Packt offers per book
    type = "all"  # default format selection; 'all' means every format

    def __init__(self, username=None, password=None):
        """Create a session and, when credentials are given, log in."""
        self.session = requests.session()
        if username is not None and password is not None:
            self.login(username, password)
        self.path = os.path.abspath(self.path)

    def set_path(self, path):
        """Set the download directory, creating it when missing.

        :raises LocalCreateException: directory cannot be created or is
            not writable.
        :raises ArgumentException: path exists but is not a directory.
        """
        path = os.path.abspath(path)
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError:
                raise LocalCreateException("Can't create directory: %s" % path)
        if not os.path.isdir(path):
            raise ArgumentException("Path is not a directory: %s" % path)
        if not os.access(path, os.W_OK):
            raise LocalCreateException("Can't create files in: %s" % path)
        self.path = path

    def set_type(self, type):
        """Select the download format: one of ``self.types`` or 'all'.

        :raises ArgumentException: unknown format name.
        """
        if type not in self.types + ['all']:
            raise ArgumentException("Unknown type: %s" % type)
        self.type = type

    def _download(self, book, type=None, path=None):
        """Stream *book* to disk in the requested format(s).

        BUG FIX: the original tested ``type is "all"`` — an identity
        comparison against a string literal that only worked via CPython
        string interning (and is a SyntaxWarning on modern Python).  Use
        ``==``, and collapse the duplicated download branches into one
        loop.
        """
        assert isinstance(book, Book)
        if path is None:
            path = self.path
        if type is None:
            type = self.type
        # Every known format for 'all', otherwise just the requested one.
        formats = self.types if type == "all" else [type]
        for fmt in formats:
            response = self.session.get(book.get_url(fmt), headers=self.headers, stream=True)
            with open(os.path.join(path, book.to_filename(fmt)), 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)

    def get_product_account_list(self):
        """Return the account's owned ebooks as a list of Book objects."""
        page = self.session.get("https://www.packtpub.com/account/my-ebooks", headers=self.headers)
        soup = bs.BeautifulSoup(page.text, 'html.parser')
        container = soup.find(id="product-account-list")
        assert isinstance(container, bs.Tag)
        books = []
        for entry in container.find_all('div', {'class': "product-line"}):
            # Some product-line divs are placeholders without product info.
            if entry.find("div", {'class': "product-info"}):
                books.append(Book(entry))
        return books

    def download_latest(self, type=None, path=None):
        """Download the most recently added book in the account.

        :raises PacktSubException: the account lists no ebooks (guards
            the previous bare IndexError on ``books[0]``).
        """
        books = self.get_product_account_list()
        if not books:
            raise PacktSubException("No ebooks found in the account")
        self._download(books[0], type, path)

    def download_all(self, type=None, path=None):
        """Download every book in the account."""
        for book in self.get_product_account_list():
            self._download(book, type, path)

    def claim(self):
        """Claim today's free book using the link found during login().

        :raises ClaimException: the claim did not redirect to my-ebooks.
        """
        claim_book = self.session.get("https://www.packtpub.com" + self.claim_link, headers=self.headers)
        if claim_book.url != "https://www.packtpub.com/account/my-ebooks":
            raise ClaimException("Failed to claim book")

    def login(self, username, password):
        """Authenticate against packtpub.com and cache today's claim link.

        :raises LoginException: the page layout changed (the scraping
            regexes found nothing) or the credentials were rejected.
            Previously a layout change surfaced as a bare AttributeError
            from ``None.group(...)``.
        """
        home = self.session.get("https://www.packtpub.com/packt/offers/free-learning", headers=self.headers)
        match = re.search(r"/freelearning-claim/\d*/\d*", home.text)
        if match is None:
            raise LoginException("Could not locate the daily claim link")
        self.claim_link = match.group(0)
        match = re.search(r"form_build_id.\s*id=.([^\"]+)\"", home.text)
        if match is None:
            raise LoginException("Could not locate the login form build id")
        form_build_id = match.group(1)
        auth = self.session.post("https://www.packtpub.com/register", {
            "email": username,
            "password": password,
            "op": "Login",
            "form_build_id": form_build_id,
            "form_id": "packt_user_login_form"
        }, headers=self.headers)
        if auth.url != "https://www.packtpub.com/account":
            raise LoginException("Authentication failed")
if __name__ == "__main__":

    def _main():
        """Command line entry point: parse arguments, then claim and/or
        download according to the selected flags."""
        arg_parser = argparse.ArgumentParser(description='Packtpub downloader', prog="PacktSub", usage='%(prog)s [options]')
        arg_parser.add_argument("-c", "--claim", help='Claim today\'s book', action="store_true")
        arg_parser.add_argument("-d", "--download", help='Download today\'s book', action="store_true")
        arg_parser.add_argument("-a", "--auto", help='auto modes', action="store_true")
        arg_parser.add_argument("--all", help='Download all the ebooks', action="store_true")
        arg_parser.add_argument("-o", "--output", help='Output directory', default=".")
        arg_parser.add_argument("-t", "--type", help='Type of file', default="all", choices=PacktSub.types + ['all'])
        arg_parser.add_argument("-u", "--username", help='Username (email)', required=True)
        arg_parser.add_argument("-p", "--password", help='password', required=True)
        opts = arg_parser.parse_args()
        try:
            # Logging in happens inside the constructor.
            client = PacktSub(opts.username, opts.password)
            client.set_path(opts.output)
            client.set_type(opts.type)
            if opts.claim or opts.auto:
                client.claim()
            # --all takes precedence over the single-book download.
            if opts.all:
                client.download_all()
            elif opts.download or opts.auto:
                client.download_latest()
        except PacktSubException as exc:
            sys.stderr.write("%s: %s" % (exc.__class__.__name__, str(exc), ))
            sys.exit(1)

    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This no longer works due to changes on the site. I put together a quick hack that demonstrates the new approach against their new REST endpoints, which is much simpler than parsing the site: https://gist.github.com/nneul/6eda98fd87a58a623b857523247f3471