Last active
June 17, 2019 09:21
-
-
Save patvdleer/3da45e7e24cc402fedec4f004f2975aa to your computer and use it in GitHub Desktop.
packtpub daily claimer/downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import shutil | |
import sys | |
import requests | |
try: | |
import argparse | |
except ImportError: | |
sys.stderr.write("The Python module argparse is required") | |
sys.exit(1) | |
try: | |
import bs4 as bs | |
except ImportError: | |
sys.stderr.write("The Python module BeautifulSoup 4 (bs4) is required") | |
sys.exit(1) | |
class PacktSubException(Exception):
    """Base class for all errors raised by this script.

    Derives from Exception rather than BaseException: BaseException is
    reserved for system-exiting events (KeyboardInterrupt, SystemExit),
    and subclassing it makes these errors slip past generic
    ``except Exception`` handlers in calling code.
    """
class ArgumentException(PacktSubException):
    """Raised when a caller supplies an invalid value (unknown download
    type, or a download path that exists but is not a directory)."""
class LoginException(PacktSubException):
    """Raised when authentication against packtpub.com fails (the login
    POST does not redirect to the account page)."""
class ClaimException(PacktSubException):
    """Raised when claiming the free daily book fails (the claim request
    does not redirect to the my-ebooks page)."""
class LocalCreateException(PacktSubException):
    """Raised when the local download directory cannot be created or is
    not writable."""
class Book(object):
    """One ebook entry scraped from the Packt account "my-ebooks" page."""

    # Set in __init__; class-level defaults document the attributes.
    authors = None  # list of author name strings
    title = None    # title with the trailing " [eBook]" marker removed
    nid = None      # Packt node id used to build download URLs

    def __init__(self, entry):
        """Parse one ``div.product-line`` element from the account page.

        :param entry: bs4.Tag with a ``nid`` attribute and a
            ``div.product-info`` child containing author and title divs.
        """
        assert isinstance(entry, bs.Tag)
        info = entry.find("div", {'class': "product-info"})
        self.nid = entry.attrs['nid']
        assert isinstance(info, bs.Tag)
        raw_authors = info.find("div", {'class': "author"}).getText()
        # Multiple authors are comma separated.  Strip each name
        # individually: splitting "A, B" alone leaves " B" with a leading
        # space, which then produced a double space in to_filename().
        self.authors = [name.strip() for name in raw_authors.split(",")]
        self.title = self._clean_title(info.find("div", {'class': "title"}).getText())

    @staticmethod
    def _clean_title(raw_title):
        """Return *raw_title* stripped of whitespace and a trailing
        " [eBook]" marker.

        The original used ``rstrip(" [eBook]")``, but rstrip removes
        *characters from a set* (" [eBok]"), not a suffix — titles ending
        in e.g. "Book" were silently corrupted.  Remove the exact suffix
        instead.
        """
        title = raw_title.strip()
        suffix = " [eBook]"
        if title.endswith(suffix):
            title = title[:-len(suffix)]
        return title

    def get_url(self, type):
        """Return the download URL for this book in format *type*."""
        return "https://www.packtpub.com/ebook_download/%s/%s" % (self.nid, type, )

    def to_filename(self, type):
        """Return "<authors> - <title>.<type>" for saving to disk."""
        return "%s - %s.%s" % (", ".join(self.authors), self.title, type, )
class PacktSub(object):
    """Minimal packtpub.com client: log in, claim the free daily ebook,
    and download ebooks listed on the account's "my-ebooks" page."""

    session = None      # requests session carrying the login cookies
    claim_link = None   # relative URL of today's claim link, set by login()
    path = "."          # download directory, made absolute in __init__
    headers = {
        'user-agent': 'cfreebook/0.0.1'
    }
    types = ['epub', 'mobi', 'pdf']  # formats Packt offers per book
    type = "all"  # default format selection; 'all' means every format

    def __init__(self, username=None, password=None):
        """Create a session and, when credentials are given, log in."""
        self.session = requests.session()
        if username is not None and password is not None:
            self.login(username, password)
        self.path = os.path.abspath(self.path)

    def set_path(self, path):
        """Set the download directory, creating it when missing.

        :raises LocalCreateException: directory cannot be created or is
            not writable.
        :raises ArgumentException: path exists but is not a directory.
        """
        path = os.path.abspath(path)
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError:
                raise LocalCreateException("Can't create directory: %s" % path)
        if not os.path.isdir(path):
            raise ArgumentException("Path is not a directory: %s" % path)
        if not os.access(path, os.W_OK):
            raise LocalCreateException("Can't create files in: %s" % path)
        self.path = path

    def set_type(self, type):
        """Select the download format: one of ``self.types`` or 'all'.

        :raises ArgumentException: unknown format name.
        """
        if type not in self.types + ['all']:
            raise ArgumentException("Unknown type: %s" % type)
        self.type = type

    def _download(self, book, type=None, path=None):
        """Stream *book* to disk in the requested format(s).

        BUG FIX: the original tested ``type is "all"`` — an identity
        comparison against a string literal that only worked via CPython
        string interning (and is a SyntaxWarning on modern Python).  Use
        ``==``, and collapse the duplicated download branches into one
        loop.
        """
        assert isinstance(book, Book)
        if path is None:
            path = self.path
        if type is None:
            type = self.type
        # Every known format for 'all', otherwise just the requested one.
        formats = self.types if type == "all" else [type]
        for fmt in formats:
            response = self.session.get(book.get_url(fmt), headers=self.headers, stream=True)
            with open(os.path.join(path, book.to_filename(fmt)), 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)

    def get_product_account_list(self):
        """Return the account's owned ebooks as a list of Book objects."""
        page = self.session.get("https://www.packtpub.com/account/my-ebooks", headers=self.headers)
        soup = bs.BeautifulSoup(page.text, 'html.parser')
        container = soup.find(id="product-account-list")
        assert isinstance(container, bs.Tag)
        books = []
        for entry in container.find_all('div', {'class': "product-line"}):
            # Some product-line divs are placeholders without product info.
            if entry.find("div", {'class': "product-info"}):
                books.append(Book(entry))
        return books

    def download_latest(self, type=None, path=None):
        """Download the most recently added book in the account.

        :raises PacktSubException: the account lists no ebooks (guards
            the previous bare IndexError on ``books[0]``).
        """
        books = self.get_product_account_list()
        if not books:
            raise PacktSubException("No ebooks found in the account")
        self._download(books[0], type, path)

    def download_all(self, type=None, path=None):
        """Download every book in the account."""
        for book in self.get_product_account_list():
            self._download(book, type, path)

    def claim(self):
        """Claim today's free book using the link found during login().

        :raises ClaimException: the claim did not redirect to my-ebooks.
        """
        claim_book = self.session.get("https://www.packtpub.com" + self.claim_link, headers=self.headers)
        if claim_book.url != "https://www.packtpub.com/account/my-ebooks":
            raise ClaimException("Failed to claim book")

    def login(self, username, password):
        """Authenticate against packtpub.com and cache today's claim link.

        :raises LoginException: the page layout changed (the scraping
            regexes found nothing) or the credentials were rejected.
            Previously a layout change surfaced as a bare AttributeError
            from ``None.group(...)``.
        """
        home = self.session.get("https://www.packtpub.com/packt/offers/free-learning", headers=self.headers)
        match = re.search(r"/freelearning-claim/\d*/\d*", home.text)
        if match is None:
            raise LoginException("Could not locate the daily claim link")
        self.claim_link = match.group(0)
        match = re.search(r"form_build_id.\s*id=.([^\"]+)\"", home.text)
        if match is None:
            raise LoginException("Could not locate the login form build id")
        form_build_id = match.group(1)
        auth = self.session.post("https://www.packtpub.com/register", {
            "email": username,
            "password": password,
            "op": "Login",
            "form_build_id": form_build_id,
            "form_id": "packt_user_login_form"
        }, headers=self.headers)
        if auth.url != "https://www.packtpub.com/account":
            raise LoginException("Authentication failed")
if __name__ == "__main__":

    def _main():
        """Command line entry point: parse arguments, then claim and/or
        download according to the selected flags."""
        arg_parser = argparse.ArgumentParser(description='Packtpub downloader', prog="PacktSub", usage='%(prog)s [options]')
        arg_parser.add_argument("-c", "--claim", help='Claim today\'s book', action="store_true")
        arg_parser.add_argument("-d", "--download", help='Download today\'s book', action="store_true")
        arg_parser.add_argument("-a", "--auto", help='auto modes', action="store_true")
        arg_parser.add_argument("--all", help='Download all the ebooks', action="store_true")
        arg_parser.add_argument("-o", "--output", help='Output directory', default=".")
        arg_parser.add_argument("-t", "--type", help='Type of file', default="all", choices=PacktSub.types + ['all'])
        arg_parser.add_argument("-u", "--username", help='Username (email)', required=True)
        arg_parser.add_argument("-p", "--password", help='password', required=True)
        opts = arg_parser.parse_args()
        try:
            # Logging in happens inside the constructor.
            client = PacktSub(opts.username, opts.password)
            client.set_path(opts.output)
            client.set_type(opts.type)
            if opts.claim or opts.auto:
                client.claim()
            # --all takes precedence over the single-book download.
            if opts.all:
                client.download_all()
            elif opts.download or opts.auto:
                client.download_latest()
        except PacktSubException as exc:
            sys.stderr.write("%s: %s" % (exc.__class__.__name__, str(exc), ))
            sys.exit(1)

    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This no longer works due to changes on the site. I put together a quick hack that demonstrates the new approach against their new REST endpoints, which is much simpler than parsing the site: https://gist.github.com/nneul/6eda98fd87a58a623b857523247f3471