Kovrinic/oreillyurls_1.py

## readme.md

      
    Raw
  

              readme.md
            
          
    Download all free ebooks from oreilly.com


oreillyurls_1.py

Pulls categories from http://www.oreilly.com/programming/free/ which does not catch all of the free available content.

oreillyurls_2.py

More comprehensive and pulls its categories from http://www.oreilly.com/free/
Placed into class, with options to specify etypes (i.e. .mobi etc...), categories, and processes (i.e. parallel thread count).

  
## oreillyurls_1.py
#!/usr/bin/env python
#
# Matthew Rothfuss
# 10/13/2016
#
# Purpose:
#   Download all free ebooks (epub, mobi, pdf) from oreilly.com
#
# Overview:
#   Runs in parallel threads, based on cpu count * 2 (i.e. 8 cpus * 2 = 16 threads)
#   Change "process_count" to desired threads if cpu count causes issues
#
# Original:
#   https://www.reddit.com/r/Python/comments/56syaa/7_oreilly_python_books_for_free/d8n6597

import multiprocessing
import requests
import re
import os

####################################################################################################
def is_same_file_size(filepath, url):
    """Check file size against URL ebook content size"""

    # Read the length of a file from the file header it is in Bytes
    r = requests.head(url)
    remote_size = float(r.headers['Content-Length'])

    # Local File size in bytes and 0 if it does not exist
    try:
        local_size = float(os.path.getsize(filepath))
    except:
        local_size = 0.0

    return remote_size == local_size

####################################################################################################
def ensure_dir(path):
    try:
        os.makedirs(path, exist_ok=True)
    except TypeError as e:
        # python 2.x fix
        if os.path.exists(path):
            if os.path.isfile(path):
                raise Exception("Error: Directory path already exists as a file. {}".format(path)) from e
        else:
            os.makedirs(path)

####################################################################################################
def download_file(url):
    """
    Download ebook
    Ensures download directory, based on category and book title
    Will download updated versions of ebooks if exists
        checks file size against URL ebook content size
    """

    dl_dir = url.split('/', 4)[3]
    # http://stackoverflow.com/a/16696317
    filename = url.split('/')[-1]
    file_dir = "".join(filename.split('.')[:-1])
    filepath = os.path.abspath("./{}/{}/{}".format(dl_dir, file_dir, filename))

    # Test if file already exists
    if os.path.isfile(filepath):
        # Test file size matching
        if is_same_file_size(filepath, url):
            print("SKIP Existing {}".format(filepath))
            return
        else:
            print("NEW {}".format(filename))

    # download/save ebook
    try:
        r = requests.get(url, stream=True)  # NOTE the stream=True parameter
        r.raise_for_status()  # raise error if status gives error
        ensure_dir(os.path.abspath("./{}/{}".format(dl_dir, file_dir)))  # ensure download directory
        with open(filepath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        print("SAVED '{}'".format(url))
        return filename
    except Exception as e:
        print(u"ERROR: Cannot handle '{}' >>> ({})".format(url, e))
        return

####################################################################################################
def download_all_from_urls(endings, urls):
    """
    Format download URLs into ebook URLs
    Spawn download category with multiple threads
    """

    combinations = (url.replace('.csp', ending)
                .replace('/free/', '/free/files/')
                for url in urls
                for ending in endings)

    process_count = multiprocessing.cpu_count()*2  # thread count depending on cpu count
    #process_count = 4  # hard code thread count
    pool = multiprocessing.Pool(processes=process_count)
    pool.map(download_file, combinations)
    pool.close() # no more tasks
    pool.join()  # wrap up current tasks

####################################################################################################
def get_urls(dl_type):
    """Parse oreilly category for all download URLs"""

    data = requests.get('http://www.oreilly.com/{}/free/'.format(dl_type))

    # Other URLs
    # Don't worry, I'm not _parsing_ html with regex. Merely scraping it. :)
    pattern = re.compile('http://www.oreilly.com/{}/free.*\.csp'.format(dl_type))
    return pattern.findall(data.text)

####################################################################################################
def get_main_urls():
    """Get all the pages containing free ebooks"""

    data = requests.get('http://www.oreilly.com/programming/free/')

    # Just match the name not the whole URL
    pattern = re.compile('(?<=oreilly\.com\/)(.*)(?=\/free\/)')

    # Remove duplicates
    return list(set(pattern.findall(data.text)))

####################################################################################################
"""Main Program"""

file_types = ['.pdf', '.mobi', '.epub']
download_types = get_main_urls()

for download_type in download_types:
    urls = get_urls(download_type)
    download_all_from_urls(file_types, urls)

## oreillyurls_2.py
#!/usr/bin/env python3
#
# Matthew Rothfuss
# First: 10/13/2016
# Edit: 03/07/2018
#
# Purpose:
#   Download all free ebooks (epub, mobi, pdf) from oreilly.com
#
# Overview:
#   Runs in parallel threads, based on cpu count * 2 (i.e. 8 cpus * 2 = 16 threads)
#   Change "processes" to desired threads if cpu count causes issues
#   Can specify custom "categories" and "etypes"
#   Set "verbose" to True, to print all output
#
# Original:
#   https://www.reddit.com/r/Python/comments/56syaa/7_oreilly_python_books_for_free/d8n6597

from argparse import ArgumentParser as AP
from lxml import html as HTML
import multiprocessing
import argcomplete
import requests
import re
import os

####################################################################################################
class OReillyEbookFreeScraper(object):
    def __init__(self, categories=None, etypes=None, processes=None, outdir=None, dryrun=False, verbose=0):
        self.base_url = 'http://www.oreilly.com'
        self.dryrun = dryrun
        self.verbose = verbose > 0
        self.outdir = outdir
        self.categories = list()
        if categories:
            self.categories = categories if isinstance(categories, list) else [categories]
        self.ebook_types = ['.pdf', '.mobi', '.epub']
        if etypes:
            self.ebook_types = etypes if isinstance(etypes, list) else [etypes]
        if set([ ebt for ebt in self.ebook_types if not ebt.startswith('.')]):
            self.ebook_types = [('.'+t if not t.startswith('.') else t) for t in self.ebook_types]
        self.processes = processes if processes else multiprocessing.cpu_count()*2


        self.verify_outdir()  # setup output directory
        if self.dryrun:
            print("*                                                                              *")
            print("*******************************  Dry-Run Mode  *********************************")
            print("*                                                                              *")
        print("\nSaving all free ebooks ({}) from oreilly.com".format(', '.join(self.ebook_types)))
        print("*" * 80)
        self.get_all_free_ebook_urls()
        print("Done")

    def is_same_file_size(self, filepath, url):
        """Check file size against URL ebook content size"""

        # Read the length of a file from the file header it is in Bytes
        r = requests.head(url)
        remote_size = float(r.headers['Content-Length'])

        # Local File size in bytes and 0 if it does not exist
        try: local_size = float(os.path.getsize(filepath))
        except: local_size = 0.0

        return remote_size == local_size

    def ensure_dir(self, path):
        try:
            os.makedirs(path, exist_ok=True)
        except TypeError as e:
            # python 2.x fix
            if os.path.exists(path):
                if os.path.isfile(path):
                    raise Exception("Error: Directory path already exists as a file. {}".format(path))
            else:
                os.makedirs(path)

    def verify_outdir(self):
        if not self.dryrun:
            self.ensure_dir(self.outdir)

    def download(self, url):
        """
        Download ebook
        Ensures download directory, based on category and book title
        Will download updated versions of ebooks if exists
            checks file size against URL ebook content size
        """

        dl_dir = url.split('/', 4)[3]
        # http://stackoverflow.com/a/16696317
        filename = url.split('/')[-1]
        file_dir = "".join(filename.split('.')[:-1])
        filepath = os.path.abspath(self.outdir+"/{}/{}/{}".format(dl_dir, file_dir, filename))

        # Test if file already exists
        if os.path.isfile(filepath):
            # Test file size matching
            if self.is_same_file_size(filepath, url):
                if self.verbose:
                    print("SKIP:    Existing {}".format(filepath))
                return
            elif self.verbose:
                print("NEW:     {}".format(url))

        # skip download for dryrun
        if self.dryrun:
            print("[dryrun]: {} --> {}".format(filename, filepath))
            return

        # download/save ebook
        try:
            r = requests.get(url, stream=True)  # NOTE the stream=True parameter
            r.raise_for_status()  # raise error if status gives error
            self.ensure_dir(os.path.abspath(self.outdir+"/{}/{}".format(dl_dir, file_dir)))  # ensure download directory
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
            print("SAVED:   {}".format(url))
            return filename
        except requests.exceptions.HTTPError as e:
            if (e.response.status_code == 404):
                if self.verbose:
                    print(u"WARN:    {} file does NOT exists for '{}'".format(url.split('.')[-1], url))
            else:
                print(u"ERROR:   <HTTP {}> Cannot handle '{}'".format(e.response.status_code, url))
        except Exception as e:
            print(u"ERROR:   Cannot handle '{}' >>> ({})".format(url, e))
        return

    def expand_url(self, url):
        try:
            r = requests.head(url)
            r.raise_for_status()
            rurl = r.url
            if 'location' in r.headers:
                rurl = r.headers['location']

            if rurl.endswith('.do'):
                return
            elif rurl.split('/', 5)[4] != 'free':
                return

            return rurl
        except:
            return

    def pa(self, cat):
        if cat == "ai":
            return "data"
        elif cat == "software-engineering":
            return "programming"
        elif cat == "operations":
            return "webops-perf"
        elif cat == "web-programming":
            return "web-platform"
        else:
            return cat

    def category_from_url(self, url):
        surl = url.split('/', 5) if url else ''
        return surl[3] if surl else ''

    def get_dl_links(self, url):
        try:
            req = requests.get(url)
            req.raise_for_status()
        except Exception as e:
            if self.verbose:
                print(u"WARN:   Cannot handle '{}' >>> ({})".format(url, e))
            return list()

        try:
            html = HTML.fromstring(req.text)
        except Exception as e:
            if self.verbose:
                print(u"WARN:   Cannot find HTML in '{}' >>> ({})".format(url, e))
            return list()

        post_vals = dict()
        for item in html.xpath('//form[@method="post"]'):
            keys = item.xpath('./input/@name')
            values = item.xpath('./input/@value')
            post_vals = dict(zip(keys, values))

        post_vals.update(
            {
                'first': 'a', 'last': 'b', 'email': 'a@b.com',
                'newsletter': 'nl_webops_perf', 'x-a': 'Get Your Free Ebook',
                'x-redirect': url+"?download=true"
                }
            )

        purl = self.base_url+"/cs/user/create/download_requests"
        try:
            req2 = requests.post(purl, data=post_vals)
            req2.raise_for_status()
        except Exception as e:
            if self.verbose:
                print(u"WARN:   Cannot handle '{}' >>> ({})".format(purl, e))
            return list()

        r = re.search("var\s*?shortLink\s*?\=\s*?[\'\"]([^\'\"]+)[\'\"]", req2.text)
        if not r:
            return list()

        slink = r.group(1).strip()

        dlurl = self.base_url + '/' + self.pa(self.category_from_url(url)) + '/free/files/' + slink
        return [ dlurl+t for t in self.ebook_types ]

    def get_all_free_ebook_urls(self):
        """Get all free ebook urls"""

        furl = self.base_url+'/free/'
        try:
            req = requests.get(furl)
            req.raise_for_status()
        except Exception as e:
            print(u"ERROR:   Cannot handle '{}' >>> ({})".format(furl, e))
            return list()

        html = HTML.fromstring(req.text)
        eurls = html.xpath('//div[@name="FreeEbooks"]//a[@class="item-title"]/@href')

        pool = multiprocessing.Pool(processes=self.processes)

        urls = pool.map(self.expand_url, eurls)
        urls = list(sorted([u for u in set(urls) if u]))

        if self.categories:
            urls = list(set([u for u in urls if self.category_from_url(u) in self.categories]))
            categories = self.categories
        else:
            categories = pool.map(self.category_from_url, urls)
            categories = list(sorted([c for c in set(categories) if c]))  # remove duplicates

        print("Downloading available free ebooks from the following categorie(s):\n{}".format(', '.join(categories)))
        print("*" * 80)
        print("Saving to the following directory:\n{}".format(os.path.abspath(self.outdir)))
        print("*" * 80)

        dl_urls = pool.map(self.get_dl_links, urls)
        dl_flat_list = [item for sublist in dl_urls for item in sublist]
        pool.map(self.download, dl_flat_list)

        pool.close() # no more tasks
        pool.join()  # wrap up current tasks

        if self.dryrun:
            print("\nINFO: {} possible ebooks in '{}' categorie(s).".format(int(len(dl_flat_list)/3), ', '.join(categories)))

        return

####################################################################################################
if __name__ == "__main__":
    """
    Main Program
    Examples:
        1 : oreillyurls_2.py -p 4
        2 : oreillyurls_2.py -c 'data'
        3 : oreillyurls_2.py -c ['programming', 'data'] -o ~/Downloads/ebooks-tmp
    """

    parser = AP(description='Download all free ebooks (epub, mobi, pdf) from oreilly.com')
    parser.add_argument('-c', '--categories', dest='categories', help='Specify custom "categories". Can be single or list.')
    parser.add_argument('-t', '--types', dest='etypes', help='Specify custom "ebook types". Can be single or list.')
    parser.add_argument('-p', '--processes', dest='processes', help='Specify custom processes count. Used in multithreading.')
    parser.add_argument('-o', '--out', dest='outdir', default='ebooks-oreilly', help='Output directory')
    parser.add_argument('-d', '--dryrun', dest='dryrun', default=False, action='store_true', help='Do a dry run without downloading.')
    parser.add_argument('-v', '--verbose', dest='verbose', default=0, action="count", help='Enable/Disable verbose logging.')
    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    OReillyEbookFreeScraper(**vars(args))
	#!/usr/bin/env python
	#
	# Matthew Rothfuss
	# 10/13/2016
	#
	# Purpose:
	# Download all free ebooks (epub, mobi, pdf) from oreilly.com
	#
	# Overview:
	# Runs in parallel threads, based on cpu count * 2 (i.e. 8 cpus * 2 = 16 threads)
	# Change "process_count" to desired threads if cpu count causes issues
	#
	# Original:
	# https://www.reddit.com/r/Python/comments/56syaa/7_oreilly_python_books_for_free/d8n6597

	import multiprocessing
	import requests
	import re
	import os

	####################################################################################################
	def is_same_file_size(filepath, url):
	"""Check file size against URL ebook content size"""

	# Read the length of a file from the file header it is in Bytes
	r = requests.head(url)
	remote_size = float(r.headers['Content-Length'])

	# Local File size in bytes and 0 if it does not exist
	try:
	local_size = float(os.path.getsize(filepath))
	except:
	local_size = 0.0

	return remote_size == local_size

	####################################################################################################
	def ensure_dir(path):
	try:
	os.makedirs(path, exist_ok=True)
	except TypeError as e:
	# python 2.x fix
	if os.path.exists(path):
	if os.path.isfile(path):
	raise Exception("Error: Directory path already exists as a file. {}".format(path)) from e
	else:
	os.makedirs(path)

	####################################################################################################
	def download_file(url):
	"""
	Download ebook
	Ensures download directory, based on category and book title
	Will download updated versions of ebooks if exists
	checks file size against URL ebook content size
	"""

	dl_dir = url.split('/', 4)[3]
	# http://stackoverflow.com/a/16696317
	filename = url.split('/')[-1]
	file_dir = "".join(filename.split('.')[:-1])
	filepath = os.path.abspath("./{}/{}/{}".format(dl_dir, file_dir, filename))

	# Test if file already exists
	if os.path.isfile(filepath):
	# Test file size matching
	if is_same_file_size(filepath, url):
	print("SKIP Existing {}".format(filepath))
	return
	else:
	print("NEW {}".format(filename))

	# download/save ebook
	try:
	r = requests.get(url, stream=True) # NOTE the stream=True parameter
	r.raise_for_status() # raise error if status gives error
	ensure_dir(os.path.abspath("./{}/{}".format(dl_dir, file_dir))) # ensure download directory
	with open(filepath, 'wb') as f:
	for chunk in r.iter_content(chunk_size=1024):
	if chunk: # filter out keep-alive new chunks
	f.write(chunk)
	print("SAVED '{}'".format(url))
	return filename
	except Exception as e:
	print(u"ERROR: Cannot handle '{}' >>> ({})".format(url, e))
	return

	####################################################################################################
	def download_all_from_urls(endings, urls):
	"""
	Format download URLs into ebook URLs
	Spawn download category with multiple threads
	"""

	combinations = (url.replace('.csp', ending)
	.replace('/free/', '/free/files/')
	for url in urls
	for ending in endings)

	process_count = multiprocessing.cpu_count()*2 # thread count depending on cpu count
	#process_count = 4 # hard code thread count
	pool = multiprocessing.Pool(processes=process_count)
	pool.map(download_file, combinations)
	pool.close() # no more tasks
	pool.join() # wrap up current tasks

	####################################################################################################
	def get_urls(dl_type):
	"""Parse oreilly category for all download URLs"""

	data = requests.get('http://www.oreilly.com/{}/free/'.format(dl_type))

	# Other URLs
	# Don't worry, I'm not _parsing_ html with regex. Merely scraping it. :)
	pattern = re.compile('http://www.oreilly.com/{}/free.*\.csp'.format(dl_type))
	return pattern.findall(data.text)

	####################################################################################################
	def get_main_urls():
	"""Get all the pages containing free ebooks"""

	data = requests.get('http://www.oreilly.com/programming/free/')

	# Just match the name not the whole URL
	pattern = re.compile('(?<=oreilly\.com\/)(.*)(?=\/free\/)')

	# Remove duplicates
	return list(set(pattern.findall(data.text)))

	####################################################################################################
	"""Main Program"""

	file_types = ['.pdf', '.mobi', '.epub']
	download_types = get_main_urls()

	for download_type in download_types:
	urls = get_urls(download_type)
	download_all_from_urls(file_types, urls)
	#!/usr/bin/env python3
	#
	# Matthew Rothfuss
	# First: 10/13/2016
	# Edit: 03/07/2018
	#
	# Purpose:
	# Download all free ebooks (epub, mobi, pdf) from oreilly.com
	#
	# Overview:
	# Runs in parallel threads, based on cpu count * 2 (i.e. 8 cpus * 2 = 16 threads)
	# Change "processes" to desired threads if cpu count causes issues
	# Can specify custom "categories" and "etypes"
	# Set "verbose" to True, to print all output
	#
	# Original:
	# https://www.reddit.com/r/Python/comments/56syaa/7_oreilly_python_books_for_free/d8n6597

	from argparse import ArgumentParser as AP
	from lxml import html as HTML
	import multiprocessing
	import argcomplete
	import requests
	import re
	import os

	####################################################################################################
	class OReillyEbookFreeScraper(object):
	def __init__(self, categories=None, etypes=None, processes=None, outdir=None, dryrun=False, verbose=0):
	self.base_url = 'http://www.oreilly.com'
	self.dryrun = dryrun
	self.verbose = verbose > 0
	self.outdir = outdir
	self.categories = list()
	if categories:
	self.categories = categories if isinstance(categories, list) else [categories]
	self.ebook_types = ['.pdf', '.mobi', '.epub']
	if etypes:
	self.ebook_types = etypes if isinstance(etypes, list) else [etypes]
	if set([ ebt for ebt in self.ebook_types if not ebt.startswith('.')]):
	self.ebook_types = [('.'+t if not t.startswith('.') else t) for t in self.ebook_types]
	self.processes = processes if processes else multiprocessing.cpu_count()*2


	self.verify_outdir() # setup output directory
	if self.dryrun:
	print("* *")
	print("***************************** Dry-Run Mode *******************************")
	print("* *")
	print("\nSaving all free ebooks ({}) from oreilly.com".format(', '.join(self.ebook_types)))
	print("" 80)
	self.get_all_free_ebook_urls()
	print("Done")

	def is_same_file_size(self, filepath, url):
	"""Check file size against URL ebook content size"""

	# Read the length of a file from the file header it is in Bytes
	r = requests.head(url)
	remote_size = float(r.headers['Content-Length'])

	# Local File size in bytes and 0 if it does not exist
	try: local_size = float(os.path.getsize(filepath))
	except: local_size = 0.0

	return remote_size == local_size

	def ensure_dir(self, path):
	try:
	os.makedirs(path, exist_ok=True)
	except TypeError as e:
	# python 2.x fix
	if os.path.exists(path):
	if os.path.isfile(path):
	raise Exception("Error: Directory path already exists as a file. {}".format(path))
	else:
	os.makedirs(path)

	def verify_outdir(self):
	if not self.dryrun:
	self.ensure_dir(self.outdir)

	def download(self, url):
	"""
	Download ebook
	Ensures download directory, based on category and book title
	Will download updated versions of ebooks if exists
	checks file size against URL ebook content size
	"""

	dl_dir = url.split('/', 4)[3]
	# http://stackoverflow.com/a/16696317
	filename = url.split('/')[-1]
	file_dir = "".join(filename.split('.')[:-1])
	filepath = os.path.abspath(self.outdir+"/{}/{}/{}".format(dl_dir, file_dir, filename))

	# Test if file already exists
	if os.path.isfile(filepath):
	# Test file size matching
	if self.is_same_file_size(filepath, url):
	if self.verbose:
	print("SKIP: Existing {}".format(filepath))
	return
	elif self.verbose:
	print("NEW: {}".format(url))

	# skip download for dryrun
	if self.dryrun:
	print("[dryrun]: {} --> {}".format(filename, filepath))
	return

	# download/save ebook
	try:
	r = requests.get(url, stream=True) # NOTE the stream=True parameter
	r.raise_for_status() # raise error if status gives error
	self.ensure_dir(os.path.abspath(self.outdir+"/{}/{}".format(dl_dir, file_dir))) # ensure download directory
	with open(filepath, 'wb') as f:
	for chunk in r.iter_content(chunk_size=1024):
	if chunk: # filter out keep-alive new chunks
	f.write(chunk)
	print("SAVED: {}".format(url))
	return filename
	except requests.exceptions.HTTPError as e:
	if (e.response.status_code == 404):
	if self.verbose:
	print(u"WARN: {} file does NOT exists for '{}'".format(url.split('.')[-1], url))
	else:
	print(u"ERROR: <HTTP {}> Cannot handle '{}'".format(e.response.status_code, url))
	except Exception as e:
	print(u"ERROR: Cannot handle '{}' >>> ({})".format(url, e))
	return

	def expand_url(self, url):
	try:
	r = requests.head(url)
	r.raise_for_status()
	rurl = r.url
	if 'location' in r.headers:
	rurl = r.headers['location']

	if rurl.endswith('.do'):
	return
	elif rurl.split('/', 5)[4] != 'free':
	return

	return rurl
	except:
	return

	def pa(self, cat):
	if cat == "ai":
	return "data"
	elif cat == "software-engineering":
	return "programming"
	elif cat == "operations":
	return "webops-perf"
	elif cat == "web-programming":
	return "web-platform"
	else:
	return cat

	def category_from_url(self, url):
	surl = url.split('/', 5) if url else ''
	return surl[3] if surl else ''

	def get_dl_links(self, url):
	try:
	req = requests.get(url)
	req.raise_for_status()
	except Exception as e:
	if self.verbose:
	print(u"WARN: Cannot handle '{}' >>> ({})".format(url, e))
	return list()

	try:
	html = HTML.fromstring(req.text)
	except Exception as e:
	if self.verbose:
	print(u"WARN: Cannot find HTML in '{}' >>> ({})".format(url, e))
	return list()

	post_vals = dict()
	for item in html.xpath('//form[@method="post"]'):
	keys = item.xpath('./input/@name')
	values = item.xpath('./input/@value')
	post_vals = dict(zip(keys, values))

	post_vals.update(
	{
	'first': 'a', 'last': 'b', 'email': 'a@b.com',
	'newsletter': 'nl_webops_perf', 'x-a': 'Get Your Free Ebook',
	'x-redirect': url+"?download=true"
	}
	)

	purl = self.base_url+"/cs/user/create/download_requests"
	try:
	req2 = requests.post(purl, data=post_vals)
	req2.raise_for_status()
	except Exception as e:
	if self.verbose:
	print(u"WARN: Cannot handle '{}' >>> ({})".format(purl, e))
	return list()

	r = re.search("var\s?shortLink\s?\=\s*?[\'\"]([^\'\"]+)[\'\"]", req2.text)
	if not r:
	return list()

	slink = r.group(1).strip()

	dlurl = self.base_url + '/' + self.pa(self.category_from_url(url)) + '/free/files/' + slink
	return [ dlurl+t for t in self.ebook_types ]

	def get_all_free_ebook_urls(self):
	"""Get all free ebook urls"""

	furl = self.base_url+'/free/'
	try:
	req = requests.get(furl)
	req.raise_for_status()
	except Exception as e:
	print(u"ERROR: Cannot handle '{}' >>> ({})".format(furl, e))
	return list()

	html = HTML.fromstring(req.text)
	eurls = html.xpath('//div[@name="FreeEbooks"]//a[@class="item-title"]/@href')

	pool = multiprocessing.Pool(processes=self.processes)

	urls = pool.map(self.expand_url, eurls)
	urls = list(sorted([u for u in set(urls) if u]))

	if self.categories:
	urls = list(set([u for u in urls if self.category_from_url(u) in self.categories]))
	categories = self.categories
	else:
	categories = pool.map(self.category_from_url, urls)
	categories = list(sorted([c for c in set(categories) if c])) # remove duplicates

	print("Downloading available free ebooks from the following categorie(s):\n{}".format(', '.join(categories)))
	print("" 80)
	print("Saving to the following directory:\n{}".format(os.path.abspath(self.outdir)))
	print("" 80)

	dl_urls = pool.map(self.get_dl_links, urls)
	dl_flat_list = [item for sublist in dl_urls for item in sublist]
	pool.map(self.download, dl_flat_list)

	pool.close() # no more tasks
	pool.join() # wrap up current tasks

	if self.dryrun:
	print("\nINFO: {} possible ebooks in '{}' categorie(s).".format(int(len(dl_flat_list)/3), ', '.join(categories)))

	return

	####################################################################################################
	if __name__ == "__main__":
	"""
	Main Program
	Examples:
	1 : oreillyurls_2.py -p 4
	2 : oreillyurls_2.py -c 'data'
	3 : oreillyurls_2.py -c ['programming', 'data'] -o ~/Downloads/ebooks-tmp
	"""

	parser = AP(description='Download all free ebooks (epub, mobi, pdf) from oreilly.com')
	parser.add_argument('-c', '--categories', dest='categories', help='Specify custom "categories". Can be single or list.')
	parser.add_argument('-t', '--types', dest='etypes', help='Specify custom "ebook types". Can be single or list.')
	parser.add_argument('-p', '--processes', dest='processes', help='Specify custom processes count. Used in multithreading.')
	parser.add_argument('-o', '--out', dest='outdir', default='ebooks-oreilly', help='Output directory')
	parser.add_argument('-d', '--dryrun', dest='dryrun', default=False, action='store_true', help='Do a dry run without downloading.')
	parser.add_argument('-v', '--verbose', dest='verbose', default=0, action="count", help='Enable/Disable verbose logging.')
	argcomplete.autocomplete(parser)
	args = parser.parse_args()

	OReillyEbookFreeScraper(**vars(args))