Skip to content

Instantly share code, notes, and snippets.

@oleeander
Last active December 23, 2015 05:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oleeander/6586392 to your computer and use it in GitHub Desktop.
Save oleeander/6586392 to your computer and use it in GitHub Desktop.
STealAllBooks from it-ebooks.info
#!/usr/bin/env python3
import argparse
import requests
import atexit
import os
import signal
import sys
from pyquery import PyQuery
def latest_book_id():
    """Return the numeric ID of the first book linked on the front page.

    Fetches http://it-ebooks.info/, takes the first anchor found inside a
    full-width table cell and returns the third '/'-separated component of
    its ``href`` converted to ``int``.
    """
    front_page = PyQuery(url='http://it-ebooks.info/')
    first_link = front_page('table[width="100%"] tr td a').eq(0)
    href_parts = first_link.attr.href.split('/')
    return int(href_parts[2])
def _filename_from_headers(headers, book_id, book_pdf):
    """Choose the local file name from HEAD response headers, or build one."""
    disposition = headers.get('content-disposition', '')
    _, found, value = disposition.partition('filename=')
    value = value.strip()
    if found and value:
        if value[0] in '\'"':
            # Quoted value: keep only what sits between the matching quotes,
            # so trailing parameters (e.g. '; size=...') are not swallowed.
            closing = value.find(value[0], 1)
            value = value[1:closing] if closing != -1 else value[1:]
        else:
            value = value.split(';', 1)[0].strip()
        # basename() drops any directory components sent by the server.
        name = os.path.basename(value)
        if name and name not in ('.', '..'):
            return name
    return 'it-ebooks_info_{book_id}_{name}.pdf'.format(
        book_id=book_id,
        name=os.path.basename(book_pdf)
    )


def download_ebook(directory_prefix, book_id):
    """Download the PDF of one ebook into ``directory_prefix``.

    The book page ``http://it-ebooks.info/book/<book_id>/`` is fetched and the
    download link is read from a fixed position in its table. The file name
    comes from the ``content-disposition`` header of a HEAD request when
    available, otherwise it is generated from the book ID and the link.

    Args:
        directory_prefix: Target directory; created if it does not exist.
        book_id: Numeric ID of the book on the site.

    Returns:
        ``(book_id, filename)`` on success, or a human-readable error string
        when no download link was found on the book page.

    Raises:
        NotADirectoryError: ``directory_prefix`` exists but is not a directory.
    """
    book_url = 'http://it-ebooks.info/book/{id}/'.format(
        id=book_id
    )
    book_page = PyQuery(url=book_url)
    # NOTE(review): the link is expected in the 2nd cell of the 16th table
    # row; presumably this mirrors the site's page layout - confirm.
    book_pdf = book_page('table > tr').eq(15)('td').eq(1)('a').attr.href
    if book_pdf is None:
        return 'Unable to download #{book_id}. 404? {book_url}'.format(
            book_id=book_id,
            book_url=book_url
        )
    book_pdf = book_pdf.replace('/go.php', 'http://it-ebooks.info/go.php')

    referer = {'referer': book_url}
    book_head = requests.head(book_pdf, headers=referer)
    filename = _filename_from_headers(book_head.headers, book_id, book_pdf)

    if os.path.exists(directory_prefix) and not os.path.isdir(directory_prefix):
        raise NotADirectoryError('"{directory_prefix}" isn\'t a directory.'.format(
            directory_prefix=directory_prefix
        ))
    # exist_ok avoids a crash if the directory appears between check and create.
    os.makedirs(directory_prefix, exist_ok=True)

    # Start the request before opening the file so that a failed request does
    # not leave an empty file behind.
    book_dl = requests.get(book_pdf, headers=referer, stream=True)
    try:
        with open(os.path.join(directory_prefix, filename), 'wb') as pdf:
            for block in book_dl.iter_content(1024):
                # Skip empty chunks instead of aborting the download.
                if block:
                    pdf.write(block)
    finally:
        book_dl.close()
    return (book_id, filename)
def save_last_id():
    """Persist the ID of the book currently being processed to ``STAB_last_id``.

    Reads the module-level ``book_id`` (the loop variable of the script's
    download loop). If that name was never bound - the loop did not start -
    nothing is written, so an existing ``STAB_last_id`` file is left intact
    rather than being truncated.
    """
    # Look the name up before opening the file: opening with 'w' truncates it.
    current_id = globals().get('book_id')
    if current_id is None:
        return
    with open('STAB_last_id', 'w') as last_id:
        last_id.write(str(current_id))
# Script entry point: parse the ID range, resume from the saved position when
# no start ID is given, and download every book in the range.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='STealAllBooks from it-ebooks.info')
    # Defaults are None so that "not given" can be told apart from an explicit
    # value; the real defaults are resolved after parsing.
    parser.add_argument('start_id', nargs='?', metavar='start_id', type=int,
                        default=None,
                        help='ID of first ebook')
    parser.add_argument('end_id', nargs='?', metavar='end_id', type=int,
                        default=None,
                        help='ID of last ebook')
    parser.add_argument('--directory_prefix', '-P', metavar='directory', type=str,
                        default='.',
                        help='The directory where all ebooks will be saved to.')
    args = parser.parse_args()
    args.directory_prefix = os.path.normpath(args.directory_prefix)

    if args.start_id is None:
        # No explicit start: resume from the ID saved by a previous run,
        # falling back to 1 when the file is missing or unreadable.
        args.start_id = 1
        try:
            with open('STAB_last_id') as last_id:
                args.start_id = int(last_id.read())
        except (OSError, ValueError):
            pass
    if args.end_id is None:
        # Only hit the network for the latest ID when it is actually needed
        # (not for --help or when end_id was passed explicitly).
        args.end_id = latest_book_id()

    atexit.register(save_last_id)
    # Turn SIGTERM into a normal exit so the atexit hook runs.
    signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(1))
    try:
        # book_id must stay a module-level name: save_last_id() reads it.
        # end_id is the ID of the last ebook, so the range is inclusive.
        for book_id in range(args.start_id, args.end_id + 1):
            print('Downloading ebook #{id}...'.format(id=book_id))
            print(download_ebook(args.directory_prefix, book_id))
    except KeyboardInterrupt:
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment