Skip to content

Instantly share code, notes, and snippets.

@gokaybiz
Last active October 7, 2022 23:14
Show Gist options
  • Save gokaybiz/0d1b1caaeb0d036e5abb05ff79225d47 to your computer and use it in GitHub Desktop.
Save gokaybiz/0d1b1caaeb0d036e5abb05ff79225d47 to your computer and use it in GitHub Desktop.
pubhtml5 PDF book downloader
import requests
import re
import json
from os.path import realpath, dirname, exists
from os import makedirs
from shutil import rmtree
from PIL import Image
# Directory containing this script; the temporary page images and the
# generated PDFs are written beneath it.
BASE_PATH = dirname(realpath(__file__))
def download(path, url, retries=3, timeout=30):
    """Download ``url`` and write the response body to ``path``.

    Redirects are followed. A failed request (connection error, timeout or
    HTTP error status) is retried, but only up to ``retries`` attempts in
    total, so a permanently broken URL fails with the underlying error
    instead of retrying without bound.

    Args:
        path: Destination file path; an existing file is overwritten.
        url: URL to fetch.
        retries: Maximum number of attempts. At least one attempt is always
            made.
        timeout: Timeout in seconds passed to ``requests.get``.

    Raises:
        requests.RequestException: If every attempt fails.
        OSError: If the destination file cannot be written.
    """
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            response = requests.get(url, allow_redirects=True, timeout=timeout)
            # Reject 4xx/5xx responses so that an error page is never saved
            # as if it were the requested file.
            response.raise_for_status()
        except requests.RequestException:
            if attempt == attempts:
                raise
        else:
            # Written outside the ``try`` so that a local write failure is
            # reported immediately rather than triggering another request.
            with open(path, 'wb') as image:
                image.write(response.content)
            return
def convert(inputSequence, outputDir):
    """Merge image files into a single multi-page PDF.

    Args:
        inputSequence: Iterable of image file paths, in page order.
        outputDir: Output path without extension; ``.pdf`` is appended.

    Raises:
        ValueError: If ``inputSequence`` contains no paths.
    """
    imagePaths = list(inputSequence)
    if not imagePaths:
        # Fail with a clear message instead of an IndexError on images[0].
        raise ValueError('convert() needs at least one input image')

    images = []
    for imagePath in imagePaths:
        # convert() returns a separate copy of the image, so the source file
        # can be closed as soon as the copy has been made.
        with Image.open(imagePath) as source:
            images.append(source.convert('RGB'))

    # The first image drives the save; the remaining ones become extra pages.
    images[0].save(f'{outputDir}.pdf', save_all=True, append_images=images[1:])
def getBookList(url, timeout=30):
    """Fetch a bookcase page and decode the ``bookData`` JSON embedded in it.

    Args:
        url: URL of the bookcase page.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The value produced by ``json.loads`` for the text captured after
        ``bookData:`` in the page source.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
        ValueError: If the page contains no ``bookData`` entry, or the
            captured text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    found = re.search(r'bookData:\s(.*?)[\s]+}', response.text)
    if found is None:
        # Report a changed page layout explicitly instead of an IndexError.
        raise ValueError(f'no bookData found in page: {url}')
    return json.loads(found.group(1))
def _safeFileName(title):
    """Return ``title`` reduced to a string usable as one path component.

    Path separators, characters that are illegal in Windows file names and
    control characters are replaced with ``_``; surrounding spaces and dots
    are stripped so that names such as ``..`` cannot be produced.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', str(title)).strip(' .')
    return cleaned or 'untitled'


# Download every book listed in the bookcase and store each one as
# "<BASE_PATH>/<title>.pdf".
bookList = getBookList('https://pubhtml5.com/bookcase/dofw')
for book in bookList:
    title = book['title']
    totalPageCount = int(book['pages'])
    baseUrl = book['url']

    # The title comes from the remote page, so it is sanitised before being
    # used in paths that are later created and recursively deleted.
    safeTitle = _safeFileName(title)
    tmpSavePath = f'{BASE_PATH}/tmp/{safeTitle}'
    makedirs(tmpSavePath, exist_ok=True)

    try:
        pagePaths = []
        # Pages are numbered from 1 on the server; the same number is used
        # for the local file name.
        for page in range(1, totalPageCount + 1):
            pagePath = f'{tmpSavePath}/{page}.jpg'
            download(pagePath, f'{baseUrl}files/large/{page}.jpg')
            pagePaths.append(pagePath)
        convert(pagePaths, f'{BASE_PATH}/{safeTitle}')
    finally:
        # Remove the page images even when a download or conversion fails.
        rmtree(tmpSavePath)

    # User-facing message (Turkish): "<title> (<n> pages) downloaded
    # successfully!"
    print(f'{title} ({totalPageCount} sf.) basariyla indirildi!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment