Skip to content

Instantly share code, notes, and snippets.

@Krazybug
Created June 25, 2018 22:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Krazybug/906033c42042909e67706a61d998f98f to your computer and use it in GitHub Desktop.
Save Krazybug/906033c42042909e67706a61d998f98f to your computer and use it in GitHub Desktop.
Calibre Downloader
import requests
import json
import os
import time
# TODO:
# - handle formats by storing the file hash
# - catch exceptions
# - add a CLI
# - allow 3 modes: update metadata, update file, and append filename
# - a query for searching, e.g. http://localhost:8080/ajax/search?sort=id&sort_order=desc
# - buffer the files
# - start/stop index
# - store an index
# - id = timestamp + process id
# - cover and json with the same name
# - factor out shared code
# - json None
# - search engine
# - generation of a clickable html page
# - set a size limit
# - password-protected access
# - debug mode
def get_file(url, path, id, format):
    """Download a book file from `url` and save it under `path`.

    The filename is taken from the server's Content-Disposition header
    when present; otherwise it falls back to "<id>.<format>".

    NOTE(review): `id` and `format` shadow builtins — kept as-is to
    preserve the public signature for existing callers.
    """
    print(url)
    r1 = requests.get(url)
    try:
        # Prefer the filename the server supplies.
        f_name = path + r1.headers['Content-Disposition'].split('filename=')[1].strip('"')
        print(f_name)
    except (KeyError, IndexError):
        # Header absent or malformed: build a name from the book id and format.
        f_name = path + id + "." + format
    os.makedirs(os.path.dirname(f_name), exist_ok=True)
    with open(f_name, 'wb') as fd:
        fd.write(r1.content)
def get_cover(url, path):
    """Fetch the cover image at `url` and store it as cover.jpg inside `path`."""
    response = requests.get(url)
    target = path + "cover.jpg"
    print(target)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as out:
        out.write(response.content)
# Crawl a Calibre content server's AJAX API: page through the library,
# download each book's formats and cover, and write a metadata.json per book.
max_size = 1000 * 1024 * 1024  # skip any single format larger than ~1 GB
offset = 0
num = 25  # page size for the search endpoint

server = "http://188.96.212.209:8080"
url = server + '/ajax/search/Zeitschrift_XXX?num=0'
print(url)
r = requests.get(url)
total_num = int(r.json()["total_num"])

my_formats = ['azw3', 'epub', 'pdf', 'mobi', 'doc', 'zip', 'txt', 'chm']

# 1-based position of the current book (renamed from `range`, which
# shadowed the builtin).
rank = offset + 1
while offset < total_num:
    print("offset=", str(offset))
    url = server + '/ajax/search/Zeitschrift_XXX?num=' + str(num) + '&offset=' + str(offset)
    print(url)
    r = requests.get(url)
    # Cache the parsed body: r.json() re-parses the response on every call.
    page = r.json()
    print("from: ", str(offset), " to: ", str(offset + int(page['num'])))
    books_s = ",".join(str(i) for i in page['book_ids'])
    url = server + '/ajax/books/Zeitschrift_XXX?ids=' + books_s
    r = requests.get(url)
    print(url)
    books = r.json()
    print(len(books))
    for book_id in books.keys():
        print('-> range={}/{}'.format(str(rank), str(total_num)))
        meta = books[book_id]
        book = {}
        book['formats'] = list(set(meta['formats']) & set(my_formats))
        book['title'] = meta['title']
        print('--> {}: {}'.format(book_id, book['title']))
        # Iterate over a COPY: the original iterated the same list it was
        # removing from, which silently skips the element after each removal.
        for f in list(book['formats']):
            fmt_meta = meta['format_metadata'][f]
            if 'size' not in fmt_meta or max_size < int(fmt_meta['size']):
                book['formats'].remove(f)
                print("format {} ignored for {}:'{}' too large)".format(f, book_id, book['title']))
        f_path = 'import/' + book_id + '/'
        if not len(book['formats']):
            print("'{}' ignored: no more format available in {})".format(book['title'], (meta['formats'])))
        else:
            for f in book['formats']:
                # Each format's download path lives in either main_format
                # or other_formats.
                if f in meta['main_format']:
                    url_path = meta['main_format'][f]
                else:
                    url_path = meta['other_formats'][f]
                print("--->", url_path)
                url = server + url_path
                get_file(url, f_path, book_id, f)
        url_path = meta['cover']
        url = server + url_path
        print("---->", url_path)
        get_cover(url, f_path)
        book['id'] = book_id
        book['source'] = server + '/calibre/ajax/book/' + book_id
        # Copy the remaining metadata fields verbatim.
        for key in ('authors', 'uuid', 'identifiers', 'pubdate', 'publisher',
                    'languages', 'comments', 'series', 'tags'):
            book[key] = meta[key]
        print(book)
        filename = f_path + 'metadata.json'
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as fd:
            json.dump(book, fd)
        rank = rank + 1
    offset = offset + num
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment