Skip to content

Instantly share code, notes, and snippets.

@Pullusb
Last active August 20, 2022 17:50
Show Gist options
  • Save Pullusb/a572fc54eff42efff59148fd68158f0e to your computer and use it in GitHub Desktop.
Save Pullusb/a572fc54eff42efff59148fd68158f0e to your computer and use it in GitHub Desktop.
## ****
# Merge cbz archives v1.3
# Group the content of 'cbz' archives into bigger cbz archive chunks (made to group chapters on an e-reader)
# Put this script in the same folder as the cbz files and run it
# Settings below: archive name, chunk size, folder containing the cbz files to group (defaults to the script's folder)
## ****
import re
import zipfile
from pathlib import Path
from typing import Optional
def right_num(s, before_point=True) -> Optional[str]:
    '''Return the rightmost number found in a string.

    s: text to scan (typically a file stem such as "Vol.2 Chapter 12.5")
    before_point: when True, a decimal such as "12.5" is treated as one
        number and only its integer part ("12") is returned, so the digits
        after the point are never picked up on their own

    Return the digits as a string, or None when the string has no digit.
    '''
    # With before_point, the optional non-capturing group swallows the
    # fractional part, so findall only yields the integer part of each number.
    # (Cutting the string at its first dot would also drop every number that
    # merely follows an unrelated dot, e.g. the "12" of "Vol.2 Chapter 12".)
    pattern = r'(\d+)(?:\.\d+)?' if before_point else r'\d+'
    numbers = re.findall(pattern, s)
    if not numbers:
        return None
    return numbers[-1]
def longest_num(s) -> Optional[str]:
    '''Return the longest number (in terms of characters) found in a string.

    s: text to scan (typically a file stem)

    Return the digits as a string, or None when the string has no digit.
    When several numbers share the maximum length, the rightmost one wins.
    '''
    numbers = re.findall(r'\d+', s)
    if not numbers:
        return None
    # max() keeps the first maximum it meets, so scanning from the right
    # makes the rightmost of the longest numbers win.
    return max(reversed(numbers), key=len)
### Define rules ------
## Define folder containing cbz archives
loc = Path(__file__).parent # same as script
# loc = Path.cwd() # working directory
# loc = Path(r"path/to/folder") # set folder manually

## name of the output archive(s)
name = input('Name of archive(s) (if nothing specified named "comic"): ').strip()
if not name:
    name = 'comic'

## split all cbz in sublists to create multiple fat cbz according to given file limit
filelimit = input('Chunck size to merge (if nothing specified default to 21): ').strip()
if not filelimit:
    filelimit = '21'

## fallback: keep asking until the answer is a strictly positive integer
## - isdecimal() only accepts characters that int() can parse (isnumeric() also
##   accepts things like superscript digits, on which int() raises)
## - a chunk size of 0 is refused because it is later used as a range() step
while not (filelimit.isdecimal() and int(filelimit) > 0):
    filelimit = input('Chunck size to merge should be a number: ').strip()
    if not filelimit:
        filelimit = '21' # important to have it as str
filelimit = int(filelimit)

## method to find numbering in cbz names
num_fn = longest_num # by longest number in string (fail if there is an unrelated longest number in cbz name)
# num_fn = right_num # rightest number in string (fail if an unrelated number sits further right in cbz name)
### Script ---------
def _natural_key(text):
    '''Sort key comparing digit runs by value, so "2.jpg" sorts before "10.jpg".'''
    # re.split with a capturing group alternates text / digits / text ...,
    # so odd positions are always the digit runs.
    parts = re.split(r'(\d+)', text)
    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(parts)]


## gather the cbz files located directly in the folder (sub-folders are ignored)
cbzs = [f for f in loc.iterdir() if not f.is_dir() and f.suffix.lower() == '.cbz']

## a file without any number cannot be ordered: report it and leave it out
## instead of crashing on int(None) while sorting
unnumbered = [f for f in cbzs if num_fn(f.stem) is None]
if unnumbered:
    print('Skipped (no number found in name):')
    for f in unnumbered:
        print(f'- {f.name}')
    cbzs = [f for f in cbzs if f not in unnumbered]

## sorting
# cbzs.sort(key=lambda x: x.name) # sort alphabetical (fail with inconsistent padding)
cbzs.sort(key=lambda x: int(num_fn(x.stem))) # sort by evaluated number

print('Adding:')
for f in cbzs:
    print(f'- {f.name}')

## split the sorted list into chunks of at most `filelimit` archives
cbz_multilist = [cbzs[start : start+filelimit] for start in range(0, len(cbzs), filelimit)]

print(f'\nArchive Splits ({len(cbz_multilist)})')
for c in cbz_multilist:
    print(f'+ {c[0].name} -> {c[-1].name} ({len(c)})')

## merged archives are written to an 'output' folder next to (not inside) the source folder
output = loc.parent / 'output'
output.mkdir(exist_ok=True)

for cbs in cbz_multilist:
    dest = output / f'{name}_{num_fn(cbs[0].stem)}-{num_fn(cbs[-1].stem)}.cbz'
    i = 1 # reset numeration (move above the loop to keep continuous numeration over multiple cbz)
    print(f'--- archive {dest.name}:')
    with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zipf: # no compression (same as nothing specified, else use zipfile.ZIP_DEFLATED)
        # stream every page straight from the source archive into the big one:
        # no temp folder to clean up, and pages nested in sub-folders are handled
        for cb in cbs:
            prefix = cb.stem.strip(' _').replace(' ', '-')
            with zipfile.ZipFile(cb, 'r') as zip_ref:
                pages = [info for info in zip_ref.infolist() if not info.is_dir()]
                # explicit ordering: the numeric prefix below must follow page order
                pages.sort(key=lambda info: _natural_key(info.filename))
                for info in pages:
                    # zip member names always use '/', keep only the file name
                    page_name = info.filename.rsplit('/', 1)[-1]
                    arcname = f"{i:04d}_{prefix}_{page_name}"
                    print(arcname, '<', f'{cb.name}:{info.filename}')
                    zipf.writestr(arcname, zip_ref.read(info))
                    i += 1
    print('--file at:', dest)
    print()

print('Done')
input('Press enter to finish.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment