# Pullusb/merge_cbz_archives.py

## merge_cbz_archives.py
## ****
# Merge cbz archives v1.3
# Group 'cbz' archive content into bigger cbz archive chunks (made to group chapters on e-reader)
# put this script in the same folder as the cbz files and run it
# settings below: archive name, chunk size, folder containing the cbz to group (default: same folder as the script)
## ****

import re
import zipfile
from pathlib import Path

def right_num(s, before_point=True) -> str:
    '''Find the right-most run of digits in a string.

    s: text to inspect (typically a file stem)
    before_point: when True only the text located before the first '.' is
        inspected, so digits written after a decimal point are ignored
    return: the digits as a str, or None when the inspected text has no digit
    '''
    haystack = s.split('.')[0] if before_point else s
    # the negative lookahead refuses any match that still has a digit further right
    found = re.search(r'(\d+)(?!.*\d)', haystack)
    return found.group(1) if found else None

def longest_num(s) -> str:
    '''Find the run of digits having the most characters in a string.

    s: text to inspect (typically a file stem)
    return: the digits as a str, or None when the text has no digit.
        When several runs share the greatest length, the last one is returned.
    '''
    digit_runs = re.findall(r'\d+', s)
    if digit_runs:
        # max() keeps the first maximum it meets: walking backward makes the
        # last run of the string win on equal length
        return max(reversed(digit_runs), key=len)
    return None

### Define rules ------

## Define folder containing cbz archives
loc = Path(__file__).parent # same as script
# loc = Path(os.getcwd()) # working directory
# loc = Path(r"path/to/folder") # set folder manually

## name of the output archive(s), an empty answer falls back to "comic"
name = input('Name of archive(s) (if nothing specified named "comic"): ')
if not name:
    name = 'comic'

## number of cbz merged into each output archive, an empty answer falls back to 21
filelimit = input('Chunck size to merge (if nothing specified default to 21): ').strip()
if not filelimit:
    filelimit = '21'

## fallback: ask again until the answer is a strictly positive whole number
# isdecimal() is used instead of isnumeric(): isnumeric() also accepts characters
# such as '²' or '½' that int() cannot convert.
# '0' is refused too, the chunk size being used further down as a range() step.
while not (filelimit.isdecimal() and int(filelimit) > 0):
    filelimit = input('Chunck size to merge should be a number: ').strip()
    if not filelimit:
        filelimit = '21' # important to have it as str

filelimit = int(filelimit)

## method to find numbering in cbz names
num_fn = longest_num # by longest number in string (fails if there is an unrelated longer number in cbz name)
# num_fn = right_num # rightest number in string (fails if an unrelated number is written after the wanted one)


### Script ---------

def _natural_key(text):
    '''sort key ordering digit runs by value ("page2" before "page10"), ignoring case for the rest'''
    # re.split with a capturing group alternates plain text (even index) and digit runs (odd index)
    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(re.split(r'(\d+)', text))]

## list the cbz of the folder (directories skipped, extension compared case-insensitively so '.CBZ' is kept)
cbzs = [f for f in loc.iterdir() if not f.is_dir() and f.suffix.lower() == '.cbz']

## a cbz without any number in its name cannot be ordered (num_fn returns None and int() would raise)
unnumbered = [f for f in cbzs if num_fn(f.stem) is None]
if unnumbered:
    print('Skipped, no number found in name:')
    for f in unnumbered:
        print(f'- {f.name}')
    cbzs = [f for f in cbzs if f not in unnumbered]

## sorting
# cbzs.sort(key=lambda x: x.name) # sort alphabetical (fail with unconsistent padding)
cbzs.sort(key=lambda x: (int(num_fn(x.stem)), x.name)) # sort by evaluated number, name breaks ties

if not cbzs:
    print('No cbz archive to merge in:', loc)

print('Adding:')
for f in cbzs:
    print(f'- {f.name}')

## split the sorted list in chunks of at most `filelimit` archives
cbz_multilist = [cbzs[start : start+filelimit] for start in range(0, len(cbzs), filelimit)]

print(f'\nArchive Splits ({len(cbz_multilist)})')
for c in cbz_multilist:
    print(f'+ {c[0].name} -> {c[-1].name} ({len(c)})')

## merged archives go in an "output" folder created next to (not inside) the cbz folder
output = loc.parent / 'output'
output.mkdir(exist_ok=True)

for cbs in cbz_multilist:
    dest = output / f'{name}_{num_fn(cbs[0].stem)}-{num_fn(cbs[-1].stem)}.cbz'
    i = 1 # reset numeration (comment to keep continuous numeration over multiple cbz)

    print(f'--- archive {dest.name}:')
    with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zipf: # no compression (same as nothing specified, else use zipfile.ZIP_DEFLATED)
        for cb in cbs:
            prefix = cb.stem.strip(' _').replace(' ', '-')
            # members are copied straight from the source zip to the new one:
            # no temp folder involved, so nested folders inside a cbz are handled
            # and files that merely happen to sit in a temp folder are never touched
            with zipfile.ZipFile(cb, 'r') as zip_ref:
                pages = [member for member in zip_ref.infolist() if not member.is_dir()]
                # explicit ordering: the numeric prefix written below fixes the reading order
                pages.sort(key=lambda member: _natural_key(member.filename))
                for page in pages:
                    arcname = f"{i:04d}_{prefix}_{Path(page.filename).name}"
                    print(arcname, '>', page.filename)
                    zipf.writestr(arcname, zip_ref.read(page))
                    i += 1
    print('--file at:', dest)
    print()

print('Done')
input('Press enter to finish.')
	## ****
	# Merge cbz archives v1.3
	# Group 'cbz' archive content into bigger cbz archive chunks (made to group chapters on e-reader)
	# put this script in the same folder as the cbz files and run it
	# settings below: archive name, chunk size, folder containing the cbz to group (default: same folder as the script)
	## ****

	import re
	import zipfile
	from pathlib import Path

	def right_num(s, before_point=True) -> str:
	    '''return rightest number in string (as str), None when there is no digit
	    before_point: avoid taking number after a decimal
	        (only the text placed before the first '.' is inspected)
	    '''
	    if before_point:
	        res = re.search(r'(\d+)(?!.*\d)', s.split('.')[0])
	    else:
	        res = re.search(r'(\d+)(?!.*\d)', s)
	    if not res:
	        return None
	    return res.group(1)

	def longest_num(s) -> str:
	    '''return longest number (in term of character) in string, None when there is no digit
	    on equal length the last number of the string is returned
	    '''
	    res = re.findall(r'\d+', s)
	    if not res:
	        return None
	    # list.sort is stable: among the longest runs the last one found stays last
	    res.sort(key=len)
	    return res[-1]

	### Define rules ------

	## Define folder containing cbz archives
	loc = Path(__file__).parent # same as script
	# loc = Path(os.getcwd()) # working directory
	# loc = Path(r"path/to/folder") # set folder manually

	## name of the output archive(s), an empty answer falls back to "comic"
	name = input('Name of archive(s) (if nothing specified named "comic"): ')
	if not name:
	    name = 'comic'

	## number of cbz merged into each output archive, an empty answer falls back to 21
	filelimit = input('Chunck size to merge (if nothing specified default to 21): ').strip()
	if not filelimit:
	    filelimit = '21'

	## fallback: ask again until the answer is a strictly positive whole number
	# isdecimal() is used instead of isnumeric(): isnumeric() also accepts characters
	# such as '²' or '½' that int() cannot convert.
	# '0' is refused too, the chunk size being used further down as a range() step.
	while not (filelimit.isdecimal() and int(filelimit) > 0):
	    filelimit = input('Chunck size to merge should be a number: ').strip()
	    if not filelimit:
	        filelimit = '21' # important to have it as str

	filelimit = int(filelimit)

	## method to find numbering in cbz names
	num_fn = longest_num # by longest number in string (fails if there is an unrelated longer number in cbz name)
	# num_fn = right_num # rightest number in string (fails if an unrelated number is written after the wanted one)


	### Script ---------

	def _natural_key(text):
	    '''sort key ordering digit runs by value ("page2" before "page10"), ignoring case for the rest'''
	    # re.split with a capturing group alternates plain text (even index) and digit runs (odd index)
	    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(re.split(r'(\d+)', text))]

	## list the cbz of the folder (directories skipped, extension compared case-insensitively so '.CBZ' is kept)
	cbzs = [f for f in loc.iterdir() if not f.is_dir() and f.suffix.lower() == '.cbz']

	## a cbz without any number in its name cannot be ordered (num_fn returns None and int() would raise)
	unnumbered = [f for f in cbzs if num_fn(f.stem) is None]
	if unnumbered:
	    print('Skipped, no number found in name:')
	    for f in unnumbered:
	        print(f'- {f.name}')
	    cbzs = [f for f in cbzs if f not in unnumbered]

	## sorting
	# cbzs.sort(key=lambda x: x.name) # sort alphabetical (fail with unconsistent padding)
	cbzs.sort(key=lambda x: (int(num_fn(x.stem)), x.name)) # sort by evaluated number, name breaks ties

	if not cbzs:
	    print('No cbz archive to merge in:', loc)

	print('Adding:')
	for f in cbzs:
	    print(f'- {f.name}')

	## split the sorted list in chunks of at most `filelimit` archives
	cbz_multilist = [cbzs[start : start+filelimit] for start in range(0, len(cbzs), filelimit)]

	print(f'\nArchive Splits ({len(cbz_multilist)})')
	for c in cbz_multilist:
	    print(f'+ {c[0].name} -> {c[-1].name} ({len(c)})')

	## merged archives go in an "output" folder created next to (not inside) the cbz folder
	output = loc.parent / 'output'
	output.mkdir(exist_ok=True)

	for cbs in cbz_multilist:
	    dest = output / f'{name}_{num_fn(cbs[0].stem)}-{num_fn(cbs[-1].stem)}.cbz'
	    i = 1 # reset numeration (comment to keep continuous numeration over multiple cbz)

	    print(f'--- archive {dest.name}:')
	    with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zipf: # no compression (same as nothing specified, else use zipfile.ZIP_DEFLATED)
	        for cb in cbs:
	            prefix = cb.stem.strip(' _').replace(' ', '-')
	            # members are copied straight from the source zip to the new one:
	            # no temp folder involved, so nested folders inside a cbz are handled
	            # and files that merely happen to sit in a temp folder are never touched
	            with zipfile.ZipFile(cb, 'r') as zip_ref:
	                pages = [member for member in zip_ref.infolist() if not member.is_dir()]
	                # explicit ordering: the numeric prefix written below fixes the reading order
	                pages.sort(key=lambda member: _natural_key(member.filename))
	                for page in pages:
	                    arcname = f"{i:04d}_{prefix}_{Path(page.filename).name}"
	                    print(arcname, '>', page.filename)
	                    zipf.writestr(arcname, zip_ref.read(page))
	                    i += 1
	    print('--file at:', dest)
	    print()

	print('Done')
	input('Press enter to finish.')