Skip to content

Instantly share code, notes, and snippets.

@neckothy
Created January 18, 2024 09:24
Show Gist options
  • Save neckothy/6e8aa12842a72b14d5c0359d80801d92 to your computer and use it in GitHub Desktop.
Save neckothy/6e8aa12842a72b14d5c0359d80801d92 to your computer and use it in GitHub Desktop.
get volumes and chapters included in a manga archive
# really low effort, assumes internal page names follow something close to the Madokami naming scheme
# originally started to be able to auto update tracking sites before I realized I don't care enough
# usage:
# > python get_vapters.py path/to/archive.cbz [path/to/additional/archives.cbz]
# example output (one line per archive):
# path/to/archive.cbz: v01, c001-005
import os
import re
import sys
import zipfile
def get_zip_file(file_path):
    """Open the zip archive at *file_path* and return its ZipFile object.

    The archive is closed again before it is returned, so the result is
    only useful for metadata gathered at open time (the rest of this file
    reads its ``filelist`` and ``filename``), not for extracting members.
    """
    archive = zipfile.ZipFile(file_path)
    try:
        return archive
    finally:
        # Runs before the caller receives the object.
        archive.close()
def is_zip(file_path):
    """Return True if *file_path* ends in a zip-style extension.

    The check is purely name-based (``.zip`` / ``.cbz``) and ignores case.
    A path that contains no dot at all has no extension and yields False
    instead of raising.
    """
    zip_exts = ("zip", "cbz")
    # rpartition never raises: with no "." present the separator is "".
    _, dot, ext = file_path.rpartition(".")
    return dot != "" and ext.lower() in zip_exts
def is_image(file_name):
    """Return True if *file_name* ends in a known image extension.

    The check is name-based and ignores case, so ``page.JPG`` counts.
    A name that contains no dot at all has no extension and yields False
    instead of raising.
    """
    img_exts = ("png", "jpg", "jpeg", "gif", "webp", "avif")
    # rpartition never raises: with no "." present the separator is "".
    _, dot, ext = file_name.rpartition(".")
    return dot != "" and ext.lower() in img_exts
def page_names_from_zip(zip_file):
    """Return the names of all image members listed in *zip_file*.

    Directory entries and members that ``is_image`` rejects are left out.
    """
    page_names = []
    # Walk a snapshot of the member list rather than the live attribute.
    for entry in zip_file.filelist.copy():
        if entry.is_dir():
            continue
        if is_image(entry.filename):
            page_names.append(entry.filename)
    return page_names
def parse_page_names(page_names):
    """Extract the volume and chapter numbers found in *page_names*.

    A name contributes only if it contains ``<title> - cNNN (vNN)`` followed
    by at least one more character; other names are ignored.

    Returns a ``(volumes, chapters)`` tuple of integer lists. Each number
    appears once, in order of first appearance.
    """
    pattern = re.compile(r"([\w\d ]+) - c(\d{3,4}) \(v(\d{2,3})\).+")
    volumes, chapters = [], []
    seen_volumes, seen_chapters = set(), set()
    for name in page_names:
        match = pattern.search(name)
        if not match:
            continue
        # Convert before the membership test: comparing the matched string
        # against stored ints never matches, which lets duplicates through.
        chapter = int(match.group(2))
        volume = int(match.group(3))
        if volume not in seen_volumes:
            seen_volumes.add(volume)
            volumes.append(volume)
        if chapter not in seen_chapters:
            seen_chapters.add(chapter)
            chapters.append(chapter)
    return (volumes, chapters)
def get_ranges(numbers):
    """Collapse *numbers* into a list of inclusive ``(start, end)`` runs.

    Input is de-duplicated and sorted first. A new run starts whenever the
    next value is more than one greater than the previous value. A lone
    value becomes ``(n, n)``; empty input gives an empty list.
    """
    ordered = sorted(set(numbers))
    runs = []
    if not ordered:
        return runs
    run_start = previous = ordered[0]
    for value in ordered[1:]:
        if previous + 1 < value:
            # Gap found: close the current run and begin a new one.
            runs.append((run_start, previous))
            run_start = value
        previous = value
    runs.append((run_start, previous))
    return runs
def format_ranges(ranges, range_type):
    """Render *ranges* as a comma-separated string of zero-padded numbers.

    Numbers are padded to 2 digits when *range_type* is ``"volume"`` and to
    3 digits otherwise. A run whose padded ends are equal is written once;
    any other run is written as ``start-end``.
    """
    width = 2 if range_type == "volume" else 3
    pieces = []
    for span in ranges:
        low = str(span[0]).zfill(width)
        high = str(span[1]).zfill(width)
        pieces.append(low if low == high else f"{low}-{high}")
    return ", ".join(pieces)
if __name__ == "__main__":
    # Each command-line argument is a candidate archive path. Arguments that
    # are not existing files with a zip-style name are skipped silently.
    for candidate in sys.argv[1:]:
        if not (os.path.isfile(candidate) and is_zip(candidate)):
            continue
        zip_file = get_zip_file(candidate)
        volumes, chapters = parse_page_names(page_names_from_zip(zip_file))
        volume_str = format_ranges(get_ranges(volumes), "volume")
        chapter_str = format_ranges(get_ranges(chapters), "chapter")
        # One summary line per archive, e.g. "archive.cbz: v01, c001-005".
        print(f"{zip_file.filename}: v{volume_str}, c{chapter_str}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment