Skip to content

Instantly share code, notes, and snippets.

@Vesihiisi
Created May 31, 2021 10:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Vesihiisi/33c94345dde142412955531aef3fbfcf to your computer and use it in GitHub Desktop.
Save Vesihiisi/33c94345dde142412955531aef3fbfcf to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""One trick script to
collate sets of TIFF files spread across multiple
directories into smaller DjVu files.
Used for the upload of manuscripts
from Musikverket, 2021-05.
"""
import os
from shutil import which # used for djvu conversion
from subprocess import run # used for djvu conversion
from tqdm import tqdm
def list_subdirs(path):
    """Return the paths of the directories located directly inside *path*.

    Only immediate children are considered; entries that are not
    directories are ignored. The order is whatever ``os.scandir`` yields.
    """
    subdirs = []
    for entry in os.scandir(path):
        if entry.is_dir():
            subdirs.append(entry.path)
    return subdirs
def can_djvu():
    """
    Tell whether the DjVu command-line tools are available.

    Returns True only when both DjVuLibre programs ``djvm`` and ``c44``
    can be located on PATH as executables, False otherwise.
    """
    required_tools = ('djvm', 'c44')
    return all(which(tool) is not None for tool in required_tools)
def create_work_djvu(path):
    """
    Bundle all ``.tif`` pages found in *path* into one multi-page DjVu.

    The output file is named ``<parent dir>---<dir>.djvu`` and, being a
    bare file name, is written to the current working directory. Pages
    are added in sorted file-name order. Each page is first converted to
    JPEG with the external ``convert`` command (presumably ImageMagick),
    encoded to a single-page DjVu with ``c44`` and then added to the
    bundle with ``djvm``.

    Temporary files are created inside *path* and are removed even when
    one of the external commands fails. A directory without any ``.tif``
    file is skipped and no output is produced.

    :param path: directory holding the ``.tif`` pages of one work.
    :raises subprocess.CalledProcessError: if any external command exits
        with a non-zero status (``check=True``).
    """
    # Normalise once so that a trailing separator cannot make the parent
    # name and the directory's own name resolve to the same component.
    work_dir = os.path.normpath(path)
    parent_name = os.path.basename(os.path.dirname(work_dir))
    book_djvu = parent_name + "---" + os.path.basename(work_dir) + ".djvu"
    tmp_djvu = os.path.join(path, "tmp.djvu")
    files_to_process = sorted(
        x for x in os.listdir(path) if x.endswith(".tif"))

    if not files_to_process:
        # Nothing to bundle: no tmp.djvu would ever be created, so there
        # is nothing to clean up and no book to report as completed.
        print("====== Skipping {} (no .tif files) ======".format(book_djvu))
        return

    print("====== Starting {} ======".format(book_djvu))
    try:
        for i, page in tqdm(enumerate(files_to_process, 1),
                            total=len(files_to_process)):
            tmp_jpg = os.path.join(path, "{}.jpg".format(page))
            try:
                run(['convert', os.path.join(path, page), tmp_jpg],
                    check=True)
                run(['c44', '-crcbfull', tmp_jpg, tmp_djvu], check=True)
                # The first page creates the bundle (-c); every later
                # page is inserted into the existing one (-i).
                djvm_mode = '-c' if i == 1 else '-i'
                run(['djvm', djvm_mode, book_djvu, tmp_djvu], check=True)
            finally:
                # Never leave the intermediate JPEG next to the sources.
                if os.path.exists(tmp_jpg):
                    os.remove(tmp_jpg)
    finally:
        # tmp.djvu is reused for every page; remove it once at the end,
        # also when a command failed half-way through.
        if os.path.exists(tmp_djvu):
            os.remove(tmp_djvu)
    print("====== Completed {} ======".format(book_djvu))
def main():
    """
    Build one DjVu per work directory below the current directory.

    Every first-level directory of ``.`` is visited in sorted order.
    Directories whose lower-cased path contains one of the excluded
    substrings are skipped; for all others, each of their immediate
    subdirectories is passed to ``create_work_djvu``.
    """
    skip_markers = ["system volume information", "$", "mediaexplorer",
                    "recycle", "trash", "!"]  # irrelevant directories
    for top_dir in sorted(list_subdirs('.')):
        lowered = top_dir.lower()
        is_relevant = not any(marker in lowered for marker in skip_markers)
        if is_relevant:
            for work_dir in list_subdirs(top_dir):
                create_work_djvu(work_dir)
# Script entry point: run the batch only when the DjVu tools are present.
if __name__ == "__main__":
    if can_djvu():
        main()
    else:
        raise Exception('Djvu utils djvm and c44 not found.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment