Created
March 27, 2023 13:32
-
-
Save rgaudin/dff138f41cf7bb7cf5e75e7a0379f2ef to your computer and use it in GitHub Desktop.
Print basic information about a ZIM (via python-libzim)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Print basic information about a ZIM file (via python-libzim)."""

import argparse
import pathlib
import sys
import traceback

# python-libzim is the only third-party import of this script: when it cannot
# be imported, print installation instructions followed by the import error,
# then exit with status 2.
try:
    from libzim.reader import Archive
except ImportError as exc:
    print("ziminfo requires python-libzim")
    print("Install it with `pip install libzim`")
    print(str(exc))
    sys.exit(2)

# version string reported by the -v/--version command-line option
__version__ = "1.0"
# module-level debug flag; not read by the code visible in this file
# (ziminfo() receives its own ``debug`` argument)
debug = False
def has_entry_matching(zim: Archive, path: str) -> bool:
    """Tell whether one of the last entries of the ZIM has exactly this path.

    Only the tail of the archive is scanned: at most the nine highest entry
    indexes, walking backwards from ``zim.all_entry_count - 1``.

    Args:
        zim: archive to inspect; only its ``all_entry_count`` attribute and
            its ``_get_entry_by_id`` method are used.
        path: entry path to look for, compared with strict equality.

    Returns:
        True as soon as a scanned entry has that path, False otherwise
        (including when the archive holds no entry at all).
    """
    total = zim.all_entry_count
    # Clamp the (exclusive) lower bound at -1 so that an archive smaller than
    # the scan window is walked down to index 0 included, and so that an empty
    # archive never triggers a lookup with a negative index.
    lower_bound = max(total - 10, -1)
    return any(
        zim._get_entry_by_id(index).path == path
        for index in range(total - 1, lower_bound, -1)
    )
def ziminfo(
    src_path: pathlib.Path,
    debug: bool,
) -> int:
    """Print a human-readable summary of a ZIM file to standard output.

    The report has a "Properties" section (UUID, main entry, namespace
    scheme, multipart flag, full-text and title indexes, checksum, entry
    counters, illustration sizes) followed by a "Metadata:" section with one
    line per metadata key, and a final warning line when mandatory metadata
    are absent.

    Args:
        src_path: location of the ZIM file; ``~`` is expanded and the path is
            resolved before the archive is opened.
        debug: accepted for the command-line interface; not used by this
            function.

    Returns:
        0 once the report has been printed.

    Raises:
        OSError: when the archive cannot be opened (``IOError`` is the same
            class); the original exception is attached as ``__cause__``.
    """
    print(f"ZIM Info for {src_path}")
    src_path = src_path.expanduser().resolve()

    try:
        zim = Archive(src_path)
    except Exception as exc:
        # Any failure to open the archive lands here, so the message must not
        # claim the file is missing; chain the cause to keep its traceback.
        raise OSError(f"Unable to open source ZIM ({src_path}): {exc}") from exc

    print("Properties")
    print(f" - UUID: {zim.uuid}")

    if zim.has_main_entry:
        main_entry = zim.main_entry
        main_entry_str = main_entry.path
        if main_entry.is_redirect:
            # also show where the main entry redirects to
            main_entry_str += f" ({main_entry.get_redirect_entry().path})"
    else:
        main_entry_str = "None"
    print(f" - Main Entry: {main_entry_str}")

    print(f" - New NS scheme: {zim.has_new_namespace_scheme}")
    print(f" - Multipart: {zim.is_multipart}")
    print(f" - Has Full-Text Index: {zim.has_fulltext_index}")

    # v0 and v1 title listings, detected by path among the last entries
    listings = [
        version
        for version in ("v0", "v1")
        if has_entry_matching(zim, f"listing/titleOrdered/{version}")
    ]
    print(f" - Has Title Index: {zim.has_title_index} {', '.join(listings)}")

    print(f" - Checksum: {zim.checksum if zim.has_checksum else 'None'}")
    print(f" - Entry Count: {zim.entry_count}")
    print(f" - All Entry Count: {zim.all_entry_count}")
    print(f" - Article Count: {zim.article_count}")
    print(f" - Media Count: {zim.media_count}")
    print(
        " - Illustration sizes: "
        f"{zim.get_illustration_sizes() if zim.has_illustration else 'None'}"
    )

    print("Metadata:")
    # read the key list once; it is reused for the mandatory-metadata check
    metadata_keys = list(zim.metadata_keys)
    for name in metadata_keys:
        item = zim.get_metadata_item(name)
        if item.mimetype.startswith("text/plain"):
            # a value that is not valid UTF-8 is shown with replacement
            # characters instead of aborting the whole report
            preview = bytes(item.content).decode("UTF-8", errors="replace")
        else:
            preview = f"{item.mimetype} binary ({item.size} bytes)"
        print(f" - {name}: {preview}")

    present = set(metadata_keys)
    missing_mandatory_metadata = [
        name
        for name in (
            "Title",
            "Description",
            "Creator",
            "Publisher",
            "Date",
            "Name",
            "Language",
        )
        if name not in present
    ]
    if missing_mandatory_metadata:
        print(f"Missing mandatory metadata: {', '.join(missing_mandatory_metadata)}.")

    return 0
def entrypoint():
    """Command-line entry point: parse arguments, run ``ziminfo`` and exit.

    Accepts a positional ``src_path`` plus the ``--debug`` and
    ``-v``/``--version`` options. The process always terminates through
    ``SystemExit``: with the value returned by ``ziminfo`` on success, or
    with 1 after printing an error line when ``ziminfo`` raises. With
    ``--debug`` the full traceback of the failure is printed as well.
    """
    # placeholder epilog: a single newline
    epilog = "\n"
    parser = argparse.ArgumentParser(
        prog="ziminfo.py",
        description="Basic information from a ZIM file",
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("src_path")
    parser.add_argument(
        "--debug", help="Enable verbose output", action="store_true", default=False
    )
    parser.add_argument(
        "-v", "--version", action="version", version=f"%(prog)s {__version__}"
    )

    # vars() is the documented way to view a Namespace as a dict; the keys
    # (src_path, debug) match the keyword arguments of ziminfo()
    args = vars(parser.parse_args())
    args["src_path"] = pathlib.Path(args["src_path"])

    try:
        sys.exit(ziminfo(**args))
    except Exception as exc:
        print(f"ERROR. An {type(exc).__name__} error occurred: {exc}")
        # args is a dict, hence key access; format_exc() renders the
        # exception currently being handled, including its traceback
        if args["debug"]:
            print(traceback.format_exc())
        raise SystemExit(1)
# run the command-line interface only when this file is executed as a script
if __name__ == "__main__":
    entrypoint()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment