Skip to content

Instantly share code, notes, and snippets.

@rgaudin
Created March 27, 2023 13:32
Show Gist options
  • Save rgaudin/dff138f41cf7bb7cf5e75e7a0379f2ef to your computer and use it in GitHub Desktop.
Save rgaudin/dff138f41cf7bb7cf5e75e7a0379f2ef to your computer and use it in GitHub Desktop.
Print basic information about a ZIM (via python-libzim)
#!/usr/bin/env python3
import argparse
import pathlib
import sys
import traceback
# python-libzim is the only third-party dependency of this script. When it
# cannot be imported, explain how to install it, show the underlying import
# error, and terminate with exit status 2.
try:
    from libzim.reader import Archive
except ImportError as exc:
    print(
        "ziminfo requires python-libzim",
        "Install it with `pip install libzim`",
        str(exc),
        sep="\n",
    )
    raise SystemExit(2)

# Version string of this script.
__version__ = "1.0"
# Module-level debug flag, initialised to False.
debug = False
def has_entry_matching(zim: Archive, path: str) -> bool:
    """Tell whether one of the last entries of the ZIM has the given path.

    Only the tail of the archive is inspected: at most the nine highest
    entry indexes (``all_entry_count - 1`` down to ``all_entry_count - 9``),
    never going below index 0.

    Args:
        zim: archive exposing ``all_entry_count`` and ``_get_entry_by_id()``.
        path: exact entry path to look for.

    Returns:
        True if one of the inspected entries has exactly this path, False
        otherwise (including when the archive holds no entry at all).
    """
    total = zim.all_entry_count
    # Clamp the lower bound so that small archives are scanned down to (and
    # including) index 0 and an empty archive yields an empty range instead of
    # querying a negative index.
    stop = max(total - 10, -1)
    return any(
        zim._get_entry_by_id(index).path == path
        for index in range(total - 1, stop, -1)
    )
def ziminfo(
    src_path: pathlib.Path,
    debug: bool,
) -> int:
    """Print a summary of a ZIM file (properties and metadata) to stdout.

    Args:
        src_path: path to the ZIM file; ``~`` is expanded and the path is
            resolved before the archive is opened.
        debug: accepted so the parsed command-line options can be passed
            straight through; not used by this function.

    Returns:
        0 once the report has been printed.

    Raises:
        IOError: if the archive cannot be opened, whatever the reason; the
            original exception is chained as the cause.
    """
    print(f"ZIM Info for {src_path}")
    src_path = src_path.expanduser().resolve()

    try:
        zim = Archive(src_path)
    except Exception as exc:
        # Any failure to open (missing file, unreadable, not a ZIM...) ends up
        # here, so the message does not claim a specific cause.
        raise IOError(
            f"Source ZIM ({src_path}) could not be opened: {exc}"
        ) from exc

    print("Properties")
    print(f" - UUID: {zim.uuid}")

    if zim.has_main_entry:
        main_entry_str = zim.main_entry.path
        if zim.main_entry.is_redirect:
            # Also show where the main entry redirects to.
            main_entry_str += f" ({zim.main_entry.get_redirect_entry().path})"
    else:
        main_entry_str = "None"
    print(f" - Main Entry: {main_entry_str}")

    print(f" - New NS scheme: {zim.has_new_namespace_scheme}")
    print(f" - Multipart: {zim.is_multipart}")
    print(f" - Has Full-Text Index: {zim.has_fulltext_index}")

    # v1 and v0 title listing
    listings = []
    if has_entry_matching(zim, "listing/titleOrdered/v0"):
        listings.append("v0")
    if has_entry_matching(zim, "listing/titleOrdered/v1"):
        listings.append("v1")
    print(f" - Has Title Index: {zim.has_title_index} {', '.join(listings)}")

    print(f" - Checksum: {zim.checksum if zim.has_checksum else 'None'}")
    print(f" - Entry Count: {zim.entry_count}")
    print(f" - All Entry Count: {zim.all_entry_count}")
    print(f" - Article Count: {zim.article_count}")
    print(f" - Media Count: {zim.media_count}")
    print(
        " - Illustration sizes: "
        f"{zim.get_illustration_sizes() if zim.has_illustration else 'None'}"
    )

    print("Metadata:")
    # Read the key list once; it is reused for the mandatory-metadata check.
    metadata_keys = list(zim.metadata_keys)
    for name in metadata_keys:
        item = zim.get_metadata_item(name)
        if item.mimetype.startswith("text/plain"):
            # A badly encoded value must not abort the whole report.
            preview = bytes(item.content).decode("UTF-8", errors="replace")
        else:
            preview = f"{item.mimetype} binary ({item.size} bytes)"
        print(f" - {name}: {preview}")

    mandatory_metadata = (
        "Title",
        "Description",
        "Creator",
        "Publisher",
        "Date",
        "Name",
        "Language",
    )
    present = set(metadata_keys)
    missing_mandatory_metadata = [
        name for name in mandatory_metadata if name not in present
    ]
    if missing_mandatory_metadata:
        print(f"Missing mandatory metadata: {', '.join(missing_mandatory_metadata)}.")

    return 0
def entrypoint():
    """Parse the command line, run ziminfo() and exit with its status.

    Exits with the value returned by ziminfo() on success. If ziminfo()
    raises, a one-line error is printed (followed by the full traceback when
    --debug was given) and the process exits with status 1.

    Raises:
        SystemExit: always, carrying the exit status.
    """
    epilog = "\n"
    parser = argparse.ArgumentParser(
        prog="ziminfo.py",
        description="Basic information from a ZIM file",
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("src_path")
    parser.add_argument(
        "--debug", help="Enable verbose output", action="store_true", default=False
    )
    parser.add_argument(
        "-v", "--version", action="version", version=f"%(prog)s {__version__}"
    )

    # vars() is the public way to turn the Namespace into keyword arguments.
    args = vars(parser.parse_args())
    args["src_path"] = pathlib.Path(args["src_path"])

    try:
        sys.exit(ziminfo(**args))
    except Exception as exc:
        print(f"ERROR. An {type(exc).__name__} error occurred: {exc}")
        # args is a dict, so the flag must be read by key.
        if args["debug"]:
            # Format the exception being handled; the three-argument form is
            # accepted by every Python 3 version.
            print(
                "".join(
                    traceback.format_exception(type(exc), exc, exc.__traceback__)
                )
            )
        raise SystemExit(1)


if __name__ == "__main__":
    entrypoint()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment