Skip to content

Instantly share code, notes, and snippets.

@rgaudin
Created August 21, 2023 08:19
Show Gist options
  • Save rgaudin/2d63b7542377ec33a33a88ad47f0d2d4 to your computer and use it in GitHub Desktop.
Save rgaudin/2d63b7542377ec33a33a88ad47f0d2d4 to your computer and use it in GitHub Desktop.
List content of a ZIM file
#!/Users/reg/src/wikihow/.venv/bin/python3
# !/usr/bin/env python3
import argparse
import logging
import sys
import humanfriendly
from zimscraperlib.zim.archive import Archive
from zimscraperlib.logging import getLogger as lib_getLogger
# Program name: used as the logger name here and as the argparse `prog` in main().
NAME = "zimls"
# Version string printed by the `--version` command-line option.
VERSION = "0.1"
# Module-wide logger built with zimscraperlib's getLogger helper.
# NOTE(review): the level is fixed at DEBUG here and nothing in this file
# changes it, so the `--debug` flag does not affect the logger level.
logger = lib_getLogger(
NAME,
level=logging.DEBUG,
log_format="[%(asctime)s] %(levelname)s:%(message)s",
)
class ZimLs:
    """Print the size and path of every entry stored in a ZIM file."""

    def __init__(self, fpath, **kwargs):
        """Remember which ZIM file to list.

        Args:
            fpath: location of the ZIM file, handed to ``Archive`` in ``run``.
            **kwargs: accepted and ignored, so callers may pass every parsed
                command-line option without filtering them first.
        """
        self.fpath = fpath

    def run(self):
        """Write one ``<size> <path>`` line to stdout per entry in the archive.

        Entries are visited by index, from 0 up to ``all_entry_count - 1``.
        The size is the ``size`` of the item returned by ``entry.get_item()``,
        rendered with ``humanfriendly.format_size``. Returns ``None``.
        """
        archive = Archive(self.fpath)
        entry_ids = range(archive.all_entry_count)
        for entry_id in entry_ids:
            # NOTE(review): _get_entry_by_id is an underscore-prefixed method
            # of the archive object; kept exactly as the original used it.
            zim_entry = archive._get_entry_by_id(entry_id)
            size_label = humanfriendly.format_size(zim_entry.get_item().size)
            print(size_label, zim_entry.path)
def main():
    """Parse the command line and list the entries of the requested ZIM file.

    On success the process exits through ``sys.exit`` with whatever
    ``ZimLs.run`` returns. Any ``Exception`` raised while building or running
    the tool is logged as an error and converted to exit status 1; the full
    traceback is logged only when ``--debug`` was given.

    Raises:
        SystemExit: always, either from ``sys.exit`` on success, from argparse
            (``--version``, ``--help``, bad usage) or with code 1 on failure.
    """
    parser = argparse.ArgumentParser(
        prog=NAME,
        description="List content of a ZIM file",
    )
    parser.add_argument(
        "--debug", help="Enable verbose output", action="store_true", default=False
    )
    parser.add_argument(
        "--version",
        help=f"Display {NAME} version and exit",
        action="version",
        version=VERSION,
    )
    parser.add_argument("fpath", help="ZIM file to work off")
    args = parser.parse_args()

    try:
        # vars() is the documented way to view a Namespace as a dict; it
        # replaces the private Namespace._get_kwargs() helper.
        tool = ZimLs(**vars(args))
        # SystemExit is not an Exception subclass, so it passes through the
        # handler below untouched.
        sys.exit(tool.run())
    except Exception as exc:
        logger.error(f"FAILED. An error occurred: {exc}")
        if args.debug:
            logger.exception(exc)
        raise SystemExit(1)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment