Skip to content

Instantly share code, notes, and snippets.

Last active February 4, 2020 05:10
Show Gist options
  • Save dahlia/94001716bc5391148e19c0710045c37e to your computer and use it in GitHub Desktop.
Save dahlia/94001716bc5391148e19c0710045c37e to your computer and use it in GitHub Desktop.
Count keys from multiple Bencodex data files
#!/usr/bin/env python3
"""Count keys from multiple Bencodex data files."""
import collections
import collections.abc
import pathlib
import sys
from typing import Counter, Union

from bencodex import BValue, load
def is_vector(value: BValue) -> bool:
    """Checks if the given Bencodex value can have other Bencodex values.

    Returns ``True`` for mappings and for sequences other than ``str`` and
    ``bytes``; returns ``False`` for everything else.
    """
    # str and bytes are sequences too, but they are scalar values here,
    # so they have to be ruled out before the generic container check.
    if isinstance(value, (str, bytes)):
        return False
    return isinstance(
        value,
        (collections.abc.Mapping, collections.abc.Sequence),
    )
def count_keys(content: BValue) -> Counter[Union[str, bytes]]:
    """Counts every dictionary key that occurs anywhere in the given value.

    Mappings contribute their own keys; both mappings and sequences are
    then searched recursively for nested containers.  A value that is not
    a container yields an empty counter.
    """
    if not is_vector(content):
        return collections.Counter()
    summary: Counter[Union[str, bytes]]
    if isinstance(content, collections.abc.Mapping):
        summary = collections.Counter(content.keys())
        children = content.values()
    else:
        # is_vector() admits only mappings and sequences, so this is
        # the sequence case: no keys of its own, only children to visit.
        summary = collections.Counter()
        children = content
    for child in children:
        if is_vector(child):  # short circuit for optimization
            summary.update(count_keys(child))
    return summary
def count_keys_in_file(path: pathlib.Path) -> Counter[Union[str, bytes]]:
    """Counts keys in the file at the given path, or in a whole directory.

    If ``path`` is a directory, every entry in it is processed recursively
    and the counts are added together.  Otherwise the path is opened in
    binary mode, decoded with ``load()``, and handed to ``count_keys()``.
    """
    if path.is_dir():
        summary: Counter[Union[str, bytes]] = collections.Counter()
        for entry in path.iterdir():
            summary.update(count_keys_in_file(entry))
        return summary
    with path.open('rb') as f:
        content = load(f)
    return count_keys(content)
def main() -> None:
    """Command-line entry point: prints a key frequency table.

    Takes the path of a file or directory from the first command-line
    argument, counts the keys with ``count_keys_in_file()``, and prints
    one line per key, most common first.  If the argument is missing, an
    error is written to stderr and the process exits with status 1.
    """
    try:
        path = pathlib.Path(sys.argv[1])
    except IndexError:
        print('error: missing path', file=sys.stderr)
        raise SystemExit(1)
    summary: Counter[Union[str, bytes]] = count_keys_in_file(path)
    # Keys may be str or bytes.  bytes has no width-aware format support,
    # so each key is turned into a display string first and the column
    # width is measured on that string.
    # NOTE(review): the exact output layout is an assumption -- confirm.
    rows = [
        (key if isinstance(key, str) else repr(key), count)
        for key, count in summary.most_common()
    ]
    # default=0 keeps an empty summary from raising ValueError in max().
    key_max_len = max((len(label) for label, _ in rows), default=0)
    count_max_len = max((len(str(count)) for _, count in rows), default=0)
    for label, count in rows:
        print(f'{label:<{key_max_len}}  {count:>{count_max_len}}')
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
bencodex >= 1.0.0, < 2.0.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment