Skip to content

Instantly share code, notes, and snippets.

@dahlia
Last active February 4, 2020 05:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dahlia/94001716bc5391148e19c0710045c37e to your computer and use it in GitHub Desktop.
Save dahlia/94001716bc5391148e19c0710045c37e to your computer and use it in GitHub Desktop.
Count keys from multiple Bencodex data files
#!/usr/bin/env python3
"""Count keys from multiple Bencodex data files."""
import collections
import collections.abc
import pathlib
import sys
from typing import Counter, Union
from bencodex import BValue, load
def is_vector(value: BValue) -> bool:
    """Tell whether a Bencodex value is a container of other values.

    Mappings and sequences count as containers.  ``str`` and ``bytes``
    are sequences as far as Python is concerned, but they are treated as
    scalars here, so they yield ``False``.
    """
    scalar_types = (str, bytes)
    container_types = (collections.abc.Mapping, collections.abc.Sequence)
    return (
        not isinstance(value, scalar_types)
        and isinstance(value, container_types)
    )
def count_keys(content: BValue) -> Counter[Union[str, bytes]]:
    """Count every dictionary key found anywhere inside ``content``.

    The value is walked recursively: each mapping contributes one count
    per key it holds, and both mappings and sequences are descended into
    so that keys of nested mappings are tallied as well.  Scalar values
    (as judged by ``is_vector``) produce an empty counter.
    """
    tally = collections.Counter()
    if not is_vector(content):
        return tally
    if isinstance(content, collections.abc.Mapping):
        # A mapping contributes its own keys, then its values are walked.
        tally.update(content.keys())
        children = content.values()
    else:
        # A non-mapping container is a sequence; walk its elements.
        children = content
    for child in children:
        # Skip scalars up front to avoid a pointless recursive call.
        if is_vector(child):
            tally.update(count_keys(child))
    return tally
def count_keys_in_file(path: pathlib.Path) -> Counter[Union[str, bytes]]:
    """Count dictionary keys in a Bencodex file, or in a whole directory.

    A non-directory ``path`` is opened in binary mode, decoded with
    ``load`` and handed to ``count_keys``.  A directory is traversed
    recursively and the counts of all of its entries are added together.
    """
    if not path.is_dir():
        with path.open('rb') as stream:
            decoded = load(stream)
        return count_keys(decoded)
    total = collections.Counter()
    for entry in path.iterdir():
        total.update(count_keys_in_file(entry))
    return total
def main() -> None:
    """Command-line entry point: print how often each key occurs.

    The first command-line argument is the path of a Bencodex file or of
    a directory to scan.  One line is printed per key, most frequent
    first: the ``repr`` of the key padded to a common width, a tab, and
    the count padded to a common width.  Nothing is printed when no keys
    were found.

    Raises:
        SystemExit: with status 1 when the path argument is missing.
    """
    try:
        path = pathlib.Path(sys.argv[1])
    except IndexError:
        print('error: missing path', file=sys.stderr)
        raise SystemExit(1)
    summary: Counter[Union[str, bytes]] = count_keys_in_file(path)
    if not summary:
        # max() raises ValueError on an empty iterable; with no keys
        # there is nothing to report anyway.
        return
    # The keys are printed through repr() (quotes, b'' prefix, escapes),
    # so the column width has to be measured on the repr, not on the key.
    key_max_len = max(len(repr(k)) for k in summary)
    count_max_len = max(len(str(c)) for c in summary.values())
    for key, count in summary.most_common():
        print(f'{key!r:{key_max_len}}\t{count:{count_max_len}}')
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
bencodex >= 1.0.0, < 2.0.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment