Last active
February 4, 2020 05:10
-
-
Save dahlia/94001716bc5391148e19c0710045c37e to your computer and use it in GitHub Desktop.
Count keys from multiple Bencodex data files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Count keys from multiple Bencodex data files.""" | |
import collections | |
import collections.abc | |
import pathlib | |
import sys | |
from typing import Counter, Union | |
from bencodex import BValue, load | |
def is_vector(value: BValue) -> bool:
    """Tell whether a Bencodex value is a container of other values.

    Mappings and sequences count as containers.  ``str`` and ``bytes`` are
    sequences too, but they are treated as scalars here and yield ``False``.
    """
    container_types = (collections.abc.Mapping, collections.abc.Sequence)
    text_like_types = (str, bytes)
    return (
        isinstance(value, container_types)
        and not isinstance(value, text_like_types)
    )
def count_keys(content: BValue) -> Counter[Union[str, bytes]]:
    """Count how often each dictionary key occurs anywhere inside a value.

    Keys of every mapping reachable from ``content`` are tallied, at any
    nesting depth.  Sequences are walked through but contribute no keys of
    their own.  A value that is not a container (see ``is_vector``) yields
    an empty counter.

    The walk is iterative (explicit stack) rather than recursive, so deeply
    nested data cannot exhaust the interpreter's recursion limit.  Containers
    are visited in pre-order, which keeps the counter's insertion order (and
    therefore the tie order of ``most_common()``) the same as a depth-first
    recursive walk.
    """
    summary: Counter[Union[str, bytes]] = collections.Counter()
    if not is_vector(content):
        return summary
    pending = [content]
    while pending:
        node = pending.pop()
        if isinstance(node, collections.abc.Mapping):
            summary.update(node.keys())
            children = node.values()
        else:
            # Everything on the stack passed is_vector(), so a non-mapping
            # here is a (non-str, non-bytes) sequence.
            children = node
        # Short circuit: scalars never reach the stack.
        nested = [child for child in children if is_vector(child)]
        # Push in reverse so the first child is popped (visited) first.
        pending.extend(reversed(nested))
    return summary
def count_keys_in_file(path: pathlib.Path) -> Counter[Union[str, bytes]]:
    """Count dictionary keys in a Bencodex file, or in a whole directory.

    A non-directory ``path`` is opened in binary mode, decoded with ``load``
    and passed to ``count_keys``.  A directory is descended into: the counts
    of every entry returned by ``iterdir()`` (files and subdirectories
    alike) are added together.
    """
    if not path.is_dir():
        with path.open('rb') as stream:
            decoded = load(stream)
        return count_keys(decoded)

    totals: Counter[Union[str, bytes]] = collections.Counter()
    for entry in path.iterdir():
        totals.update(count_keys_in_file(entry))
    return totals
def main() -> None:
    """Command-line entry point: print a key-frequency table for a path.

    The path is taken from ``sys.argv[1]``.  When it is missing, an error is
    written to standard error and the process exits with status 1.

    Each output line holds the ``repr`` of a key and its count, separated by
    a tab, most common key first.  Both columns are padded to the widest
    entry.  Nothing is printed when no keys were found.
    """
    try:
        path = pathlib.Path(sys.argv[1])
    except IndexError:
        print('error: missing path', file=sys.stderr)
        raise SystemExit(1)
    summary: Counter[Union[str, bytes]] = count_keys_in_file(path)
    # Measure the repr, not the raw key: the repr (quotes, b-prefix, escapes)
    # is what actually gets printed.  ``default=0`` keeps an empty summary
    # from raising ValueError in max().
    key_max_len = max((len(repr(k)) for k in summary), default=0)
    count_max_len = max((len(str(c)) for c in summary.values()), default=0)
    for key, count in summary.most_common():
        print(f'{key!r:{key_max_len}}\t{count:{count_max_len}}')
# Run the command-line interface only when this file is executed as a script;
# importing it as a module just defines the functions.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bencodex >= 1.0.0, < 2.0.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment