Skip to content

Instantly share code, notes, and snippets.

@jevinskie
Created April 12, 2024 03:53
Show Gist options
  • Save jevinskie/41d3bd7a01325bbb6aca11f861567c51 to your computer and use it in GitHub Desktop.
Save jevinskie/41d3bd7a01325bbb6aca11f861567c51 to your computer and use it in GitHub Desktop.
Python pretty byte histogram rendering for TUI
#!/usr/bin/env python3
# Example invocation, followed by the histogram it prints:
# % ./byte_histogram.py -i ./byte_histogram.py
# 20 ' ' [ 1426]: ██████████████████████████████████████████████████████████████████████████████████████████████
# e2 [ 427]: ████████████████████████████▏
# 96 [ 427]: ████████████████████████████▏
# 88 [ 337]: ██████████████████████▏
# 0a ' ' [ 183]: ████████████
# 27 ''' [ 171]: ███████████▎
# 65 'e' [ 144]: █████████▍
# 3a ':' [ 130]: ████████▌
# 74 't' [ 129]: ████████▌
# 69 'i' [ 120]: ███████▉
# 72 'r' [ 116]: ███████▋
# 5b '[' [ 104]: ██████▊
# 5d ']' [ 104]: ██████▊
# 23 '#' [ 96]: ██████▎
# 73 's' [ 94]: ██████▏
# 61 'a' [ 87]: █████▋
# 6e 'n' [ 78]: █████▏
# 5f '_' [ 76]: █████
# 6c 'l' [ 69]: ████▌
# 6f 'o' [ 59]: ███▉
# 34 '4' [ 57]: ███▊
# 66 'f' [ 56]: ███▋
# 64 'd' [ 52]: ███▍
# 6d 'm' [ 51]: ███▎
# 31 '1' [ 49]: ███▏
# 70 'p' [ 48]: ███▏
# 33 '3' [ 46]: ███
# 32 '2' [ 45]: ██▉
# 22 '"' [ 45]: ██▉
# 28 '(' [ 44]: ██▉
# 29 ')' [ 44]: ██▉
# 62 'b' [ 43]: ██▊
# 63 'c' [ 42]: ██▊
# 3d '=' [ 40]: ██▋
# 67 'g' [ 33]: ██▏
# 75 'u' [ 32]: ██
# 68 'h' [ 31]: ██
# 30 '0' [ 31]: ██
# 37 '7' [ 30]: █▉
# 35 '5' [ 30]: █▉
# 79 'y' [ 29]: █▉
# 38 '8' [ 29]: █▉
# 36 '6' [ 29]: █▉
# 2e '.' [ 28]: █▊
# 2c ',' [ 25]: █▋
# 39 '9' [ 20]: █▎
# 8f [ 17]: █
# 54 'T' [ 17]: █
# 8e [ 16]: █
# 77 'w' [ 16]: █
# 2d '-' [ 14]: ▉
# 8a [ 14]: ▉
# 78 'x' [ 14]: ▉
# 89 [ 12]: ▊
# 46 'F' [ 12]: ▊
# 49 'I' [ 12]: ▊
# 6b 'k' [ 12]: ▊
# 8b [ 11]: ▋
# 48 'H' [ 11]: ▋
# 4e 'N' [ 11]: ▋
# 2b '+' [ 11]: ▋
# 8c [ 10]: ▋
# 8d [ 10]: ▋
# 4d 'M' [ 10]: ▋
# 2f '/' [ 9]: ▌
# 3e '>' [ 9]: ▌
# 42 'B' [ 9]: ▌
# 41 'A' [ 9]: ▌
# 44 'D' [ 8]: ▌
# 55 'U' [ 7]: ▍
# 7b '{' [ 7]: ▍
# 7d '}' [ 7]: ▍
# 45 'E' [ 6]: ▍
# 43 'C' [ 6]: ▍
# 50 'P' [ 5]: ▎
# 52 'R' [ 5]: ▎
# 57 'W' [ 5]: ▎
# 4c 'L' [ 5]: ▎
# 76 'v' [ 4]: ▎
# 4f 'O' [ 4]: ▎
# 47 'G' [ 4]: ▎
# 53 'S' [ 4]: ▎
# 7a 'z' [ 4]: ▎
# 2a '*' [ 4]: ▎
# 25 '%' [ 3]: ▏
# 58 'X' [ 3]: ▏
# 4b 'K' [ 3]: ▏
# 40 '@' [ 3]: ▏
# 3c '<' [ 3]: ▏
# 21 '!' [ 2]: ▏
# 7c '|' [ 2]: ▏
# 5c '\' [ 2]: ▏
# 71 'q' [ 2]: ▏
import argparse
import sys
import termios
from collections import defaultdict
from pathlib import Path
from string import printable, whitespace
from typing import Final
class ByteHistogram(defaultdict):
MAX_TERM_WIDTH: Final[int] = 1024
FALLBACK_TERM_WIDTH: Final[int] = 80
COUNT_NUM_DIGITS: Final[int] = 5
def __init__(self, include_zeros: bool = False):
super().__init__(int)
if include_zeros:
for i in range(0x100):
self[i] = 0
@staticmethod
def get_bar_width(prefix_len: int) -> int:
try:
_, col = termios.tcgetwinsize(sys.stdin)
except Exception:
col = ByteHistogram.FALLBACK_TERM_WIDTH
col = min(col, ByteHistogram.MAX_TERM_WIDTH)
col = max(col - prefix_len, 0)
return col
@staticmethod
def block_str(percentage: float, width: int = 80) -> str:
full_width = width * 8
num_blk = int(percentage * full_width)
full_blks = num_blk // 8
partial_blks = num_blk % 8
return "█" * full_blks + ("", "▏", "▎", "▍", "▌", "▋", "▊", "▉")[partial_blks]
def add_bytes(self, buf: bytes) -> None:
for b in buf:
if b >= 0xFF:
print("wtf: {b:02x}")
self[b] += 1
def ascii_histogram(self, width: int | None = None):
assert all([0 <= b <= 0xFF for b in self.keys()])
res = ""
sorted_self = dict(sorted(self.items(), key=lambda i: i[1], reverse=True))
max_num = list(sorted_self.values())[0]
if width is None:
width = self.get_bar_width(2 + 1 + 3 + 2 + ByteHistogram.COUNT_NUM_DIGITS + 3)
for m, n in sorted_self.items():
c = chr(m)
if c in printable:
byte_rep = f"'{c}'"
if c in whitespace:
byte_rep = "' '"
else:
byte_rep = " "
res += (
f"{m:02x} {byte_rep} [{n:{ByteHistogram.COUNT_NUM_DIGITS}d}]: "
+ self.block_str(n / max_num, width=width)
+ "\n"
)
return res
def real_main(args) -> int:
    """Print a byte histogram of the file named by ``args.in_file``.

    Args:
        args: Parsed command-line namespace; only ``in_file`` is read.

    Returns:
        The process exit status (always 0).
    """
    hist = ByteHistogram()
    # Close the file deterministically and read in chunks so large inputs
    # are not loaded into memory all at once.
    with open(args.in_file, "rb") as in_fh:
        while chunk := in_fh.read(1 << 20):
            hist.add_bytes(chunk)
    print(hist.ascii_histogram())
    return 0
def get_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with its single required ``-i/--in-file`` option."""
    cli = argparse.ArgumentParser(description="byte_histogram.py")
    in_file_options = {"required": True, "type": Path, "help": "Input path"}
    cli.add_argument("-i", "--in-file", **in_file_options)
    return cli
def main() -> int:
    """Parse the command line and run the histogram; return the exit status."""
    parser = get_arg_parser()
    cli_args = parser.parse_args()
    return real_main(cli_args)
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment