Skip to content

Instantly share code, notes, and snippets.

@mikesamuel
Last active July 11, 2024 21:46
Show Gist options
  • Save mikesamuel/4e0d82a7e5aec24667aa1b494a5a77ff to your computer and use it in GitHub Desktop.
Save mikesamuel/4e0d82a7e5aec24667aa1b494a5a77ff to your computer and use it in GitHub Desktop.
Python histogramming
def histogram(nums, n_buckets, title):
    """Render a text histogram of ``nums`` as a multi-line string.

    The range ``[min(nums), max(nums)]`` is split into ``n_buckets``
    equal-width buckets.  The output consists of a ``### title ###`` header
    padded to 75 columns, one line per bucket (right-aligned label, a bar
    scaled so the fullest bucket is 45 cells wide, and the count in
    parentheses), and a footer with the total count and the mean.

    Args:
        nums: Iterable of real numbers.  It is materialized once, so one-shot
            iterators are accepted.
        n_buckets: Number of buckets; must be at least 1.
        title: Text shown in the header line.

    Returns:
        The histogram lines joined with ``'\\n'`` (no trailing newline).

    Raises:
        ValueError: If ``nums`` is empty or ``n_buckets`` is less than 1.

    Note:
        Labels are printed as half-open ranges ``[lo, hi)``, but the last
        bucket also counts values equal to the maximum.
    """
    values = list(nums)
    if not values:
        raise ValueError('histogram() needs at least one value')
    if n_buckets < 1:
        raise ValueError('n_buckets must be at least 1')

    n_text_columns = 75
    max_bar_width = 45.0

    # Pass the sequence itself: min(*values) breaks for a single element
    # because min(5) is not a valid call.
    min_val = min(values)
    max_val = max(values)
    bucket_width = (max_val - min_val) / float(n_buckets)

    buckets = [0] * n_buckets
    for num in values:
        if bucket_width > 0:
            raw_idx = int((num - min_val) / bucket_width)
            # Clamp so that max_val (which computes to n_buckets) lands in
            # the last bucket and rounding noise cannot go below zero.
            bucket_idx = max(0, min(n_buckets - 1, raw_idx))
        else:
            # All values are identical; avoid dividing by a zero width.
            bucket_idx = 0
        buckets[bucket_idx] += 1

    bucket_labels = [
        '[%.02g, %.02g)' % (min_val + i * bucket_width, min_val + (i + 1) * bucket_width)
        for i in range(n_buckets)
    ]
    label_column_width = max(len(lbl) for lbl in bucket_labels)
    # values is non-empty, so at least one bucket has a positive count.
    max_count = max(buckets)

    # Header line like '### TITLE ###'
    fill = n_text_columns - len(title) - 2
    left_pad = fill // 2
    histogram_lines = [f'{"#" * left_pad} {title} {"#" * (fill - left_pad)}']

    for bucket_label, count in zip(bucket_labels, buckets):
        bar_width = (count / max_count) * max_bar_width
        bar = '▄' * int(bar_width)
        if bar_width > int(bar_width):
            # A narrower glyph marks the fractional remainder of the bar.
            bar += '▖'
        histogram_lines.append(
            f'{bucket_label.rjust(label_column_width)}: {bar} ({count})'
        )

    total = len(values)
    average = sum(values) / total
    histogram_lines.append('Total: %d, mean: %0.2g' % (total, average))
    return '\n'.join(histogram_lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment