Last active
December 4, 2024 23:21
-
-
Save alexmic/8345076 to your computer and use it in GitHub Desktop.
Counts the number of black pixels each letter of the English alphabet (lowercase and uppercase) occupies when rendered in a 100pt font on a 100x100 white canvas.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from operator import itemgetter | |
from PIL import Image, ImageDraw, ImageFont | |
# Build the alphabet: the 26 lowercase letters followed by their
# uppercase counterparts.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
alphabet = alphabet + alphabet.upper()
# Load Helvetica at size 100 (big type); draw_letter() renders every
# letter with this font.
helvetica = ImageFont.truetype('Helvetica.ttf', 100)
def draw_letter(letter, save=True):
    """Render a single letter in black on a white 100x100 RGB canvas.

    The text is drawn at position (0, 0) using the module-level
    ``helvetica`` font.

    Args:
        letter: The character to draw.
        save: When true (the default), also write the image to
            ``imgs/<letter>.png``. The ``imgs`` directory is created
            first if it does not exist yet.

    Returns:
        The rendered image.
    """
    # Local import: this script does not import ``os`` at module level.
    import os

    img = Image.new('RGB', (100, 100), 'white')
    draw = ImageDraw.Draw(img)
    draw.text((0, 0), letter, font=helvetica, fill='#000000')
    if save:
        # Saving into a missing directory fails, so make sure the output
        # folder exists before writing into it.
        if not os.path.isdir('imgs'):
            os.makedirs('imgs')
        # NOTE(review): on a case-insensitive filesystem 'a.png' and
        # 'A.png' are the same file, so calling this with both cases of a
        # letter keeps only the last image -- confirm whether that matters.
        img.save("imgs/{}.png".format(letter), 'PNG')
    return img
def count_black_pixels(img):
    """Count the pixels of an image whose channel values sum to zero.

    For RGB data with non-negative channels this is the number of
    pure-black pixels.

    Args:
        img: An object whose ``getdata()`` returns an iterable with one
            sequence of channel values per pixel (e.g. ``(r, g, b)``).

    Returns:
        The number of matching pixels, as an int.
    """
    # Count lazily instead of using len(filter(...)): filter() returns an
    # iterator on Python 3, where len() raises TypeError, and this form
    # also avoids building two throwaway lists.
    return sum(1 for rgb in img.getdata() if sum(rgb) == 0)
if __name__ == '__main__':
    # Render every letter (draw_letter also saves a PNG by default) and
    # pair it with its black-pixel count.
    counts = [
        (letter, count_black_pixels(draw_letter(letter)))
        for letter in alphabet
    ]
    # Letters with the most black pixels come first. print(...) with a
    # single argument behaves the same on Python 2 and Python 3.
    print(sorted(counts, key=itemgetter(1), reverse=True))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import division | |
import os | |
from collections import defaultdict | |
from math import sqrt | |
from PIL import Image, ImageDraw, ImageFont | |
# Build the alphabet: the 26 lowercase letters followed by their
# uppercase counterparts.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
alphabet = alphabet + alphabet.upper()
def draw_letter(letter, font, save=True):
    """Render a single letter in black on a white 100x100 RGB canvas.

    The text is drawn at position (0, 0).

    Args:
        letter: The character to draw.
        font: The font object passed to ``ImageDraw.text``.
        save: When true (the default), also write the image to
            ``imgs/<letter>.png``. The ``imgs`` directory is created
            first if it does not exist yet.

    Returns:
        The rendered image.
    """
    img = Image.new('RGB', (100, 100), 'white')
    draw = ImageDraw.Draw(img)
    draw.text((0, 0), letter, font=font, fill='#000000')
    if save:
        # Saving into a missing directory fails, so make sure the output
        # folder exists before writing into it.
        if not os.path.isdir('imgs'):
            os.makedirs('imgs')
        # NOTE(review): the file name depends only on the letter, so images
        # rendered with different fonts overwrite each other.
        img.save("imgs/{}.png".format(letter), 'PNG')
    return img
def count_black_pixels(img):
    """Count the pixels of an image whose channel values sum to zero.

    For RGB data with non-negative channels this is the number of
    pure-black pixels.

    Args:
        img: An object whose ``getdata()`` returns an iterable with one
            sequence of channel values per pixel (e.g. ``(r, g, b)``).

    Returns:
        The number of matching pixels, as an int.
    """
    # Count lazily instead of using len(filter(...)): filter() returns an
    # iterator on Python 3, where len() raises TypeError, and this form
    # also avoids building two throwaway lists.
    return sum(1 for rgb in img.getdata() if sum(rgb) == 0)
def available_fonts(font_dir='/Users/alex/Desktop/English', size=100):
    """Yield a loaded font for every loadable file under a directory.

    The directory tree is walked recursively and every file found is
    handed to ``ImageFont.truetype``; files that fail to load with
    ``IOError`` are skipped.

    Args:
        font_dir: Root directory to search. Defaults to the path that was
            previously hard-coded here.
        size: Size passed to ``ImageFont.truetype`` for each font.
            Defaults to 100, the previously hard-coded value.

    Yields:
        One font object per file that loads successfully.
    """
    for dirpath, _dirnames, filenames in os.walk(font_dir):
        for name in filenames:
            path = os.path.join(dirpath, name)
            # Keep the try body to the load itself so an error raised by
            # the consumer of this generator is never swallowed here.
            try:
                font = ImageFont.truetype(path, size)
            except IOError:
                # Best effort: skip files that cannot be loaded as fonts.
                continue
            yield font
def letter_statistics(counts):
    """Yield the mean and standard deviation of the counts of each letter.

    Args:
        counts: Mapping from a letter to a non-empty sequence of numbers
            (the pixel counts collected for that letter).

    Yields:
        ``(letter, mean, sd)`` tuples in sorted letter order, where ``sd``
        is the population standard deviation (the squared deviations are
        divided by ``n``, not ``n - 1``).

    Raises:
        ZeroDivisionError: If a letter maps to an empty sequence.
    """
    # Iterate over sorted keys rather than iteritems(), which only exists
    # on Python 2; a separate name for the per-letter values also avoids
    # shadowing the ``counts`` argument.
    for letter in sorted(counts):
        values = counts[letter]
        # A float divisor gives true division even without
        # ``from __future__ import division``.
        n = float(len(values))
        mean = sum(values) / n
        sd = sqrt(sum((x - mean) ** 2 for x in values) / n)
        yield letter, mean, sd
def main():
    """Print the mean and standard deviation of black pixels per letter.

    Each font yielded by ``available_fonts()`` is used to render every
    letter of ``alphabet`` (without saving the images); the black-pixel
    counts are grouped by letter and summarised by ``letter_statistics``.
    """
    counts = defaultdict(list)
    # Fonts form the outer loop so that each font file is loaded once
    # instead of once per letter. Every letter's list is still filled in
    # font order, so the collected data is unchanged.
    for font in available_fonts():
        for letter in alphabet:
            img = draw_letter(letter, font, save=False)
            counts[letter].append(count_black_pixels(img))
    for letter, mean, sd in letter_statistics(counts):
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3.
        print(u"{0}: {1:.2f} ± {2:.2f}".format(letter, mean, sd))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[('M', 2493), ('W', 2414), ('B', 1909), ('R', 1857), ('G', 1798), ('N', 1779), ('D', 1765), ('Q', 1758), ('g', 1657), ('O', 1652), ('E', 1635), ('S', 1610), ('m', 1545), ('A', 1539), ('K', 1539), ('H', 1528), ('U', 1528), ('w', 1513), ('b', 1452), ('Z', 1445), ('C', 1429), ('X', 1420), ('P', 1419), ('p', 1388), ('q', 1388), ('d', 1381), ('V', 1207), ('e', 1202), ('F', 1195), ('a', 1171), ('k', 1157), ('h', 1145), ('T', 1056), ('y', 1053), ('L', 1045), ('z', 1044), ('Y', 1031), ('o', 1016), ('n', 1012), ('u', 985), ('s', 976), ('c', 883), ('J', 874), ('x', 816), ('v', 761), ('t', 739), ('f', 722), ('j', 701), ('l', 584), ('I', 584), ('r', 570), ('i', 512)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A: 1443.91 ± 644.15 | |
B: 1841.38 ± 685.26 | |
C: 1247.28 ± 543.88 | |
D: 1741.80 ± 675.39 | |
E: 1562.66 ± 592.35 | |
F: 1287.51 ± 485.45 | |
G: 1598.12 ± 660.65 | |
H: 1829.83 ± 738.88 | |
I: 912.00 ± 402.38 | |
J: 1020.24 ± 419.26 | |
K: 1648.93 ± 687.54 | |
L: 1078.63 ± 428.35 | |
M: 2217.51 ± 945.19 | |
N: 1810.05 ± 758.11 | |
O: 1647.88 ± 684.50 | |
P: 1450.26 ± 555.61 | |
Q: 1921.22 ± 792.46 | |
R: 1770.32 ± 672.66 | |
S: 1358.10 ± 591.48 | |
T: 1183.93 ± 481.17 | |
U: 1500.25 ± 640.91 | |
V: 1281.79 ± 580.83 | |
W: 2139.06 ± 945.29 | |
X: 1448.34 ± 638.57 | |
Y: 1162.64 ± 489.21 | |
Z: 1452.01 ± 591.28 | |
a: 1126.90 ± 466.50 | |
b: 1416.52 ± 587.70 | |
c: 817.44 ± 368.99 | |
d: 1435.09 ± 594.68 | |
e: 1057.23 ± 458.34 | |
f: 1017.09 ± 449.75 | |
g: 1522.26 ± 636.10 | |
h: 1374.94 ± 582.75 | |
i: 686.07 ± 327.42 | |
j: 888.88 ± 412.74 | |
k: 1318.74 ± 575.17 | |
l: 797.02 ± 327.72 | |
m: 1725.93 ± 746.73 | |
n: 1154.92 ± 494.20 | |
o: 1065.75 ± 466.91 | |
p: 1436.74 ± 601.54 | |
q: 1419.39 ± 580.56 | |
r: 729.67 ± 327.41 | |
s: 903.54 ± 415.78 | |
t: 849.93 ± 367.05 | |
u: 1133.91 ± 485.74 | |
v: 848.74 ± 411.86 | |
w: 1433.83 ± 671.25 | |
x: 960.69 ± 449.55 | |
y: 1088.74 ± 509.81 | |
z: 969.60 ± 414.24 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment