Skip to content

Instantly share code, notes, and snippets.

@jsundram
Last active October 6, 2023 19:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsundram/b3c2f3120645b25895f8b53faf36decf to your computer and use it in GitHub Desktop.
Save jsundram/b3c2f3120645b25895f8b53faf36decf to your computer and use it in GitHub Desktop.
letter statistics visualizations for wordle (using the short wordle wordlist)
from collections import Counter
from string import ascii_lowercase as ALPHABET
import json
import matplotlib.pyplot as plt
import numpy as np
"""
For https://www.powerlanguage.co.uk/wordle/.
Read more here: https://www.nytimes.com/2022/01/03/technology/wordle-word-game-creator.html
"""
# Select the Solarized light theme, then make saved figures reuse the axes
# face/edge colours configured by that theme.
plt.style.use("Solarize_Light2")
plt.rcParams.update(
    {
        "savefig.facecolor": plt.rcParams["axes.facecolor"],
        "savefig.edgecolor": plt.rcParams["axes.edgecolor"],
    }
)
def get_words(key='short', *, wordle_path='wordle.json',
              dict_path='/usr/share/dict/words'):
    """Return the list of words selected by ``key``.

    Key must be one of:
    1) "short": wordle's short list (2,314 entries)
    2) "long": wordle's long list
    3) "dict": /usr/share/dict/words of length 5 (not proper nouns)

    Args:
        key: Which word list to load (see above).
        wordle_path: JSON file holding the "short" and "long" lists, each
            stored as an array of words under the key of the same name.
        dict_path: Plain-text word file with one word per line.

    Returns:
        A list of words. "short"/"long" entries are lower-cased; "dict"
        entries are exactly five ASCII lowercase letters. An unrecognised
        ``key`` (or a list missing from the JSON file) yields ``[]``.
    """
    if key in ('short', 'long'):
        with open(wordle_path, encoding='utf-8') as f:
            data = json.load(f)
        return [w.lower() for w in data.get(key, [])]

    if key == 'dict':
        letters = set(ALPHABET)
        # Undecodable bytes become U+FFFD, which the letter filter below
        # rejects, so a stray non-UTF-8 entry cannot abort the whole read.
        with open(dict_path, encoding='utf-8', errors='replace') as f:
            words = [line.strip() for line in f]
        # Require every character to be a-z: this drops proper nouns as well
        # as entries with apostrophes, accents or inner capitals, none of
        # which can be indexed by the per-letter frequency table.
        return [w for w in words if len(w) == 5 and letters.issuperset(w)]

    return []
def histogram(words):
    """Draw a horizontal bar chart of letter counts and save it.

    Every character of every word in ``words`` is tallied. Bars are listed
    alphabetically from top to bottom and the figure is written to
    ``histogram.png`` in the current directory.
    """
    counts = Counter(letter for word in words for letter in word)
    letters = sorted(counts)
    totals = [counts[letter] for letter in letters]
    # Highest y position first, so the first letter lands on the top bar.
    positions = list(range(len(letters) - 1, -1, -1))

    _, ax = plt.subplots()
    ax.barh(y=positions, width=totals, tick_label=letters)
    ax.set_title("Letter Distribution from {:,} 5-letter Words".format(len(words)))
    plt.savefig("histogram.png")
def heatmap(words):
    """Plot letter counts by position in the word and save the figure.

    The table from ``get_frequencies(words)`` is drawn with one row per
    position in the word and one column per letter of ``ALPHABET``, with a
    colour bar alongside. The result is written to ``heatmap.png`` at
    300 dpi.

    Based on
    https://stackoverflow.com/questions/14391959/heatmap-in-matplotlib-with-pcolor
    """
    data = get_frequencies(words)
    n_positions, n_letters = data.shape

    fig, ax = plt.subplots(figsize=(10, 3))
    mesh = ax.pcolor(data, cmap=plt.cm.RdYlBu_r)

    # Put the major ticks at the middle of each cell.
    ax.set_xticks(np.arange(n_letters) + 0.5, minor=False)
    ax.set_yticks(np.arange(n_positions) + 0.5, minor=False)

    # Want a more natural, table-like display.
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    ax.set_xticklabels(ALPHABET, minor=False)
    # Derive the labels from the table so their count always matches the
    # number of y ticks set above (1..5 for five-letter words).
    ax.set_yticklabels(list(range(1, n_positions + 1)), minor=False)
    ax.set_aspect("equal")

    ax.set_title(
        "Letter Frequencies from {:,} 5-letter words by position in word".format(
            len(words)
        )
    )
    ax.set_ylabel("Position")
    ax.set_xlabel("Letter")

    # Make a legend that aligns with the heatmap
    # https://stackoverflow.com/a/18195921/2683
    ax_pos = ax.get_position()
    spacing, width = 0.01, 0.02
    cax = fig.add_axes([ax_pos.x1 + spacing, ax_pos.y0, width, ax_pos.height])
    fig.colorbar(mesh, cax=cax)

    plt.savefig("heatmap.png", dpi=300)
def get_frequencies(words):
    """Count how often each letter occurs at each position.

    Args:
        words: Collection of words made up of letters from ``ALPHABET``
            (ASCII lowercase).

    Returns:
        An integer array of shape ``(longest word length, len(ALPHABET))``
        where ``table[p, c]`` is the number of words whose character at
        index ``p`` is ``ALPHABET[c]``. An empty ``words`` gives a table
        with zero rows.

    Raises:
        KeyError: If a word contains a character that is not in ``ALPHABET``.
    """
    # Size the table by the longest word rather than the first one, so empty
    # input and words of uneven length do not raise IndexError.
    n_positions = max((len(word) for word in words), default=0)
    table = np.zeros((n_positions, len(ALPHABET)), dtype=int)
    column_of = {letter: col for col, letter in enumerate(ALPHABET)}
    for word in words:
        for row, letter in enumerate(word):
            table[row, column_of[letter]] += 1
    return table
def main():
    """Load wordle's short list and render both charts from it."""
    short_list = get_words("short")
    for render in (histogram, heatmap):
        render(short_list)


# Only generate the images when executed as a script, not on import.
if __name__ == "__main__":
    main()
@jsundram
Copy link
Author

jsundram commented Jan 10, 2022

histogram

@jsundram
Copy link
Author

jsundram commented Jan 10, 2022

heatmap

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment