# jsundram/wordle_stats_viz.py

## wordle_stats_viz.py
from collections import Counter
from string import ascii_lowercase as ALPHABET
import json
import matplotlib.pyplot as plt
import numpy as np

"""
Letter-frequency charts for the Wordle word lists (https://www.powerlanguage.co.uk/wordle/).
Read more here: https://www.nytimes.com/2022/01/03/technology/wordle-word-game-creator.html
"""


# Select the "Solarize_Light2" style for every figure created below.
plt.style.use("Solarize_Light2")
# Copy the axes face/edge colors into the savefig settings so that images
# written with savefig() use the same colors as the plot area. These reads
# happen after style.use() so they pick up the style's values.
plt.rcParams["savefig.facecolor"] = plt.rcParams["axes.facecolor"]
plt.rcParams["savefig.edgecolor"] = plt.rcParams["axes.edgecolor"]


def get_words(key='short'):
    """Return the word list selected by ``key``.

    Key must be one of:
        1) "short": wordle's short list (2,314 entries)
        2) "long": wordle's long list
        3) "dict": /usr/share/dict/words of length 5 (not proper nouns)

    "short" and "long" are read from ``wordle.json`` in the current working
    directory and lower-cased. "dict" keeps only entries that are exactly
    five characters long and consist solely of letters from ALPHABET, so
    capitalised entries and words containing apostrophes or accented
    characters are dropped (get_frequencies() can only index ALPHABET
    letters).

    Returns:
        A list of strings. Any other key, or a key missing from
        ``wordle.json``, yields an empty list.

    Raises:
        OSError: if the backing file cannot be opened.
        json.JSONDecodeError: if ``wordle.json`` is not valid JSON.
    """
    if key in ('short', 'long'):
        # JSON is UTF-8 by specification; don't depend on the locale default.
        with open('wordle.json', encoding='utf-8') as f:
            data = json.load(f)
        return [w.lower() for w in data.get(key, [])]
    if key == 'dict':
        letters = set(ALPHABET)
        # Undecodable bytes become U+FFFD, which the letter filter rejects,
        # so an oddly-encoded system dictionary cannot abort the read.
        with open('/usr/share/dict/words', encoding='utf-8',
                  errors='replace') as f:
            words = [line.strip() for line in f]
        return [w for w in words if len(w) == 5 and letters.issuperset(w)]
    return []


def histogram(words):
    """Save a horizontal bar chart of overall letter counts to histogram.png.

    Args:
        words: sized collection of strings; every character of every word
            is counted, regardless of its position in the word.

    Bars are ordered alphabetically from top to bottom. The figure is closed
    once it has been written, so repeated calls do not accumulate open
    pyplot figures.
    """
    counts = Counter(a for w in words for a in w)
    letters = sorted(counts)
    # The first letter gets the highest y so the chart reads top to bottom.
    positions = list(range(len(letters) - 1, -1, -1))

    fig, ax = plt.subplots()
    try:
        ax.barh(
            y=positions,
            width=[counts[letter] for letter in letters],
            tick_label=letters,
        )
        ax.set_title(
            "Letter Distribution from {:,} 5-letter Words".format(len(words))
        )
        fig.savefig("histogram.png")
    finally:
        # pyplot keeps a reference to every figure until it is closed.
        plt.close(fig)


def heatmap(words):
    """Save a position-by-letter frequency heatmap to heatmap.png.

    https://stackoverflow.com/questions/14391959/heatmap-in-matplotlib-with-pcolor

    Args:
        words: sized collection of words accepted by get_frequencies().

    Rows are letter positions (labelled from 1) and columns are the letters
    of ALPHABET. The number of row labels follows the frequency table, so
    word lists of a length other than five are labelled correctly. The
    figure is closed once it has been written.
    """
    data = get_frequencies(words)
    n_positions, n_letters = data.shape

    fig, ax = plt.subplots(figsize=(10, 3))
    try:
        mesh = ax.pcolor(data, cmap=plt.cm.RdYlBu_r)

        # Put the major ticks at the middle of each cell.
        ax.set_xticks(np.arange(n_letters) + 0.5, minor=False)
        ax.set_yticks(np.arange(n_positions) + 0.5, minor=False)

        # Want a more natural, table-like display.
        ax.invert_yaxis()
        ax.xaxis.tick_top()

        ax.set_xticklabels(list(ALPHABET), minor=False)
        # One label per row of the table; must match the tick count above.
        ax.set_yticklabels(list(range(1, n_positions + 1)), minor=False)

        ax.set_aspect("equal")
        ax.set_title(
            "Letter Frequencies from {:,} 5-letter words by position in word".format(
                len(words)
            )
        )
        ax.set_ylabel("Position")
        ax.set_xlabel("Letter")

        # Make a legend that aligns with the heatmap
        # https://stackoverflow.com/a/18195921/2683
        ax_pos = ax.get_position()
        spacing, width = 0.01, 0.02
        cax = fig.add_axes(
            [ax_pos.x1 + spacing, ax_pos.y0, width, ax_pos.height]
        )
        fig.colorbar(mesh, cax=cax)

        fig.savefig("heatmap.png", dpi=300)
    finally:
        # pyplot keeps a reference to every figure until it is closed.
        plt.close(fig)


def get_frequencies(words):
    """Count how often each letter occurs at each position in a word.

    Args:
        words: iterable of strings made up of letters from ALPHABET.

    Returns:
        Integer array of shape (longest word length, len(ALPHABET)).
        Entry [row, col] is the number of words whose character at 0-based
        position ``row`` is ALPHABET[col]. Empty input gives an array of
        shape (0, len(ALPHABET)).

    Raises:
        KeyError: if a word contains a character that is not in ALPHABET.
    """
    words = list(words)  # iterated twice below; also accepts generators
    # Size by the longest word so a short first word cannot cause an
    # out-of-range row for a later, longer word.
    n_rows = max(map(len, words), default=0)
    table = np.zeros((n_rows, len(ALPHABET)), dtype=int)
    ix = {a: i for i, a in enumerate(ALPHABET)}
    for word in words:
        for row, a in enumerate(word):
            table[row, ix[a]] += 1
    return table


def main():
    """Load wordle's short word list and render both charts from it."""
    wordlist = get_words(key='short')
    for render in (histogram, heatmap):
        render(wordlist)


# Generate the charts only when run as a script, not when imported.
if __name__ == "__main__":
    main()