timburks/static-wordle.go

## static-wordle.go
package main

import (
	"fmt"
	"io/ioutil"
	"sort"
	"strings"
)

// This computes a list of static guesses to prune the Wordle solution space.
// Guesses are "static" because we choose these guesses without knowing what
// Wordle's scoring will return for them.
func main() {
	// Get the word list from https://tiny.one/wordlewords.
	// Make sure that your saved version does not end with a blank line!
	bytes, err := ioutil.ReadFile("words.txt")
	if err != nil {
		panic(err)
	}
	words := strings.Split(string(bytes), "\n")

	if false {
		// Optionally filter the word list to a smaller size.
		// This is mainly useful for testing and debugging.
		filtered := make([]string, 0)
		for i, w := range words {
			if i%5 == 0 {
				filtered = append(filtered, w)
			}
		}
		words = filtered
	}

	// The prefix is the set of guesses that have already been made.
	// We start with an empty prefix and will add to it as each next guess is determined.
	prefix := []string{}
	// This loop greedily chooses the next guess by selecting the one that
	// minimizes the size of the largest bucket of words associated with
	// any possible scoring of a guess (or set of guesses).
	for {
		guess, maxBucketSize, histogram := computeBestGuess(words, prefix)
		fmt.Printf("best guess %s (%d) using prefix %+v\n", guess, maxBucketSize, prefix)
		printHistogram(histogram)
		if maxBucketSize == 1 {
			// A max bucket size of 1 means our guesses would identify all the
			// possible solution words.
			break
		}
		done := false
		for _, p := range prefix {
			if guess == p {
				// A repeated guess means there's no more information to be gained
				// by guessing with the available words.
				done = true
			}
		}
		if done {
			break
		}
		prefix = append(prefix, guess)
	}
}

// Compute the next guess which minimizes the largest bucket of possible solutions
// where the buckets correspond to scorings of our guess and any specified "prefix" guesses.
func computeBestGuess(words []string, prefix []string) (string, int, map[int]int) {
	steps := len(words) / 40 // We use this to print a progress indicator (below).
	bestGuess := ""
	bestMaxBucketSize := len(words)
	var bestBuckets map[string]int
	fmt.Printf("choosing guess %d", len(prefix)+1)
	for g, guess := range words {
		buckets := make(map[string]int, 0)
		for _, secret := range words {
			var s string
			for _, p := range prefix {
				s += score(p, secret)
			}
			s += score(guess, secret)
			buckets[string(s)] += 1
		}
		max := 0
		for _, v := range buckets {
			if v > max {
				max = v
			}
		}
		if g%steps == 0 {
			fmt.Printf(".")
		}
		if max < bestMaxBucketSize {
			bestGuess = guess
			bestMaxBucketSize = max
			bestBuckets = buckets
		}
	}
	fmt.Printf("\n")
	// Compute a histogram of the buckets.
	// This is the number of buckets of each size in the buckets of possible solutions
	// that we would have after making the best guess.
	histogram := make(map[int]int)
	for _, v := range bestBuckets {
		histogram[v] += 1
	}
	return bestGuess, bestMaxBucketSize, histogram
}

// Print a histogram of bucket sizes.
func printHistogram(histogram map[int]int) {
	keys := make([]int, 0)
	total := 0.0
	for k, v := range histogram {
		keys = append(keys, k)
		total += float64(k * v)
	}
	sort.Ints(keys)
	sum := 0.0
	fmt.Println("SIZE     N PERCENT (of words in a bucket of this size or less)")
	fmt.Println("---- ----- -------")
	for _, k := range keys {
		sum += float64(k * histogram[k])
		fmt.Printf("%4d %5d %6.2f%%\n", k, histogram[k], 100.0*sum/total)
	}
}

// Score a wordle guess.
func score(guessWord, secretWord string) string {
	// These byte arrays will be modified as the score is computed.
	guess := []byte(guessWord)
	secret := []byte(secretWord)
	// Mark the exact matches.
	for i, g := range guess {
		if g == secret[i] {
			guess[i] = '2'  // Mark the exact match.
			secret[i] = ' ' // We don't want to match this letter against any other.
		}
	}
	// Find letters that are correct but out-of-place.
	for i, g := range guess {
		if g != '2' { // Only do this for letters that weren't exact matches.
			for j, s := range secret {
				if g == s {
					guess[i] = '1'  // Mark the right letter that's in the wrong place.
					secret[j] = ' ' // We don't want to match this letter against any other.
					break
				}
			}
		}
	}
	// Set all unmarked letters to '0'.
	for i, g := range guess {
		if g != '1' && g != '2' {
			guess[i] = '0'
		}
	}
	// By now we have transformed the guess into a score. Return it.
	return string(guess)
}

## static-wordle.txt
choosing guess 1.........................................
best guess serai (697) using prefix []
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1    16   0.12%
   2    16   0.37%
   3     9   0.58%
   4     8   0.82%
   5     1   0.86%
   6     8   1.23%
   7     2   1.34%
   8     2   1.46%
   9     4   1.74%
  10     2   1.90%
  11     4   2.24%
  12     6   2.79%
  13     2   2.99%
  14     1   3.10%
  15     1   3.21%
  17     1   3.35%
  18     4   3.90%
  19     2   4.19%
  20     1   4.35%
  21     2   4.67%
  23     1   4.85%
  24     1   5.03%
  26     1   5.23%
  27     2   5.65%
  28     2   6.08%
  29     1   6.31%
  30     1   6.54%
  31     1   6.78%
  32     1   7.02%
  37     1   7.31%
  39     1   7.61%
  40     1   7.92%
  42     3   8.89%
  43     4  10.21%
  45     1  10.56%
  47     1  10.92%
  49     1  11.30%
  50     1  11.69%
  51     1  12.08%
  52     1  12.48%
  53     1  12.89%
  55     1  13.31%
  56     1  13.74%
  57     1  14.18%
  61     3  15.60%
  65     1  16.10%
  68     1  16.62%
  74     2  17.76%
  75     1  18.34%
  77     1  18.93%
  78     1  19.53%
  79     1  20.14%
  81     1  20.77%
  86     1  21.43%
  87     1  22.10%
  89     1  22.79%
  91     1  23.49%
 111     1  24.34%
 113     1  25.22%
 114     1  26.09%
 116     1  26.99%
 131     2  29.01%
 132     1  30.03%
 140     1  31.11%
 149     1  32.25%
 161     1  33.50%
 162     1  34.74%
 172     1  36.07%
 201     1  37.62%
 211     1  39.25%
 214     1  40.90%
 221     1  42.60%
 222     1  44.31%
 227     1  46.06%
 233     1  47.86%
 260     1  49.86%
 271     1  51.95%
 316     1  54.39%
 322     1  56.87%
 342     1  59.51%
 350     1  62.20%
 391     2  68.23%
 421     1  71.48%
 513     1  75.43%
 550     1  79.67%
 626     1  84.50%
 638     1  89.42%
 676     1  94.63%
 697     1 100.00%
choosing guess 2.........................................
best guess blunt (131) using prefix [serai]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1   919   7.08%
   2   347  12.43%
   3   204  17.15%
   4   125  21.01%
   5    82  24.17%
   6    65  27.17%
   7    46  29.66%
   8    51  32.80%
   9    30  34.88%
  10    35  37.58%
  11    17  39.02%
  12    26  41.43%
  13    21  43.53%
  14    18  45.47%
  15    15  47.21%
  16    10  48.44%
  17    11  49.88%
  18    12  51.55%
  19    12  53.31%
  20    10  54.85%
  21    11  56.63%
  22     8  57.99%
  23     5  58.87%
  25     5  59.84%
  26     2  60.24%
  27     2  60.65%
  28     6  61.95%
  29     6  63.29%
  30     6  64.68%
  31     3  65.39%
  32     3  66.13%
  33     2  66.64%
  34     3  67.43%
  35     3  68.24%
  36     4  69.35%
  37     2  69.92%
  38     2  70.51%
  39     2  71.11%
  41     1  71.42%
  42     2  72.07%
  43     4  73.40%
  44     2  74.07%
  45     1  74.42%
  46     4  75.84%
  47     2  76.56%
  48     2  77.30%
  49     1  77.68%
  52     2  78.48%
  54     1  78.90%
  55     1  79.32%
  56     3  80.62%
  57     2  81.50%
  58     2  82.39%
  59     1  82.85%
  60     1  83.31%
  61     2  84.25%
  62     2  85.21%
  63     1  85.69%
  64     1  86.19%
  67     1  86.70%
  70     1  87.24%
  75     1  87.82%
  78     2  89.02%
  87     1  89.69%
  90     1  90.39%
  91     1  91.09%
  97     2  92.58%
 104     1  93.39%
 113     1  94.26%
 115     1  95.14%
 120     1  96.07%
 124     1  97.02%
 127     1  98.00%
 128     1  98.99%
 131     1 100.00%
choosing guess 3.........................................
best guess moped (39) using prefix [serai blunt]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1  3934  30.33%
   2  1101  47.30%
   3   405  56.67%
   4   247  64.28%
   5   148  69.99%
   6    91  74.20%
   7    74  78.19%
   8    51  81.34%
   9    33  83.63%
  10    28  85.78%
  11    20  87.48%
  12    16  88.96%
  13    20  90.97%
  14     6  91.61%
  15     6  92.31%
  16     6  93.05%
  17     8  94.09%
  18     4  94.65%
  19     3  95.09%
  20     3  95.55%
  21     1  95.71%
  22     5  96.56%
  23     2  96.92%
  24     2  97.29%
  26     1  97.49%
  28     2  97.92%
  29     1  98.14%
  30     2  98.60%
  34     1  98.87%
  35     1  99.14%
  36     1  99.41%
  37     1  99.70%
  39     1 100.00%
choosing guess 4.........................................
best guess gawcy (15) using prefix [serai blunt moped]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1  8318  64.12%
   2  1086  80.87%
   3   325  88.38%
   4   146  92.88%
   5    55  95.00%
   6    34  96.58%
   7    17  97.49%
   8    10  98.11%
   9    12  98.94%
  10     4  99.25%
  11     5  99.68%
  13     1  99.78%
  14     1  99.88%
  15     1 100.00%
choosing guess 5.........................................
best guess fehme (7) using prefix [serai blunt moped gawcy]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1 10184  78.51%
   2   848  91.58%
   3   201  96.23%
   4    64  98.20%
   5    24  99.13%
   6    13  99.73%
   7     5 100.00%
choosing guess 6.........................................
best guess amoks (5) using prefix [serai blunt moped gawcy fehme]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1 11358  87.56%
   2   595  96.73%
   3    87  98.74%
   4    22  99.42%
   5    15 100.00%
choosing guess 7.........................................
best guess ajiva (4) using prefix [serai blunt moped gawcy fehme amoks]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1 11994  92.46%
   2   411  98.80%
   3    40  99.72%
   4     9 100.00%
choosing guess 8.........................................
best guess dusts (3) using prefix [serai blunt moped gawcy fehme amoks ajiva]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1 12364  95.31%
   2   274  99.54%
   3    20 100.00%
choosing guess 9.........................................
best guess aahed (3) using prefix [serai blunt moped gawcy fehme amoks ajiva dusts]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1 12364  95.31%
   2   274  99.54%
   3    20 100.00%
choosing guess 10.........................................
best guess aahed (3) using prefix [serai blunt moped gawcy fehme amoks ajiva dusts aahed]
SIZE     N PERCENT (of words in a bucket of this size or less)
---- ----- -------
   1 12364  95.31%
   2   274  99.54%
   3    20 100.00%
	package main

	import (
	"fmt"
	"io/ioutil"
	"sort"
	"strings"
	)

	// This computes a list of static guesses to prune the Wordle solution space.
	// Guesses are "static" because we choose these guesses without knowing what
	// Wordle's scoring will return for them.
	func main() {
	// Get the word list from https://tiny.one/wordlewords.
	// Make sure that your saved version does not end with a blank line!
	bytes, err := ioutil.ReadFile("words.txt")
	if err != nil {
	panic(err)
	}
	words := strings.Split(string(bytes), "\n")

	if false {
	// Optionally filter the word list to a smaller size.
	// This is mainly useful for testing and debugging.
	filtered := make([]string, 0)
	for i, w := range words {
	if i%5 == 0 {
	filtered = append(filtered, w)
	}
	}
	words = filtered
	}

	// The prefix is the set of guesses that have already been made.
	// We start with an empty prefix and will add to it as each next guess is determined.
	prefix := []string{}
	// This loop greedily chooses the next guess by selecting the one that
	// minimizes the size of the largest bucket of words associated with
	// any possible scoring of a guess (or set of guesses).
	for {
	guess, maxBucketSize, histogram := computeBestGuess(words, prefix)
	fmt.Printf("best guess %s (%d) using prefix %+v\n", guess, maxBucketSize, prefix)
	printHistogram(histogram)
	if maxBucketSize == 1 {
	// A max bucket size of 1 means our guesses would identify all the
	// possible solution words.
	break
	}
	done := false
	for _, p := range prefix {
	if guess == p {
	// A repeated guess means there's no more information to be gained
	// by guessing with the available words.
	done = true
	}
	}
	if done {
	break
	}
	prefix = append(prefix, guess)
	}
	}

	// Compute the next guess which minimizes the largest bucket of possible solutions
	// where the buckets correspond to scorings of our guess and any specified "prefix" guesses.
	func computeBestGuess(words []string, prefix []string) (string, int, map[int]int) {
	steps := len(words) / 40 // We use this to print a progress indicator (below).
	bestGuess := ""
	bestMaxBucketSize := len(words)
	var bestBuckets map[string]int
	fmt.Printf("choosing guess %d", len(prefix)+1)
	for g, guess := range words {
	buckets := make(map[string]int, 0)
	for _, secret := range words {
	var s string
	for _, p := range prefix {
	s += score(p, secret)
	}
	s += score(guess, secret)
	buckets[string(s)] += 1
	}
	max := 0
	for _, v := range buckets {
	if v > max {
	max = v
	}
	}
	if g%steps == 0 {
	fmt.Printf(".")
	}
	if max < bestMaxBucketSize {
	bestGuess = guess
	bestMaxBucketSize = max
	bestBuckets = buckets
	}
	}
	fmt.Printf("\n")
	// Compute a histogram of the buckets.
	// This is the number of buckets of each size in the buckets of possible solutions
	// that we would have after making the best guess.
	histogram := make(map[int]int)
	for _, v := range bestBuckets {
	histogram[v] += 1
	}
	return bestGuess, bestMaxBucketSize, histogram
	}

	// Print a histogram of bucket sizes.
	func printHistogram(histogram map[int]int) {
	keys := make([]int, 0)
	total := 0.0
	for k, v := range histogram {
	keys = append(keys, k)
	total += float64(k * v)
	}
	sort.Ints(keys)
	sum := 0.0
	fmt.Println("SIZE N PERCENT (of words in a bucket of this size or less)")
	fmt.Println("---- ----- -------")
	for _, k := range keys {
	sum += float64(k * histogram[k])
	fmt.Printf("%4d %5d %6.2f%%\n", k, histogram[k], 100.0*sum/total)
	}
	}

	// Score a wordle guess.
	func score(guessWord, secretWord string) string {
	// These byte arrays will be modified as the score is computed.
	guess := []byte(guessWord)
	secret := []byte(secretWord)
	// Mark the exact matches.
	for i, g := range guess {
	if g == secret[i] {
	guess[i] = '2' // Mark the exact match.
	secret[i] = ' ' // We don't want to match this letter against any other.
	}
	}
	// Find letters that are correct but out-of-place.
	for i, g := range guess {
	if g != '2' { // Only do this for letters that weren't exact matches.
	for j, s := range secret {
	if g == s {
	guess[i] = '1' // Mark the right letter that's in the wrong place.
	secret[j] = ' ' // We don't want to match this letter against any other.
	break
	}
	}
	}
	}
	// Set all unmarked letters to '0'.
	for i, g := range guess {
	if g != '1' && g != '2' {
	guess[i] = '0'
	}
	}
	// By now we have transformed the guess into a score. Return it.
	return string(guess)
	}
	choosing guess 1.........................................
	best guess serai (697) using prefix []
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 16 0.12%
	2 16 0.37%
	3 9 0.58%
	4 8 0.82%
	5 1 0.86%
	6 8 1.23%
	7 2 1.34%
	8 2 1.46%
	9 4 1.74%
	10 2 1.90%
	11 4 2.24%
	12 6 2.79%
	13 2 2.99%
	14 1 3.10%
	15 1 3.21%
	17 1 3.35%
	18 4 3.90%
	19 2 4.19%
	20 1 4.35%
	21 2 4.67%
	23 1 4.85%
	24 1 5.03%
	26 1 5.23%
	27 2 5.65%
	28 2 6.08%
	29 1 6.31%
	30 1 6.54%
	31 1 6.78%
	32 1 7.02%
	37 1 7.31%
	39 1 7.61%
	40 1 7.92%
	42 3 8.89%
	43 4 10.21%
	45 1 10.56%
	47 1 10.92%
	49 1 11.30%
	50 1 11.69%
	51 1 12.08%
	52 1 12.48%
	53 1 12.89%
	55 1 13.31%
	56 1 13.74%
	57 1 14.18%
	61 3 15.60%
	65 1 16.10%
	68 1 16.62%
	74 2 17.76%
	75 1 18.34%
	77 1 18.93%
	78 1 19.53%
	79 1 20.14%
	81 1 20.77%
	86 1 21.43%
	87 1 22.10%
	89 1 22.79%
	91 1 23.49%
	111 1 24.34%
	113 1 25.22%
	114 1 26.09%
	116 1 26.99%
	131 2 29.01%
	132 1 30.03%
	140 1 31.11%
	149 1 32.25%
	161 1 33.50%
	162 1 34.74%
	172 1 36.07%
	201 1 37.62%
	211 1 39.25%
	214 1 40.90%
	221 1 42.60%
	222 1 44.31%
	227 1 46.06%
	233 1 47.86%
	260 1 49.86%
	271 1 51.95%
	316 1 54.39%
	322 1 56.87%
	342 1 59.51%
	350 1 62.20%
	391 2 68.23%
	421 1 71.48%
	513 1 75.43%
	550 1 79.67%
	626 1 84.50%
	638 1 89.42%
	676 1 94.63%
	697 1 100.00%
	choosing guess 2.........................................
	best guess blunt (131) using prefix [serai]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 919 7.08%
	2 347 12.43%
	3 204 17.15%
	4 125 21.01%
	5 82 24.17%
	6 65 27.17%
	7 46 29.66%
	8 51 32.80%
	9 30 34.88%
	10 35 37.58%
	11 17 39.02%
	12 26 41.43%
	13 21 43.53%
	14 18 45.47%
	15 15 47.21%
	16 10 48.44%
	17 11 49.88%
	18 12 51.55%
	19 12 53.31%
	20 10 54.85%
	21 11 56.63%
	22 8 57.99%
	23 5 58.87%
	25 5 59.84%
	26 2 60.24%
	27 2 60.65%
	28 6 61.95%
	29 6 63.29%
	30 6 64.68%
	31 3 65.39%
	32 3 66.13%
	33 2 66.64%
	34 3 67.43%
	35 3 68.24%
	36 4 69.35%
	37 2 69.92%
	38 2 70.51%
	39 2 71.11%
	41 1 71.42%
	42 2 72.07%
	43 4 73.40%
	44 2 74.07%
	45 1 74.42%
	46 4 75.84%
	47 2 76.56%
	48 2 77.30%
	49 1 77.68%
	52 2 78.48%
	54 1 78.90%
	55 1 79.32%
	56 3 80.62%
	57 2 81.50%
	58 2 82.39%
	59 1 82.85%
	60 1 83.31%
	61 2 84.25%
	62 2 85.21%
	63 1 85.69%
	64 1 86.19%
	67 1 86.70%
	70 1 87.24%
	75 1 87.82%
	78 2 89.02%
	87 1 89.69%
	90 1 90.39%
	91 1 91.09%
	97 2 92.58%
	104 1 93.39%
	113 1 94.26%
	115 1 95.14%
	120 1 96.07%
	124 1 97.02%
	127 1 98.00%
	128 1 98.99%
	131 1 100.00%
	choosing guess 3.........................................
	best guess moped (39) using prefix [serai blunt]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 3934 30.33%
	2 1101 47.30%
	3 405 56.67%
	4 247 64.28%
	5 148 69.99%
	6 91 74.20%
	7 74 78.19%
	8 51 81.34%
	9 33 83.63%
	10 28 85.78%
	11 20 87.48%
	12 16 88.96%
	13 20 90.97%
	14 6 91.61%
	15 6 92.31%
	16 6 93.05%
	17 8 94.09%
	18 4 94.65%
	19 3 95.09%
	20 3 95.55%
	21 1 95.71%
	22 5 96.56%
	23 2 96.92%
	24 2 97.29%
	26 1 97.49%
	28 2 97.92%
	29 1 98.14%
	30 2 98.60%
	34 1 98.87%
	35 1 99.14%
	36 1 99.41%
	37 1 99.70%
	39 1 100.00%
	choosing guess 4.........................................
	best guess gawcy (15) using prefix [serai blunt moped]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 8318 64.12%
	2 1086 80.87%
	3 325 88.38%
	4 146 92.88%
	5 55 95.00%
	6 34 96.58%
	7 17 97.49%
	8 10 98.11%
	9 12 98.94%
	10 4 99.25%
	11 5 99.68%
	13 1 99.78%
	14 1 99.88%
	15 1 100.00%
	choosing guess 5.........................................
	best guess fehme (7) using prefix [serai blunt moped gawcy]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 10184 78.51%
	2 848 91.58%
	3 201 96.23%
	4 64 98.20%
	5 24 99.13%
	6 13 99.73%
	7 5 100.00%
	choosing guess 6.........................................
	best guess amoks (5) using prefix [serai blunt moped gawcy fehme]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 11358 87.56%
	2 595 96.73%
	3 87 98.74%
	4 22 99.42%
	5 15 100.00%
	choosing guess 7.........................................
	best guess ajiva (4) using prefix [serai blunt moped gawcy fehme amoks]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 11994 92.46%
	2 411 98.80%
	3 40 99.72%
	4 9 100.00%
	choosing guess 8.........................................
	best guess dusts (3) using prefix [serai blunt moped gawcy fehme amoks ajiva]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 12364 95.31%
	2 274 99.54%
	3 20 100.00%
	choosing guess 9.........................................
	best guess aahed (3) using prefix [serai blunt moped gawcy fehme amoks ajiva dusts]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 12364 95.31%
	2 274 99.54%
	3 20 100.00%
	choosing guess 10.........................................
	best guess aahed (3) using prefix [serai blunt moped gawcy fehme amoks ajiva dusts aahed]
	SIZE N PERCENT (of words in a bucket of this size or less)
	---- ----- -------
	1 12364 95.31%
	2 274 99.54%
	3 20 100.00%