Skip to content

Instantly share code, notes, and snippets.

@isovector
Created January 21, 2022 01:59
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save isovector/00c7974588e5e1dfeb94f2ea7a7ef675 to your computer and use it in GitHub Desktop.
Save isovector/00c7974588e5e1dfeb94f2ea7a7ef675 to your computer and use it in GitHub Desktop.
import qualified Data.Set as S
import Data.Set (Set)
import Data.Char (isLower)
import Data.Ord (comparing, Down (Down))
import Data.List (sortBy, subsequences, minimumBy, maximumBy)
import Control.Monad.Trans.Writer.CPS
import Data.Monoid
import Data.Foldable (traverse_)
wordFilter :: String -> Bool
wordFilter w = length w == 5 && all (flip elem letters) w
type Dict = Set String
data Pos = P1 | P2 | P3 | P4 | P5
deriving (Eq, Ord, Show, Enum, Bounded)
data Result = Exact Char Pos | Has Char | Hasnt Char
deriving (Eq, Ord, Show)
data Hit = Yup | Hit | Miss
deriving (Eq, Ord, Show)
parseHit :: Char -> Maybe Hit
parseHit 'x' = Just Yup
parseHit '.' = Just Hit
parseHit ' ' = Just Miss
parseHit _ = Nothing
parseHits :: String -> Maybe [Hit]
parseHits = traverse parseHit . take 5
makeResult :: [Char] -> [Hit] -> [Result]
makeResult = go P1
where
go :: Pos -> [Char] -> [Hit] -> [Result]
go n (s : ss) (Yup : hs) = Exact s n : go (succ n) ss hs
go n (s : ss) (Hit : hs) = Has s : go (succ n) ss hs
go n (s : ss) (Miss : hs) = Hasnt s : go (succ n) ss hs
go _ [] [] = []
go _ _ _ = error "bad bad man"
refineDict :: Result -> Dict -> Dict
refineDict (Exact c pos) ws = S.filter ((== c) . posToChar pos) ws
refineDict (Has c) ws = S.filter (elem c) ws
refineDict (Hasnt c) ws = S.filter (not . elem c) ws
posToChar :: Pos -> String -> Char
posToChar p s = s !! fromEnum p
entropy :: Dict -> Char -> Int
entropy d c =
let without = refineDict (Hasnt c) d
with = d S.\\ without
in abs $ S.size without - S.size with
check :: String -> String -> [Result]
check word' = go P1 (S.fromList word') word'
where
go n bag (w : word) (g : guess)
| w == g = Exact g n : go (succ n) bag word guess
| (S.member g bag) = Has g : go (succ n) bag word guess
| not (S.member g bag) = Hasnt g : go (succ n) bag word guess
| otherwise = go (succ n) bag word guess
go _ _ [] [] = []
go _ _ _ _ = error "broken invariant"
letters :: [Char]
letters = ['a' .. 'z']
best :: Dict -> [Char]
best d = sortBy (comparing $ entropy d) letters
counts :: Dict -> [(Char, Int)]
counts d = fmap (\x -> (x, entropy d x)) letters
wordScore :: Dict -> String -> (Int, Down Int)
wordScore d s =
let s' = S.toList $ S.fromList s
num_dups = 5 - length s'
k = S.size d
in (length s', Down $ sum (fmap (entropy d) s'))
nextGuess :: Dict -> Dict -> String
nextGuess all_words dict = maximumBy (comparing $ wordScore dict) $ S.elems all_words
search :: String -> Dict -> Dict -> IO ()
search word d0 d | S.size d == 1 = putStrLn $ head $ S.elems d
search word d0 d | S.null d = error "NO MORE WORDS"
search word d0 d = do
let g = nextGuess d0 d
putStrLn g
let res = check word g
let d' = appEndo (foldMap (Endo . refineDict) res) d
print $ S.toList d'
print $ log (fromIntegral (S.size d) / fromIntegral (S.size d')) / log 2
search word d0 d'
seek :: Dict -> Dict -> IO ()
seek d0 d | S.size d == 1 = putStrLn $ head $ S.elems d
seek d0 d | S.null d = error "NO MORE WORDS"
seek d0 d = do
let g = nextGuess d0 d
putStrLn g
putStr "> "
Just x <- fmap parseHits getLine
let res = makeResult g x
seek d0 $ appEndo (foldMap (Endo . refineDict) res) d
main :: IO ()
main = do
dict <- fmap (S.fromList . filter wordFilter . lines) $ readFile "words"
-- dict <- fmap (S.fromList . filter wordFilter . lines) $ readFile "/usr/share/dict/words"
let word = "pilot"
search word dict dict
@isovector
Copy link
Author

Yeah, it's definitely the same game!

@jhrcek
Copy link

jhrcek commented Jan 25, 2022

Thanks for the interesting article!
Just played with the code for fun and the performance is abysmal.
There are >15k words in /usr/share/dict/words on my system that pass the five letter all-lowercase filter.
It takes couple of minutes before I get any output from this script.
Most time is spent in nextGuess which has quadratic complexity (for each of the 15k words your're going through entire Dict to calculate entropy of each of the word's Chars). Calculating entropy for each char only once greatly speeds this up.
Nevertheless I still learned something playing with the code 😄

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment