Skip to content

Instantly share code, notes, and snippets.

@DanMeakin
Created August 1, 2017 16:12
Show Gist options
  • Save DanMeakin/f2cc74557a8bbf2246f734f25e7ec3b9 to your computer and use it in GitHub Desktop.
Save DanMeakin/f2cc74557a8bbf2246f734f25e7ec3b9 to your computer and use it in GitHub Desktop.
My Haskell solution to the TrustPilot code challenge.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module TrustPilotChallenge where
import Control.Monad
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Digest.Pure.MD5
import Data.Function (on)
import Data.List (groupBy, nub, permutations, sortBy,
tails, (\\))
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, isJust)
type MD5Targets = [String]
type WordList = [ByteString]
-- | Run through the exercise, printing matching phrases as they are found.
--
-- This function finds all three solutions to this exercise, terminating after
-- discovery of the third.
findMatchingPhrases :: IO ()
findMatchingPhrases = do
dict <- readAndFilterWordList anagram "data/wordlist"
putStrLn "Finding phrases"
let phraseLengths = candidateLengths . B.length . B.concat . B.words $ anagram
candidates = candidatePhrases phraseLengths dict anagram
matches =
take 3 .
nub .
fmap (matchingPermutation md5Targets) .
filter (hasMatchingPermutation md5Targets)
mapM_ (putStrLn . B.unpack . B.unwords) . matches $ candidates
--------------------------------------------------------------------------------
-- Challenge Parameters
md5Targets :: MD5Targets
md5Targets =
[ "e4820b45d2277f3844eac66c903e84be" -- Easiest
, "23170acc097c24edb98fc5488ab033fe" -- Harder
, "665e5bcb0c20062fe8abaaf4628bb154" -- Hardest
]
anagram :: ByteString
anagram = "poultry outwits ants"
--------------------------------------------------------------------------------
-- | Read and filter a dictionary file. This removes all invalid words for the
-- purpose of this exercise.
readAndFilterWordList :: ByteString -> FilePath -> IO WordList
readAndFilterWordList anagram path =
filterWordList anagram <$> readWordList path
-- | Read a dictionary file.
readWordList :: FilePath -> IO WordList
readWordList path = B.lines <$> B.readFile path
-- | Filter a word list to narrow the problem space.
--
-- First, filter words based on whether they are wholly contained within the
-- problem anagram. Then, remove duplicates.
filterWordList :: ByteString -> WordList -> WordList
filterWordList anagram = nub . filter (`isContainedWithin` anagram)
--------------------------------------------------------------------------------
-- | Check if any permutation of a wordlist matches an MD5 target.
hasMatchingPermutation :: MD5Targets -> WordList -> Bool
hasMatchingPermutation targets ws = or $ md5Match targets <$> permutations ws
-- | Get the matching permutation of a wordlist matching an MD5 target.
matchingPermutation :: MD5Targets -> WordList -> WordList
matchingPermutation targets ws =
head . filter (md5Match targets) $ permutations ws
md5Match :: MD5Targets -> WordList -> Bool
md5Match targets candidate = (show . md5 . BL.fromStrict . B.unwords) candidate `elem` targets
--------------------------------------------------------------------------------
-- | Create a map of words indexed by word length.
wordsByLength :: WordList -> Map Int WordList
wordsByLength =
Map.fromList .
fmap (\subList -> (B.length . head $ subList, subList)) .
groupBy ((==) `on` B.length) . sortBy (compare `on` B.length)
-- | Return all words of the passed length contained within the passed map.
wordOfLength :: Int -> Map Int WordList -> WordList
wordOfLength nChars =
fromMaybe [] . Map.lookup (fromIntegral nChars)
-- | Returns lists of lengths of possible candidate word lists.
--
-- This returns a nested list of each, with each inner list representing the
-- lengths of words within a possible candidate phrase. The sum of the lengths
-- within each nested list is exactly equal to total. This returns a maximum of
-- four values per nested list, representing up to four words.
candidateLengths :: Int -> [[Int]]
candidateLengths total =
[ filter (/= 0) [a, b, c, d]
| a <- [0 .. total]
, b <- [a .. total]
, c <- [b .. total]
, d <- [c .. total]
, a + b + c + d == total
]
-- | Generates a list of candidate phrases.
--
-- This function takes a set of candidate lengths, a wordlist, an anagram of
-- the target phrase, and then generates a set of phrases which share all the
-- same characters as the anagram.
candidatePhrases :: [[Int]] -> WordList -> ByteString -> [WordList]
candidatePhrases lengths words anagram = do
lenTriple <- lengths
ws <- makeWordsAndCheckValidity lenTriple
guard $ (&&) <$> unique <*> sameChars anagram . B.concat $ ws
return ws
where
makeWordsAndCheckValidity = fmap fst . foldM f ([], anagram)
f (words, anagram) thisLen = do
word <- wordOf thisLen
case stripChars word anagram of
Just ana -> return (word : words, ana)
Nothing -> mzero
wordOf n = wordOfLength n $ wordsByLength words
sameChars = (==) `on` B.sort . B.concat . B.words
--------------------------------------------------------------------------------
-- | Check if all entries in a list are unique.
unique :: Eq a => [a] -> Bool
unique [] = True
unique (x:xs) = x `notElem` xs && unique xs
-- | Check if a substring's characters are wholly contained within a ByteSring.
--
-- This functions similarly to 'stripChars', only returning a Bool to indicate
-- whether the substring's characters are within the main ByteString.
isContainedWithin :: ByteString -> ByteString -> Bool
isContainedWithin substr = isJust . stripChars substr
-- | Strip each of the characters in a ByteString from another.
--
-- Each of the characters to be stripped is removed only once from the target
-- ByteString. If any characters within the substring are not contained within
-- the target ByteString, Nothing is returned.
stripChars :: ByteString -> ByteString -> Maybe ByteString
stripChars substr mainstr = B.foldl f (Just mainstr) substr
where
f remMainstr substrChar = remMainstr >>= rejoin . B.break (== substrChar)
rejoin (_, B.null -> True) = Nothing
rejoin (top, B.tail -> rest) = Just (top `B.append` rest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment