Last active
December 15, 2015 18:58
-
-
Save tmhedberg/5307286 to your computer and use it in GitHub Desktop.
Split a string into valid words, if it is possible to do so
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import Control.Monad | |
| import Data.Char | |
| import Data.Functor | |
| import Data.List | |
| import Data.Maybe | |
| import Data.Set (Set, fromList, member) | |
| import System.Environment | |
| import System.Exit | |
-- | Entry point: lower-case the first command-line argument, try to split it
-- into dictionary words, and print the words separated by spaces.
--
-- Fails with the error "No argument" when no argument is given, and exits
-- with a failure status when the split produces no words.
main :: IO ()
main = getArgs >>= \args -> case args of
  [] -> error "No argument"
  sentence : _ -> do
    dict <- getWordSet
    let pieces = splitSentence (map toLower sentence) dict
    if null pieces
      then exitFailure
      else putStrLn (unwords pieces)
-- | Load the word list from @/usr/share/dict/words@: the whole file is
-- lower-cased, split into lines, and only the lines accepted by 'okWord'
-- are kept in the resulting set.
getWordSet :: IO (Set String)
getWordSet = do
  contents <- readFile "/usr/share/dict/words"
  let candidates = lines (map toLower contents)
  return (fromList (filter okWord candidates))
-- | Decide whether a dictionary entry is usable as a word.
--
-- The single-letter words @"i"@ and @"a"@ are always accepted. Any other
-- entry must be longer than one character, consist only of letters, not be
-- written entirely in upper case, and contain at least one of the lower-case
-- vowels @a@, @e@, @i@, @o@, @u@.
okWord :: String -> Bool
okWord word
  | word == "i" || word == "a" = True
  | otherwise =
      length word > 1
        && all isAlpha word
        && not (all isUpper word)
        && any (`elem` "aeiou") word
-- | Split a string into dictionary words whose concatenation is the input.
--
-- Returns the words in order, or @[]@ when the input cannot be split (an
-- empty input also yields @[]@).
--
-- The search is depth-first with backtracking and always tries the longest
-- matching prefix first, so the first word of the result is the longest one
-- that still allows the rest of the input to be split.
--
-- Only non-empty prefixes are considered. An empty string in the dictionary
-- would otherwise match at every position without consuming any input and
-- the search would never terminate.
splitSentence :: String -> Set String -> [String]
splitSentence sentence dict = fromMaybe [] (solve sentence)
  where
    -- First successful split of the remaining input, if there is one.
    solve :: String -> Maybe [String]
    solve "" = Just []
    solve rest =
      listToMaybe
        [ w : ws
        | w <- longestFirst rest
        , Just ws <- [solve (drop (length w) rest)]
        ]

    -- Non-empty prefixes of the input that are dictionary words, longest
    -- first. 'drop 1' discards the empty prefix that 'inits' always yields.
    longestFirst :: String -> [String]
    longestFirst = reverse . filter (`member` dict) . drop 1 . inits
-- | Longest prefix of the input that is a member of the given set, or
-- 'Nothing' when no prefix (including the empty one) is a member.
takeWord :: Set String -> String -> Maybe String
takeWord dict input = find (`member` dict) (reverse (inits input))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment