-
-
Save kgadek/5213089 to your computer and use it in GitHub Desktop.
(Poorly written) Haskell program that finds the 20 most common words in a text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Text.ParserCombinators.Parsec hiding (spaces) | |
import System.Environment | |
import Data.List (foldl') | |
import qualified Data.Map.Strict as M | |
-- | Read all of standard input, run it through 'getWords', and write the
-- resulting string to standard output.
main :: IO ()
main = getContents >>= putStr . getWords
-- | Produce the program's output for the given input text.
--
-- The text is parsed with 'parseWords'. On success the result is the
-- 'show'n value of @getMost (countWords ws)@ followed by a newline; on a
-- parse failure it is @"Error: "@ followed by the shown parse error (with no
-- trailing newline).
getWords :: String -> String
getWords input = either report summarise parsed
  where
    parsed = parse parseWords "words parser" input
    report failure = "Error: " ++ show failure
    summarise ws = show (getMost (countWords ws)) ++ "\n"
-- | Split the input into words, where a word is a maximal run of characters
-- that are not listed in @symbolOrBlank@.
--
-- Separators are skipped once up front and then only /after/ each word.
-- Skipping them at the start of every word parser makes that parser fail
-- after consuming input when the text is nothing but separators (e.g. a lone
-- newline), which in turn makes 'many' fail with a parse error instead of
-- returning an empty list.
parseWords :: Parser [String]
parseWords = parseBlank >> many parseSingleWord
  where
    -- One word followed by any separators trailing it. 'many1' fails without
    -- consuming input at end of input, so 'many' terminates cleanly.
    parseSingleWord = do
      word <- many1 (noneOf symbolOrBlank)
      parseBlank
      return word
    -- Zero or more separator characters; the characters are discarded.
    parseBlank = skipMany (oneOf symbolOrBlank)
    -- Every character treated as a word separator.
    symbolOrBlank = ",. \n\r!?-:;\"/'=$()[]{}`"
-- | Count how many times each word occurs in the list.
--
-- Every word contributes a count of 1; counts for equal words are summed.
countWords :: [String] -> M.Map String Integer
countWords ws = M.fromListWith (+) [(w, 1) | w <- ws]
-- | Return at most 20 @(count, word)@ pairs, highest count first.
--
-- The ordering map is keyed by the whole @(count, word)@ pair rather than by
-- the count alone: keying by count keeps only one word per distinct count,
-- silently dropping every other word that occurs equally often. Words with
-- equal counts appear in descending word order.
getMost :: M.Map String Integer -> [(Integer, String)]
getMost = take 20 . map fst . M.toDescList . byCountThenWord
  where
    -- Re-key the frequency table so that the map's own key order is the
    -- ranking order; the unit values carry no information.
    byCountThenWord = M.foldlWithKey' (\acc w c -> M.insert (c, w) () acc) M.empty
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment