Created March 21, 2013 13:41
(Poorly written) Haskell programme to find the 20 most common words in a text
import Data.List (foldl', sortOn)
import Data.Ord (Down (..))
import System.Environment

import qualified Data.Map.Strict as M
import Text.ParserCombinators.Parsec hiding (spaces)
-- | Read all of standard input, pass it through 'getWords', and write the
-- resulting text to standard output.
main :: IO ()
main = getContents >>= putStr . getWords
-- | Turn raw input text into the printable report.
--
-- The input is tokenised with 'parseWords'. On success the result is the
-- 'show'n output of 'getMost' applied to the 'countWords' tally, followed by
-- a newline. On a parse failure the result is @"Error: "@ followed by the
-- 'show'n parse error, with no trailing newline.
getWords :: String -> String
getWords input = either failure report parsed
  where
    parsed = parse parseWords "words parser" input
    failure err = "Error: " ++ show err
    report ws = show (getMost (countWords ws)) ++ "\n"
-- | Parse the whole input into its list of words.
--
-- Separator characters are skipped (via 'parseBlank') before the first word
-- and after every word. 'parseSingleWord' stops at the first separator
-- without consuming it, so a bare @many parseSingleWord@ would return at most
-- the first word and silently ignore the rest of the input.
parseWords :: Parser [String]
parseWords = leadingBlanks *> many word <* eof
  where
    leadingBlanks = parseBlank
    -- One word followed by whatever separators trail it.
    word = parseSingleWord <* parseBlank
-- | Parse one word: a non-empty run of characters that do not appear in
-- 'symbolOrBlank'. Parsing stops at the first separator character, which is
-- left unconsumed.
parseSingleWord :: Parser String
parseSingleWord = many1 wordChar
  where
    wordChar = noneOf symbolOrBlank
-- | Consume a possibly empty run of characters from 'symbolOrBlank' and
-- return the consumed characters.
--
-- The explicit signature fixes the stream type. Without it this point-free
-- binding falls under the monomorphism restriction, and its @Stream@
-- constraint is ambiguous unless some use site pins the type.
parseBlank :: Parser String
parseBlank = many (oneOf symbolOrBlank)
-- | Every character treated as a word separator: punctuation, brackets, the
-- space, and the newline, carriage-return and tab control characters.
--
-- Tab is included so that tab-separated words are not merged into one token.
symbolOrBlank :: String
symbolOrBlank = ",. \n\r\t!?-:;\"/'=$()[]{}`"
-- | Tally how many times each word occurs in the given list.
countWords :: [String] -> M.Map String Integer
countWords = foldl' bump M.empty
  where
    -- Add one occurrence of @w@, starting from 1 when the word is new.
    bump tally w = M.insertWith (+) w 1 tally
-- | Return up to 20 @(count, word)@ pairs with the highest counts.
--
-- Pairs are ordered by descending count; words that share a count appear in
-- ascending word order. Every word keeps its own entry: inverting the tally
-- into a map keyed by count would keep only one word per distinct count,
-- because later inserts overwrite earlier ones with the same key.
getMost :: M.Map String Integer -> [(Integer, String)]
getMost counts = take 20 (sortOn rank pairs)
  where
    pairs = [(c, w) | (w, c) <- M.toList counts]
    -- 'Down' reverses the ordering of the count only, so ties fall back to
    -- the ordinary ascending order on the word.
    rank (c, w) = (Down c, w)
