Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Fixed wordfreq solutions to use text/bytestring

View wordfreq-bs.hs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
{-# LANGUAGE BangPatterns #-}
module Main where
 
import Prelude
import Data.ByteString as B
import Data.ByteString.Char8 as BC8
import System.Environment (getArgs)
import Control.Arrow
 
import Data.List (sortBy)
import Data.Char (isLetter)
import qualified Data.HashMap.Strict as HM
import Data.Ord (comparing)
 
-- | ASCII-only letter test on a numeric character code: 'True' exactly for
-- 97..122 (@a@-@z@) and 65..90 (@A@-@Z@). Mirrors the a-zA-Z ranges used in D
-- and, unlike 'Data.Char.isLetter', does not handle unicode data.
isLetterFast :: (Ord a, Num a) => a -> Bool
isLetterFast c = within 97 122 || within 65 90
  where
    -- Inclusive range check against the code under test.
    within lo hi = lo <= c && c <= hi
 
-- | 'Data.Char.toLower' equivalent for byte values: codes 65..90 (@A@-@Z@)
-- are shifted up by 32 onto 97..122 (@a@-@z@); every other code is returned
-- unchanged.
toLowerWord8 :: (Ord a, Num a) => a -> a
toLowerWord8 c =
  if c >= 65 && c <= 90
    then c + 32
    else c
 
-- | Keep a character code that 'isLetterFast' accepts; map every other code
-- to 32 (ASCII space) so it later acts as a word separator.
replaceNonLetter :: (Ord a, Num a) => a -> a
replaceNonLetter c =
  if isLetterFast c
    then c
    else 32 -- ' '
 
-- | Build the word-frequency report for @text@: at most @n@ lines of the form
-- @word count@, ordered by descending count.
--
-- Every byte is first passed through 'toLowerWord8' and 'replaceNonLetter',
-- then the result is split into words with 'BC8.words'. Words with equal
-- counts are not ordered any further.
createReport :: Int -> ByteString -> String
createReport n text = Prelude.unlines (Prelude.map render top)
  where
    -- Normalised input split into words.
    tokens = BC8.words (B.map (replaceNonLetter . toLowerWord8) text)
    -- One entry per distinct word, summing a 1 for each occurrence.
    tally = HM.fromListWith (+) [(tok, 1) | tok <- tokens]
    -- The n most frequent entries, highest count first.
    top = Prelude.take n (sortBy (flip (comparing snd)) (HM.toList tally))
    render (tok, hits) = BC8.unpack tok ++ " " ++ show hits
 
 
-- | Program entry point. Expects exactly two command-line arguments: the path
-- of the file to analyse and the number of words to report. Reads the file
-- with 'BC8.readFile' and prints the result of 'createReport' to stdout.
--
-- A wrong number of arguments, or a count that does not parse as an 'Int',
-- ends the program with an 'ioError' carrying an explanatory message instead
-- of an anonymous pattern-match failure or @Prelude.read: no parse@.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [fileName, nstr] ->
      -- Same acceptance rule as 'read' (trailing whitespace is allowed), but
      -- total: anything other than a single full parse is rejected.
      case [x | (x, rest) <- reads nstr, ("", "") <- lex rest] of
        [n] -> do
          text <- BC8.readFile fileName
          Prelude.putStr $ createReport n text
        _ -> ioError (userError ("invalid word count: " ++ nstr))
    _ -> ioError (userError "usage: wordfreq FILE N")
View wordfreq-bs.hs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
{-# LANGUAGE BangPatterns #-}
module Main where
 
import Prelude
import Data.Text as T
import Data.Text.IO as T
import System.Environment (getArgs)
import Control.Arrow
 
import Data.List (sortBy)
import Data.Char (isLetter)
import qualified Data.HashMap.Strict as HM
import Data.Ord (comparing)
 
-- | Keep a character that 'isLetter' accepts; turn every other character into
-- a space so it later acts as a word separator.
replaceNonLetter :: Char -> Char
replaceNonLetter c =
  if isLetter c
    then c
    else ' '
 
-- | Build the word-frequency report for @text@: at most @n@ lines of the form
-- @word count@, ordered by descending count.
--
-- The text is lower-cased with 'T.toLower', every character is passed through
-- 'replaceNonLetter', and the result is split into words with 'T.words'.
-- Words with equal counts are not ordered any further.
createReport :: Int -> Text -> String
createReport n text =
  Prelude.unlines $
    -- 'Prelude.show' is qualified on purpose: "Data.Text" is imported
    -- unqualified in this module and newer releases of it export their own
    -- @show@, which would make a bare @show@ ambiguous.
    Prelude.map (\(w, hits) -> T.unpack w ++ " " ++ Prelude.show hits) $
      Prelude.take n $
        sortBy (flip $ comparing snd) $
          HM.toList $
            HM.fromListWith (+) $
              Prelude.map (\w -> (w, 1)) $
                T.words $
                  T.map replaceNonLetter $
                    T.toLower text
 
-- | Program entry point. Expects exactly two command-line arguments: the path
-- of the file to analyse and the number of words to report. Reads the file
-- with 'T.readFile' and prints the result of 'createReport' to stdout.
--
-- A wrong number of arguments, or a count that does not parse as an 'Int',
-- ends the program with an 'ioError' carrying an explanatory message instead
-- of an anonymous pattern-match failure or @Prelude.read: no parse@.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [fileName, nstr] ->
      -- Same acceptance rule as 'read' (trailing whitespace is allowed), but
      -- total: anything other than a single full parse is rejected.
      case [x | (x, rest) <- reads nstr, ("", "") <- lex rest] of
        [n] -> do
          text <- T.readFile fileName
          Prelude.putStr $ createReport n text
        _ -> ioError (userError ("invalid word count: " ++ nstr))
    _ -> ioError (userError "usage: wordfreq FILE N")

For what it's worth, I finished the ByteStringing of your ByteString module; it seems a bit more than twice as fast that way. https://gist.github.com/anonymous/5850805 I made an attempt to avoid the Data.Char functions for Text, which seemed to give a 25-30% speedup, at the cost of silly boilerplate.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.