Skip to content

Instantly share code, notes, and snippets.

@cwvh
Created February 4, 2014 08:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cwvh/8799966 to your computer and use it in GitHub Desktop.
Save cwvh/8799966 to your computer and use it in GitHub Desktop.
fast reverse ordered character count
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Unsafe as B
import Data.List
import Data.Word
import Foreign
import Foreign.C.Types
import Foreign.Marshal.Alloc
import Foreign.ForeignPtr
import Text.Printf
rle :: B.ByteString -> [(Int, Word8)]
rle (B.PS input s l) = B.inlinePerformIO $ allocaArray 128 $ \arr -> do
B.memset (castPtr arr) 0 (128 * fromIntegral (sizeOf (undefined :: CSize)))
withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
let go 128 xs = return xs
go i xs = do n <- peekElemOff arr i
if n /= 0
then go (i+1) ((fromIntegral n, fromIntegral i):xs)
else go (i+1) xs
go 0 []
where
countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
countOccurrences !counts !str !len = go 0
where
go i | i == len = return ()
| otherwise = do k <- fromIntegral `fmap` peekElemOff str i
x <- peekElemOff counts k
pokeElemOff counts k (x+1)
go (i+1)
tally :: B.ByteString -> [(Int, Word8)]
tally = reverse . sort . rle
main :: IO ()
main = do
contents <- B.getContents
let results = tally contents
mapM_ fmt results
where
fmt (count, ch) = printf "%c %d\n" ch count
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment