Skip to content

Instantly share code, notes, and snippets.

@ndmitchell
Last active November 23, 2018 13:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ndmitchell/f758711a017429b514e66ff4057730f1 to your computer and use it in GitHub Desktop.
Save ndmitchell/f758711a017429b514e66ff4057730f1 to your computer and use it in GitHub Desktop.
{-# LANGUAGE BangPatterns, RecordWildCards #-}
import Data.List.Extra
import System.Process.Extra
import System.Directory
import System.FilePath
import System.Environment
import Data.Char
import System.IO
import Debug.Trace
import Control.Monad
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Codec.Archive.Tar as Tar
import qualified Codec.Compression.GZip as Zip
main :: IO ()
main = do
args <- getArgs
packages <- take (if null args then 20 else maxBound) . sortOn lower . filter (".tar.gz" `isSuffixOf`) <$> getDirectoryContents "mirror"
res <- fmap mconcat $ forM packages $ \package -> do
files <- Tar.foldEntries (:) [] (const []) . Tar.read . Zip.decompress <$> BSL.readFile ("mirror" </> package)
res <- fmap mconcat $ forM files $ \file -> do
let fileName = Tar.entryPath file
case Tar.entryContent file of
Tar.NormalFile bs sz | takeExtension fileName == ".hs" -> do
-- print (fileName, calcRes bs)
return $! calcRes bs
_ -> return mempty
putStrLn $ intercalate "," $ package : map show [length files, resSpaceSpaceColon res, resSpaceColon res, resColon res, resColonColon res, fst $ cost res, snd $ cost res]
return res
hPrint stderr (length packages, res, cost res)
cost Res{..} = (resSpaceSpaceColon + resSpaceColon + resColon - resColonColon, resSpaceSpaceColon + resSpaceColon*2 + resColon*3 - resColonColon)
data Res = Res
{resSpaceSpaceColon :: {-# UNPACK #-} !Int
,resSpaceColon :: {-# UNPACK #-} !Int
,resColon :: {-# UNPACK #-} !Int
,resColonColon :: {-# UNPACK #-} !Int
} deriving Show
instance Monoid Res where
mempty = Res 0 0 0 0
mappend (Res x1 x2 x3 x4) (Res y1 y2 y3 y4) = Res (x1+y1) (x2+y2) (x3+y3) (x4+y4)
calcRes :: BSL.ByteString -> Res
calcRes = f mempty . BSL.unpack
where
f !res _ | False = res
f res (x:'-':'-':y:xs) | not $ isOperator x, not $ isOperator y = f res $ dropWhile (/= '\n') xs
f res ('{':'-':xs) = f res $ snd $ breakOn "-}" xs
f res ('\"':xs) = f res (g xs)
where
g ('\\':x:xs) = g xs
g ('\"':xs) = xs
g (x:xs) = g xs
g [] = []
f res (x:':':':':y:xs) | not $ isOperator x, not $ isOperator y = f res{resColonColon = resColonColon res + 1} xs
f res (x:':':y:xs) | not $ isOperator x, not $ isOperator y = f
(if isSpace x && isSpace y then res{resSpaceSpaceColon = resSpaceSpaceColon res + 1}
else if isSpace x || isSpace y then res{resSpaceColon = resSpaceColon res + 1}
else res{resColon = resColon res + 1}
) xs
f res (_:xs) = f res xs
f res _ = res
isOperator x = if isAscii x then x `elem` "!#$%&*+./<=>?@\\^|-~" else isSymbol x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment