{-# LANGUAGE BangPatterns, RecordWildCards #-} | |
import Data.List.Extra | |
import System.Process.Extra | |
import System.Directory | |
import System.FilePath | |
import System.Environment | |
import Data.Char | |
import System.IO | |
import Debug.Trace | |
import Control.Monad | |
import qualified Data.ByteString.Lazy.Char8 as BSL | |
import qualified Codec.Archive.Tar as Tar | |
import qualified Codec.Compression.GZip as Zip | |
main :: IO () | |
main = do | |
args <- getArgs | |
packages <- take (if null args then 20 else maxBound) . sortOn lower . filter (".tar.gz" `isSuffixOf`) <$> getDirectoryContents "mirror" | |
res <- fmap mconcat $ forM packages $ \package -> do | |
files <- Tar.foldEntries (:) [] (const []) . Tar.read . Zip.decompress <$> BSL.readFile ("mirror" </> package) | |
res <- fmap mconcat $ forM files $ \file -> do | |
let fileName = Tar.entryPath file | |
case Tar.entryContent file of | |
Tar.NormalFile bs sz | takeExtension fileName == ".hs" -> do | |
-- print (fileName, calcRes bs) | |
return $! calcRes bs | |
_ -> return mempty | |
putStrLn $ intercalate "," $ package : map show [length files, resSpaceSpaceColon res, resSpaceColon res, resColon res, resColonColon res, fst $ cost res, snd $ cost res] | |
return res | |
hPrint stderr (length packages, res, cost res) | |
cost Res{..} = (resSpaceSpaceColon + resSpaceColon + resColon - resColonColon, resSpaceSpaceColon + resSpaceColon*2 + resColon*3 - resColonColon) | |
data Res = Res | |
{resSpaceSpaceColon :: {-# UNPACK #-} !Int | |
,resSpaceColon :: {-# UNPACK #-} !Int | |
,resColon :: {-# UNPACK #-} !Int | |
,resColonColon :: {-# UNPACK #-} !Int | |
} deriving Show | |
instance Monoid Res where | |
mempty = Res 0 0 0 0 | |
mappend (Res x1 x2 x3 x4) (Res y1 y2 y3 y4) = Res (x1+y1) (x2+y2) (x3+y3) (x4+y4) | |
calcRes :: BSL.ByteString -> Res | |
calcRes = f mempty . BSL.unpack | |
where | |
f !res _ | False = res | |
f res (x:'-':'-':y:xs) | not $ isOperator x, not $ isOperator y = f res $ dropWhile (/= '\n') xs | |
f res ('{':'-':xs) = f res $ snd $ breakOn "-}" xs | |
f res ('\"':xs) = f res (g xs) | |
where | |
g ('\\':x:xs) = g xs | |
g ('\"':xs) = xs | |
g (x:xs) = g xs | |
g [] = [] | |
f res (x:':':':':y:xs) | not $ isOperator x, not $ isOperator y = f res{resColonColon = resColonColon res + 1} xs | |
f res (x:':':y:xs) | not $ isOperator x, not $ isOperator y = f | |
(if isSpace x && isSpace y then res{resSpaceSpaceColon = resSpaceSpaceColon res + 1} | |
else if isSpace x || isSpace y then res{resSpaceColon = resSpaceColon res + 1} | |
else res{resColon = resColon res + 1} | |
) xs | |
f res (_:xs) = f res xs | |
f res _ = res | |
isOperator x = if isAscii x then x `elem` "!#$%&*+./<=>?@\\^|-~" else isSymbol x |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment