This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns, RecordWildCards #-} | |
import Data.List.Extra | |
import System.Process.Extra | |
import System.Directory | |
import System.FilePath | |
import System.Environment | |
import Data.Char | |
import System.IO | |
import Debug.Trace | |
import Control.Monad | |
import qualified Data.ByteString.Lazy.Char8 as BSL | |
import qualified Codec.Archive.Tar as Tar | |
import qualified Codec.Compression.GZip as Zip | |
main :: IO () | |
main = do | |
args <- getArgs | |
packages <- take (if null args then 20 else maxBound) . sortOn lower . filter (".tar.gz" `isSuffixOf`) <$> getDirectoryContents "mirror" | |
res <- fmap mconcat $ forM packages $ \package -> do | |
files <- Tar.foldEntries (:) [] (const []) . Tar.read . Zip.decompress <$> BSL.readFile ("mirror" </> package) | |
res <- fmap mconcat $ forM files $ \file -> do | |
let fileName = Tar.entryPath file | |
case Tar.entryContent file of | |
Tar.NormalFile bs sz | takeExtension fileName == ".hs" -> do | |
-- print (fileName, calcRes bs) | |
return $! calcRes bs | |
_ -> return mempty | |
putStrLn $ intercalate "," $ package : map show [length files, resSpaceSpaceColon res, resSpaceColon res, resColon res, resColonColon res, fst $ cost res, snd $ cost res] | |
return res | |
hPrint stderr (length packages, res, cost res) | |
cost Res{..} = (resSpaceSpaceColon + resSpaceColon + resColon - resColonColon, resSpaceSpaceColon + resSpaceColon*2 + resColon*3 - resColonColon) | |
data Res = Res | |
{resSpaceSpaceColon :: {-# UNPACK #-} !Int | |
,resSpaceColon :: {-# UNPACK #-} !Int | |
,resColon :: {-# UNPACK #-} !Int | |
,resColonColon :: {-# UNPACK #-} !Int | |
} deriving Show | |
instance Monoid Res where | |
mempty = Res 0 0 0 0 | |
mappend (Res x1 x2 x3 x4) (Res y1 y2 y3 y4) = Res (x1+y1) (x2+y2) (x3+y3) (x4+y4) | |
calcRes :: BSL.ByteString -> Res | |
calcRes = f mempty . BSL.unpack | |
where | |
f !res _ | False = res | |
f res (x:'-':'-':y:xs) | not $ isOperator x, not $ isOperator y = f res $ dropWhile (/= '\n') xs | |
f res ('{':'-':xs) = f res $ snd $ breakOn "-}" xs | |
f res ('\"':xs) = f res (g xs) | |
where | |
g ('\\':x:xs) = g xs | |
g ('\"':xs) = xs | |
g (x:xs) = g xs | |
g [] = [] | |
f res (x:':':':':y:xs) | not $ isOperator x, not $ isOperator y = f res{resColonColon = resColonColon res + 1} xs | |
f res (x:':':y:xs) | not $ isOperator x, not $ isOperator y = f | |
(if isSpace x && isSpace y then res{resSpaceSpaceColon = resSpaceSpaceColon res + 1} | |
else if isSpace x || isSpace y then res{resSpaceColon = resSpaceColon res + 1} | |
else res{resColon = resColon res + 1} | |
) xs | |
f res (_:xs) = f res xs | |
f res _ = res | |
isOperator x = if isAscii x then x `elem` "!#$%&*+./<=>?@\\^|-~" else isSymbol x |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment