Created
April 26, 2011 20:31
-
-
Save beastaugh/943054 to your computer and use it in GitHub Desktop.
Script to count how many different packages use particular licenses on Hackage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module Main (main) where | |
{- | |
To use this script you need to extract your local Hackage index file. If
your setup is anything like mine, you should be able to find a gzipped tar
archive containing the listing somewhere like this:
    ~/.cabal/packages/hackage.haskell.org/00-index.tar.gz
Extract that file into a directory called "index" in the same directory as | |
this script. Then you'll have a tree of files something like this: | |
- CountLicenses.hs
- index
    * package-one
    * package-two
    * package-three
        - 0.1
        - 0.2
            * package-three.cabal
To use this script just run | |
runghc CountLicenses.hs | |
Alternatively, compile it and then run it | |
ghc --make -Wall CountLicenses | |
./CountLicenses | |
Essentially all this script does is traverse the directories under "index", | |
grab the "License" fields from the .cabal files, count how many times each | |
license is used and then print the results. | |
Dependencies | |
This script depends on the 'text' package, which comes with the Haskell | |
Platform and is available from Hackage via the cabal install tool. All | |
other dependencies are libraries which come with GHC. | |
-} | |
import Prelude hiding (readFile) | |
import Control.Monad (filterM, liftM) | |
import Data.List (group, sort, sortBy) | |
import Data.Ord (comparing) | |
import qualified Data.Text as T | |
import Data.Text.IO (readFile) | |
import System.Directory (doesDirectoryExist, getDirectoryContents) | |
import System.FilePath ((</>), (<.>), takeBaseName) | |
import Text.Printf (printf) | |
-- | Entry point. Walks the package directories under @./index/@, locates the
-- .cabal file of the newest version of every package, tallies the licenses
-- found in those files and prints a summary followed by one line per license.
main :: IO ()
main = do
    let path = "./index/"
    dirContents <- getDirectoryContents path
    -- 'getDirectoryContents' includes "." and ".." and makes no promise about
    -- where they appear, so filter them by name instead of dropping the
    -- first two entries.
    packages <- filterM doesDirectoryExist
                    (map (path </>) . filter isRealEntry $ dirContents)
    versions <- mapM getDirectoryContents packages
    let pkgNames   = map takeBaseName packages
        -- Packages without any version directory are skipped rather than
        -- crashing the whole run.
        cabalFiles = [ path </> p </> v </> p <.> "cabal"
                     | (p, Just v) <- zip pkgNames (map latestVersion versions)
                     ]
    licenses <- getLicenses cabalFiles
    let total  = sum . map snd $ licenses
        showPc = uncurry (showPercentage total)
    putStrLn $ "Your Hackage listing contains " ++
        (show . length) packages ++ " packages with " ++
        (show . length) licenses ++ " different licenses.\n"
    putStr . unlines . map showPc $ licenses

-- | True for every directory entry except the special "." and ".." entries.
isRealEntry :: FilePath -> Bool
isRealEntry entry = entry `notElem` [".", ".."]

-- | Pick the entry with the numerically greatest dotted version number.
-- Entries that are not dotted numeric versions (including "." and "..") are
-- ignored; 'Nothing' is returned when no entry qualifies.
latestVersion :: [FilePath] -> Maybe FilePath
latestVersion entries =
    case reverse (sortBy (comparing fst) keyed) of
        []             -> Nothing
        ((_, new) : _) -> Just new
  where
    keyed = [ (key, entry) | entry <- entries, Just key <- [versionKey entry] ]

-- | Parse a dotted numeric version such as @"0.10.2"@ into its components so
-- that versions compare numerically (0.10 is newer than 0.9). Returns
-- 'Nothing' for anything that is not a non-empty, dot-separated list of
-- decimal numbers.
versionKey :: String -> Maybe [Integer]
versionKey str
    | null digits || not (all (`elem` ['0' .. '9']) digits) = Nothing
    | otherwise = case (reads digits, remainder) of
        ([(n, [])], [])         -> Just [n]
        ([(n, [])], (_ : more)) -> fmap (n :) (versionKey more)
        _                       -> Nothing
  where
    (digits, remainder) = break (== '.') str
-- | Read the license of every given .cabal file and tally how often each
-- distinct license occurs. The result pairs each license with its count and
-- is ordered from most to least frequent.
getLicenses :: [String] -> IO [(T.Text, Int)]
getLicenses files = fmap summarise (mapM getLicense files)
  where
    -- Sorting first makes equal licenses adjacent so 'group' can bucket them.
    summarise = map tally . reverse . sortBy (comparing length) . group . sort
    -- 'group' never produces an empty bucket, so 'head' is safe here.
    tally bucket = (head bucket, length bucket)
-- | Return the value of the first license field found in the given file, or
-- the placeholder text "No license given" when the file has no such line.
getLicense :: FilePath -> IO T.Text
getLicense fileName = liftM firstLicense (getLines fileName)
  where
    firstLicense = pick . map toLicense . filter isLicenseLine
    -- Only the first matching line counts; later ones are ignored.
    pick (found : _) = found
    pick []          = "No license given"
-- | Decide whether a line is a @license:@ field. The text before the first
-- colon is compared case-insensitively, ignoring surrounding whitespace, with
-- "LICENSE". A line without any colon is never a field, so a bare word
-- "License" (for example inside a description) is rejected.
isLicenseLine :: T.Text -> Bool
isLicenseLine line =
    not (T.null rest) && T.toUpper (T.strip name) == "LICENSE"
  where
    -- 'rest' starts at the first colon and is empty when there is none.
    (name, rest) = T.break (== ':') line
-- | Extract the value of a @name: value@ line: everything after the first
-- colon, with surrounding whitespace removed. Total: a line without a colon
-- yields the empty text instead of crashing, and a value that itself contains
-- a colon is kept whole.
toLicense :: T.Text -> T.Text
toLicense = T.strip . T.drop 1 . snd . T.break (== ':')
-- | Render one line of the report: the license name, how many packages use
-- it, and that count as a percentage of @total@ with two decimal places.
showPercentage :: Int -> T.Text -> Int -> String
showPercentage total license count =
    concat [T.unpack license, ": ", show count, " (", formatted, "%)"]
  where
    share :: Double
    share = (fromIntegral count / fromIntegral total) * 100.0

    formatted :: String
    formatted = printf "%.2f" share
-- | Read a file as text (using "Data.Text.IO"'s 'readFile') and split it
-- into its lines.
getLines :: FilePath -> IO [T.Text]
getLines filePath = fmap T.lines (readFile filePath)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# As of Tue 26 Apr 2011 21:54:38 BST | |
Your Hackage listing contains 2985 packages with 12 different licenses. | |
BSD3: 2159 (72.33%) | |
GPL: 345 (11.56%) | |
LGPL: 117 (3.92%) | |
OtherLicense: 107 (3.58%) | |
PublicDomain: 85 (2.85%) | |
MIT: 67 (2.24%) | |
GPL-3: 64 (2.14%) | |
LGPL-2.1: 16 (0.54%) | |
GPL-2: 12 (0.40%) | |
LGPL-3: 8 (0.27%) | |
BSD4: 3 (0.10%) | |
No license given: 2 (0.07%) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment