Skip to content

Instantly share code, notes, and snippets.

@beastaugh
Created April 26, 2011 20:31
Show Gist options
  • Save beastaugh/943054 to your computer and use it in GitHub Desktop.
Save beastaugh/943054 to your computer and use it in GitHub Desktop.
Script to count how many different packages use particular licenses on Hackage.
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
{-
To use this script you need to extract your local Hackage index file. If
your setup is anything like mine, you should be able to find a .tar.gz
archive containing the listing somewhere like this:
~/.cabal/packages/hackage.haskell.org/00-index.tar.gz
Extract that file into a directory called "index" in the same directory as
this script. Then you'll have a tree of files something like this:
- CountLicenses.hs
- index
    * package-one
    * package-two
    * package-three
        - 0.1
        - 0.2
            * package-three.cabal
To use this script just run
runghc CountLicenses.hs
Alternatively, compile it and then run it
ghc --make -Wall CountLicenses
./CountLicenses
Essentially all this script does is traverse the directories under "index",
grab the "License" fields from the .cabal files, count how many times each
license is used and then print the results.
Dependencies
This script depends on the 'text' package, which comes with the Haskell
Platform and is available from Hackage via the cabal install tool. All
other dependencies are libraries which come with GHC.
-}
import Prelude hiding (readFile)
import Control.Monad (filterM, liftM)
import Data.List (group, sort, sortBy)
import Data.Ord (comparing)
import qualified Data.Text as T
import Data.Text.IO (readFile)
import System.Directory (doesDirectoryExist, getDirectoryContents)
import System.FilePath ((</>), (<.>), takeBaseName)
import Text.Printf (printf)
-- | Walk the extracted Hackage index under @./index/@, read the @License@
-- field from the .cabal file of the latest version of every package, and
-- print how many packages use each license.
--
-- The reported package count is the number of package directories found,
-- including any that turn out to contain no version entries.
main :: IO ()
main = do
  let path = "./index/"
  dirContents <- getDirectoryContents path
  -- The order of 'getDirectoryContents' is unspecified, so "." and ".." are
  -- removed by name rather than by assuming they are the first two entries.
  packages <- filterM doesDirectoryExist
                [path </> entry | entry <- dirContents, isRealEntry entry]
  versions <- mapM (liftM (filter isRealEntry) . getDirectoryContents) packages
  let cabalFiles =
        [ pkgDir </> version </> takeBaseName pkgDir <.> "cabal"
        | (pkgDir, candidates) <- zip packages versions
        , version <- latestVersion candidates
        ]
  licenses <- getLicenses cabalFiles
  let total = sum . map snd $ licenses
      showPc = uncurry (showPercentage total)
  putStrLn $ "Your Hackage listing contains " ++
    (show . length) packages ++ " packages with " ++
    (show . length) licenses ++ " different licenses.\n"
  putStr . unlines . map showPc $ licenses
  where
    -- True for everything except the "." and ".." pseudo-entries.
    isRealEntry :: FilePath -> Bool
    isRealEntry entry = entry `notElem` [".", ".."]

    -- The numerically greatest version as a zero-or-one element list, so a
    -- package directory without any entries is skipped instead of crashing.
    latestVersion :: [FilePath] -> [FilePath]
    latestVersion = take 1 . sortBy (flip (comparing versionKey))

    -- Compare versions component-wise as numbers ("0.10" is newer than
    -- "0.9"). Components that are not plain integers map to -1, so entries
    -- that are not dotted numeric versions sort below real versions.
    versionKey :: FilePath -> [Int]
    versionKey = map component . T.splitOn "." . T.pack

    component :: T.Text -> Int
    component piece = case reads (T.unpack piece) of
      [(n, "")] -> n
      _ -> -1
-- | Read the license of every given .cabal file and tally the results.
--
-- Returns one @(license, count)@ pair per distinct license, ordered from the
-- most used license to the least used one.
getLicenses :: [String] -> IO [(T.Text, Int)]
getLicenses files = do
  found <- mapM getLicense files
  let buckets = group (sort found)
      -- Ascending stable sort followed by 'reverse' gives descending counts.
      ranked = reverse (sortBy (comparing length) buckets)
  return [(name, length bucket) | bucket@(name : _) <- ranked]
-- | Return the license declared in the given .cabal file.
--
-- The first line recognised by 'isLicenseLine' wins; if no such line exists
-- the result is the placeholder text @"No license given"@.
getLicense :: FilePath -> IO T.Text
getLicense fileName = liftM pick (getLines fileName)
  where
    pick contents = case filter isLicenseLine contents of
      (firstMatch : _) -> toLicense firstMatch
      [] -> "No license given"
-- | Decide whether a line of a .cabal file is a @License@ field.
--
-- A line qualifies when it contains at least one colon and the text before
-- the first colon, ignoring surrounding whitespace and letter case, is
-- exactly @license@. Requiring the colon means a bare word such as
-- @License@ (with no value after it) is not mistaken for the field.
isLicenseLine :: T.Text -> Bool
isLicenseLine line = case T.split (== ':') line of
  (key : _ : _) -> T.toUpper (T.strip key) == "LICENSE"
  _ -> False
-- | Extract the value of a @key: value@ line.
--
-- The result is the text between the first and second colon with
-- surrounding whitespace removed. A line without any colon yields the empty
-- text rather than raising an exception.
toLicense :: T.Text -> T.Text
toLicense line = case T.split (== ':') line of
  (_ : value : _) -> T.strip value
  _ -> T.empty
-- | Render one line of the report, e.g. @BSD3: 2159 (72.33%)@.
--
-- The first argument is the total number of counted packages, followed by
-- the license name and the number of packages using it. The share is shown
-- as a percentage with two decimal places.
showPercentage :: Int -> T.Text -> Int -> String
showPercentage total license count =
  printf "%s: %d (%.2f%%)" (T.unpack license) count share
  where
    share :: Double
    share = (fromIntegral count / fromIntegral total) * 100.0
-- | Read a file as text and split it into its lines.
getLines :: FilePath -> IO [T.Text]
getLines fileName = fmap T.lines (readFile fileName)
# As of Tue 26 Apr 2011 21:54:38 BST
Your Hackage listing contains 2985 packages with 12 different licenses.
BSD3: 2159 (72.33%)
GPL: 345 (11.56%)
LGPL: 117 (3.92%)
OtherLicense: 107 (3.58%)
PublicDomain: 85 (2.85%)
MIT: 67 (2.24%)
GPL-3: 64 (2.14%)
LGPL-2.1: 16 (0.54%)
GPL-2: 12 (0.40%)
LGPL-3: 8 (0.27%)
BSD4: 3 (0.10%)
No license given: 2 (0.07%)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment