-
-
Save Fuuzetsu/8276445 to your computer and use it in GitHub Desktop.
Scrapes most-recent versions of all Hackage packages for the status of their documentation. Note that if you want to use this, you'll have to change the file path to save to at the top of the program.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE LambdaCase #-} | |
module Main where | |
import Control.Monad ((>=>)) | |
import Data.Either (rights, lefts) | |
import Data.Tree.NTree.TypeDefs (NTree) | |
import Data.List (isPrefixOf, isInfixOf) | |
import Network.HTTP (simpleHTTP, getRequest, rspBody) | |
import Network.Stream (ConnError) | |
import Text.XML.HXT.Core hiding (mkName) | |
resultFile :: String | |
resultFile = "/home/shana/hackageresult" | |
parsePackages | |
:: (ArrowXml cat, ArrowChoice cat) => cat XmlTree [Char] | |
parsePackages = hasName "a" >>> getAttrValue "href" | |
>>> arr (\x -> if "/package/" `isPrefixOf` x | |
then Left $ prependHackage x | |
else Right ()) | |
>>> returnA ||| zeroArrow | |
parseVersion :: ArrowXml a => a XmlTree String | |
parseVersion = hasName "tr" //> hasName "strong" /> getText | |
parseDate :: (ArrowXml cat, ArrowChoice cat) => cat XmlTree String | |
parseDate = hasName "tr" //> getText | |
>>> arr (\x -> if isDate x | |
then Left x | |
else Right ()) | |
>>> returnA ||| zeroArrow | |
-- try to guess if something's a date | |
isDate :: String -> Bool | |
isDate s = length (words s) == 6 && ":" `isInfixOf` (last $ take 4 (words s)) | |
parseModules :: ArrowXml a => a XmlTree (Either String String) | |
parseModules = hasAttrValue "class" (== "module") | |
/> ifA isText (getText >>^ Left) | |
(hasName "a" >>> getAttrValue "href" >>^ Right) | |
appendReport :: String -> String -> String | |
appendReport u v = (u ++ "-" ++ v ++ "/reports/") | |
prependHackage :: String -> String | |
prependHackage = ("http://hackage.haskell.org" ++) | |
packageUrl :: String | |
packageUrl = "http://hackage.haskell.org/packages/" | |
fetch :: String -> IO (Either ConnError String) | |
fetch = simpleHTTP . getRequest >=> return . fmap rspBody | |
parseHackage :: Maybe [String] -> IO [(String, PackageStatus)] | |
parseHackage urls = case urls of | |
Nothing -> fetch packageUrl >>= \case | |
Left _ -> return [] | |
Right pp -> case runL parsePackages pp of | |
[] -> return [] | |
xs -> onPackage xs | |
Just xs -> onPackage xs | |
where | |
onPackage xs = do | |
let n = length xs | |
flip mapM (zip xs [1 .. ]) $ \(p, cn) -> do | |
putStrLn $ "[" ++ show cn ++ "/" ++ show n ++ "] " ++ "Processing " ++ p | |
r <- processPackage p | |
appendFile resultFile $ show (p, r) ++ "\n" | |
return (p, r) | |
data PackageStatus = AllGood | NoDetectedModules | |
| MissingDocs (Maybe String) String String | |
| MixedDocs | |
| NetworkProblem String | |
| ParsingProblem String | |
deriving (Eq, Show) | |
runL :: LA (NTree XNode) b -> String -> [b] | |
runL p = runLA (xreadDoc //> p) | |
processPackage :: String -> IO PackageStatus | |
processPackage u = do | |
ps <- fetch u >>= \case | |
Left e -> return . Left . NetworkProblem $ show e | |
Right s -> return $ case runL parseVersion s of | |
[] -> Left $ ParsingProblem "Couldn't parse version" | |
v:_ -> Right (v, s, concat $ runL parseDate s) | |
case ps of | |
Left e -> return e | |
Right (v, s, d) -> case runL parseModules s of | |
[] -> return NoDetectedModules | |
xs -> case (lefts xs, rights xs) of | |
([], []) -> return NoDetectedModules | |
([], _) -> return AllGood | |
(_, []) -> fetch (appendReport u v) >>= \case | |
Left _ -> return $ MissingDocs Nothing v d | |
Right s' -> return $ MissingDocs (parseFailReason s') v d | |
_ -> return MixedDocs | |
parseFailReason :: String -> Maybe String | |
parseFailReason [] = Nothing | |
parseFailReason s | |
| "installOutcome = " `isPrefixOf` s = Just $ | |
takeWhile (\x -> x /= ',' && x /= ' ') | |
(drop (length "installOutcome = ") s) | |
| otherwise = parseFailReason $ drop 1 s | |
main :: IO () | |
main = parseHackage Nothing >> putStrLn "Done" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment