Skip to content

Instantly share code, notes, and snippets.

@micrypt
Created February 21, 2012 23:24
Show Gist options
  • Save micrypt/1879767 to your computer and use it in GitHub Desktop.
Save micrypt/1879767 to your computer and use it in GitHub Desktop.
Download xkcd comics using STM
{-# Language PackageImports #-}
module Main where
import Control.Monad (liftM, forM_, replicateM_)
import Control.Concurrent (forkIO)
import Control.Concurrent.STM
import "mtl" Control.Monad.Error
import Control.Applicative
import Control.Exception
import qualified Network.Stream as Stream (Result)
import Control.Arrow
import Network.HTTP
import Network.URI (parseURI)
import System.FilePath (takeFileName, (</>))
import System.IO
import System.Environment
import System.Posix.User
import System.Directory
import Text.JSON
----------------------------------------------------------------------
main = do
dir <- makeComicDir
putStrLn $ "Created " ++ dir
Right json <- xkcdFetchJSON Current
let curNum = xkcdGetNumber json "num"
comics = take curNum $ iterate (subtract 1) curNum
putStrLn $ "Downloading " ++ (show $ length comics) ++ " comics..."
comicQueue <- newTChanIO
atomically $ forM_ (ComicNumber <$> comics) $ writeTChan comicQueue
workers <- newTVarIO 8
replicateM_ 8 . forkIO $ worker comicQueue workers dir
waitFor workers
putStrLn "DONE"
----------------------------------------------------------------------
data ComicNumber = Current | ComicNumber Int deriving (Show)
getReq = fmap (mkRequest GET) . parseURI
getRequestE = maybe (throwError "invalid url") return . getReq
tryRequest :: Request_String
-> IO (Either IOException (Stream.Result (Response String)))
tryRequest = try . simpleHTTP
simpleHttpE request = do
response <- liftIO $ tryRequest request
case response of
Left err -> throwError $ show err
Right rsp -> return rsp
getResponseBodyE = either (throwError.show) (return.rspBody)
fetchHtmlA = Kleisli getRequestE >>>
Kleisli simpleHttpE >>>
Kleisli getResponseBodyE
fetchHTMLBody url = runErrorT $ runKleisli fetchHtmlA url
----------------------------------------------------------------------
xkcd = "http://xkcd.com/"
xkcdJSONUrl Current = xkcd ++ "info.0.json"
xkcdJSONUrl (ComicNumber n) = xkcd ++ show n ++ "/info.0.json"
xkcdFetchJSON :: ComicNumber -> IO (Either String String)
xkcdFetchJSON num = runErrorT $ runKleisli fetchHtmlA $ xkcdJSONUrl num
xkcdComicUrl :: ComicNumber -> IO String
xkcdComicUrl num = do
Right jstr <- xkcdFetchJSON num
let (Ok (JSObject jobj)) = decode jstr
(Ok img) = valFromObj "img" jobj
return img
xkcdGetNumber :: String -> String -> Int
xkcdGetNumber jstr key =
let (Ok (JSObject jobj)) = decode jstr
(Ok jval) = valFromObj key jobj
in jval
----------------------------------------------------------------------
getImgName = takeFileName
downloadComic dir num = do
url <- xkcdComicUrl num
let (ComicNumber n) = num
name = (show n) ++ "_" ++ getImgName url
path = dir </> name
comic <- fetchHTMLBody url
case comic of
Left err -> putStrLn $ "ERROR: " ++ show err
Right img -> do
file <- openBinaryFile path WriteMode
hPutStr file img
hClose file
putStrLn $ "Saving " ++ name
makeComicDir = do
homedir <- getHomeDirectory
let imgdir = homedir </> "xkcd"
createDirectory imgdir
return imgdir
----------------------------------------------------------------------
worker jobs alive dir = work
where quit = atomically $ readTVar alive >>= writeTVar alive . (subtract 1)
cont = do job@(ComicNumber n) <- atomically $ readTChan jobs
if' (n == 404) work $ downloadComic dir job >> work
work = (atomically $ isEmptyTChan jobs) >>= \x -> if' x quit cont
waitFor alive = atomically $ readTVar alive >>= check . (==0)
----------------------------------------------------------------------
if' :: Bool -> a -> a -> a
if' True x _ = x
if' False _ y = y
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment