Skip to content

Instantly share code, notes, and snippets.

@uduki
Created March 20, 2012 19:20
Show Gist options
  • Save uduki/2140113 to your computer and use it in GitHub Desktop.
Save uduki/2140113 to your computer and use it in GitHub Desktop.
いかにしておっぱい画像をダウンロードするか〜2012 Haskell
{-# LANGUAGE OverloadedStrings, PatternGuards #-}
import qualified Codec.Binary.Url as CBU
import qualified Control.Concurrent as CC
import qualified Control.Concurrent.STM as STM
import qualified Control.Exception as CE
import qualified Control.Monad as CM
import qualified Control.Monad.Trans as CMT
import qualified Data.Aeson as A
import qualified Data.Attoparsec as DA
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as DC
import qualified Data.Conduit.List as DCL
import qualified Data.Digest.Pure.SHA as SHA
import qualified Data.HashMap.Lazy as DHL
import qualified Data.List as DL
import qualified Data.Text as DT
import qualified Data.Text.Encoding as DTE
import qualified Data.Text.IO as DTI
import qualified Data.Vector as V
import qualified Network.HTTP.Conduit as NHC
import qualified System.Directory as SD
main :: IO ()
main = do
SD.doesDirectoryExist "images" >>= (`CM.unless` SD.createDirectory "images")
x <- STM.newTMVarIO 0
CM.forM_ [1..20] $ \n -> NHC.withManager $ downloads n $ imgDownloader x
STM.atomically (STM.readTMVar x >>= \n -> CM.unless (n == 0) STM.retry)
putStrLn "Done."
imgDownloader :: DC.ResourceIO m => STM.TMVar Int -> DC.Sink BS.ByteString m ()
imgDownloader stm = do
a <- BS.concat `CM.fmap` DCL.consume
case DA.parseOnly A.json a of
Left e -> CMT.liftIO $ putStrLn e
Right c -> V.forM_ (getUrls c) $ \url -> CMT.liftIO $ limitableForkIO 20 stm $ do
DTI.putStrLn url
img <- NHC.simpleHttp $ DT.unpack url
BSL.writeFile ("images/" ++ SHA.showDigest (SHA.sha1 img) ++ ".jpg") img
limitableForkIO :: Int -> STM.TMVar Int -> IO () -> IO ()
limitableForkIO limit x action = start >> CM.void (CC.forkIO (action `CE.finally` end))
where
start = STM.atomically $ do
n <- STM.takeTMVar x
CM.unless (n < limit) STM.retry
STM.putTMVar x (n + 1)
end = STM.atomically $ do
n <- STM.takeTMVar x
STM.putTMVar x (n - 1)
getUrls :: A.Value -> V.Vector DT.Text
getUrls v
| A.Object x0 <- v
, Just (A.Object x1) <- DHL.lookup "SearchResponse" x0
, Just (A.Object x2) <- DHL.lookup "Image" x1
, Just (A.Array x3) <- DHL.lookup "Results" x2
= V.map toUrl x3
| otherwise
= V.empty
where
toUrl x
| A.Object a0 <- x
, Just (A.String a1) <- DHL.lookup "MediaUrl" a0
= a1
| otherwise
= DT.empty
downloads :: DC.ResourceIO m => Int -> DC.Sink BS.ByteString m () -> NHC.Manager -> DC.ResourceT m ()
downloads n sink m =
case makeRequest of
Nothing -> CMT.liftIO $ putStrLn "組み立てたURLがおかしい"
Just req -> NHC.http req m >>= \r -> NHC.responseBody r DC.$$ sink
where
queryToUrl vals = "http://api.bing.net/json.aspx?" ++ DL.intercalate "&" (map (\(a, b) -> a ++ "=" ++ b) vals)
makeRequest = NHC.parseUrl $ queryToUrl [ ("appid", "AppId")
, ("version", "2.2")
, ("market", "ja-JP")
, ("sources", "Image")
, ("image.offset", show $ 50 * (n - 1))
, ("image.count", "50")
, ("adult", "off")
, ("query", CBU.encode $ BS.unpack $ DTE.encodeUtf8 "おっぱい")
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment