Created
September 2, 2014 04:33
-
-
Save mopemope/7f0f8616df64e079a328 to your computer and use it in GitHub Desktop.
HENTAI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE DeriveGeneric #-} | |
import Control.Monad | |
import Data.Aeson (FromJSON, ToJSON, decode) | |
import Data.Text (concat, unpack) | |
import Data.Text.Internal (Text) | |
-- import Data.Text.IO (putStrLn) | |
-- import qualified Data.ByteString as B | |
import qualified Data.ByteString.Char8 as BC | |
import qualified Data.ByteString.Lazy as BL | |
import qualified Data.ByteString.Lazy.Char8 as BLC | |
import qualified Data.ByteString.Base64 as B64 | |
import Data.Monoid | |
import GHC.Generics | |
import Network.HTTP.Conduit (simpleHttp) | |
import Network.HTTP.Types.URI (urlDecode) | |
import Text.HTML.DOM (parseLBS) | |
import Text.XML.Cursor | |
import Text.Printf | |
import Text.Regex.PCRE | |
import System.Directory | |
import System.FilePath (joinPath, takeDirectory) | |
import Prelude hiding (concat, putStrLn) | |
---------------------- | |
-- processData :: [Text] -> IO () | |
-- processData = mapM_ putStrLn | |
cursorFor :: String -> IO Cursor | |
cursorFor u = do | |
page <- simpleHttp u | |
return $ fromDocument $ parseLBS page | |
extractLink :: Cursor -> String | |
extractLink = unpack . concat . attribute "href" | |
imgPat :: String | |
imgPat = "\\d+-hentai4.me-(\\d+).jpg" | |
createImgPath :: Int -> String -> FilePath | |
createImgPath folder img' = | |
let a = img' =~ imgPat :: (String, String, String, [String]) | |
in case a of | |
(_, _, _, [imgId]) -> joinPath [(show folder), toFile imgId] | |
_ -> "" | |
where toInt i = read i :: Int | |
toFile i = (show $ toInt i) <> ".jpg" | |
---------------------- | |
findBookLinks :: Cursor -> [Cursor] | |
findBookLinks = element "a" >=> attributeIs "rel" "bookmark" | |
getBookLinks :: String -> IO [String] | |
getBookLinks url = do | |
cursor <- cursorFor url | |
return (cursor $// findBookLinks &| extractLink) | |
----------- | |
pat :: String | |
pat = "/gallery-(\\d+)-(\\d+)" :: String | |
toMetaDataUrl :: String -> Maybe String | |
toMetaDataUrl url = | |
let a = url =~ pat :: (String, String, String, [String]) | |
in case a of | |
(_, _, _, [imgId, hostId]) -> Just ("http://hentai4.me/ajax.php?dowork=getimg&id=" <> imgId <> "&host=" <> hostId) | |
_ -> Nothing | |
findGalleryLink :: Cursor -> [Cursor] | |
findGalleryLink = element "a" >=> attributeIs "title" "View Gallery" | |
getGallery :: String -> IO String | |
getGallery url = do | |
cursor <- cursorFor url | |
return (head $ cursor $// findGalleryLink &| extractLink) | |
data ImageJSON = | |
ImageJSON { img :: [Int] | |
, img_name :: [Text] | |
, folder_link :: Int | |
, host :: Text | |
} deriving (Show, Generic) | |
instance FromJSON ImageJSON | |
instance ToJSON ImageJSON | |
decodeResponse :: Maybe String -> IO BC.ByteString | |
decodeResponse (Just url) = do | |
result <- simpleHttp url | |
return (b64decode $ urlDecode True (BL.toStrict result)) | |
where b64decode input = | |
case B64.decode input of | |
Left _ -> "" | |
Right a -> a | |
decodeResponse Nothing = return "" | |
parseJson :: BLC.ByteString -> Maybe ImageJSON | |
parseJson = decode | |
createImgUrl :: ImageJSON -> [(String, String)] | |
createImgUrl j = let imgs = img_name j; | |
h = unpack (host j); | |
flnk = folder_link j | |
in map (toUrl h flnk) imgs | |
where | |
imgUrl :: String -> Int -> Text -> String | |
imgUrl host' folder img' = printf "http://%s.hdporn4.me/%d/%s" host' folder (unpack img') | |
toUrl host' folder img' = (createImgPath folder (unpack img'), imgUrl host' folder img') | |
----------------- | |
downloadImg :: FilePath -> (FilePath, String) -> IO () | |
downloadImg root (imgPath, url) = | |
let path = joinPath [root, imgPath]; | |
parent' = takeDirectory path | |
in do | |
print $ "Download ... " <> url | |
print $ "Output " <> path | |
createDirectoryIfMissing True parent' | |
simpleHttp url >>= BL.writeFile path | |
downloadAll :: FilePath -> [(FilePath, String)] -> IO () | |
downloadAll root = | |
mapM_ (downloadImg root) | |
main :: IO () | |
main = do | |
links <- getBookLinks "http://hentai4.me" | |
forM_ links $ \x -> do | |
a <- getGallery x | |
b <- decodeResponse $ toMetaDataUrl a | |
case parseJson (BL.fromStrict b) of | |
Just json -> downloadAll "/tmp" $ createImgUrl json | |
Nothing -> print $ unpack "End" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment