Skip to content

Instantly share code, notes, and snippets.

@mopemope
Created September 2, 2014 04:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mopemope/7f0f8616df64e079a328 to your computer and use it in GitHub Desktop.
Save mopemope/7f0f8616df64e079a328 to your computer and use it in GitHub Desktop.
HENTAI
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
import Control.Monad
import Data.Aeson (FromJSON, ToJSON, decode)
import Data.Text (concat, unpack)
import Data.Text.Internal (Text)
-- import Data.Text.IO (putStrLn)
-- import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Data.ByteString.Base64 as B64
import Data.Monoid
import GHC.Generics
import Network.HTTP.Conduit (simpleHttp)
import Network.HTTP.Types.URI (urlDecode)
import Text.HTML.DOM (parseLBS)
import Text.XML.Cursor
import Text.Printf
import Text.Regex.PCRE
import System.Directory
import System.FilePath (joinPath, takeDirectory)
import Prelude hiding (concat, putStrLn)
----------------------
-- processData :: [Text] -> IO ()
-- processData = mapM_ putStrLn
cursorFor :: String -> IO Cursor
cursorFor u = do
page <- simpleHttp u
return $ fromDocument $ parseLBS page
extractLink :: Cursor -> String
extractLink = unpack . concat . attribute "href"
imgPat :: String
imgPat = "\\d+-hentai4.me-(\\d+).jpg"
createImgPath :: Int -> String -> FilePath
createImgPath folder img' =
let a = img' =~ imgPat :: (String, String, String, [String])
in case a of
(_, _, _, [imgId]) -> joinPath [(show folder), toFile imgId]
_ -> ""
where toInt i = read i :: Int
toFile i = (show $ toInt i) <> ".jpg"
----------------------
findBookLinks :: Cursor -> [Cursor]
findBookLinks = element "a" >=> attributeIs "rel" "bookmark"
getBookLinks :: String -> IO [String]
getBookLinks url = do
cursor <- cursorFor url
return (cursor $// findBookLinks &| extractLink)
-----------
pat :: String
pat = "/gallery-(\\d+)-(\\d+)" :: String
toMetaDataUrl :: String -> Maybe String
toMetaDataUrl url =
let a = url =~ pat :: (String, String, String, [String])
in case a of
(_, _, _, [imgId, hostId]) -> Just ("http://hentai4.me/ajax.php?dowork=getimg&id=" <> imgId <> "&host=" <> hostId)
_ -> Nothing
findGalleryLink :: Cursor -> [Cursor]
findGalleryLink = element "a" >=> attributeIs "title" "View Gallery"
getGallery :: String -> IO String
getGallery url = do
cursor <- cursorFor url
return (head $ cursor $// findGalleryLink &| extractLink)
data ImageJSON =
ImageJSON { img :: [Int]
, img_name :: [Text]
, folder_link :: Int
, host :: Text
} deriving (Show, Generic)
instance FromJSON ImageJSON
instance ToJSON ImageJSON
decodeResponse :: Maybe String -> IO BC.ByteString
decodeResponse (Just url) = do
result <- simpleHttp url
return (b64decode $ urlDecode True (BL.toStrict result))
where b64decode input =
case B64.decode input of
Left _ -> ""
Right a -> a
decodeResponse Nothing = return ""
parseJson :: BLC.ByteString -> Maybe ImageJSON
parseJson = decode
createImgUrl :: ImageJSON -> [(String, String)]
createImgUrl j = let imgs = img_name j;
h = unpack (host j);
flnk = folder_link j
in map (toUrl h flnk) imgs
where
imgUrl :: String -> Int -> Text -> String
imgUrl host' folder img' = printf "http://%s.hdporn4.me/%d/%s" host' folder (unpack img')
toUrl host' folder img' = (createImgPath folder (unpack img'), imgUrl host' folder img')
-----------------
downloadImg :: FilePath -> (FilePath, String) -> IO ()
downloadImg root (imgPath, url) =
let path = joinPath [root, imgPath];
parent' = takeDirectory path
in do
print $ "Download ... " <> url
print $ "Output " <> path
createDirectoryIfMissing True parent'
simpleHttp url >>= BL.writeFile path
downloadAll :: FilePath -> [(FilePath, String)] -> IO ()
downloadAll root =
mapM_ (downloadImg root)
main :: IO ()
main = do
links <- getBookLinks "http://hentai4.me"
forM_ links $ \x -> do
a <- getGallery x
b <- decodeResponse $ toMetaDataUrl a
case parseJson (BL.fromStrict b) of
Just json -> downloadAll "/tmp" $ createImgUrl json
Nothing -> print $ unpack "End"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment