Skip to content

Instantly share code, notes, and snippets.

@DuckOfDoom
Last active November 7, 2016 15:56
Show Gist options
  • Save DuckOfDoom/b8839a71514cf1858dc00840a0d69506 to your computer and use it in GitHub Desktop.
Save DuckOfDoom/b8839a71514cf1858dc00840a0d69506 to your computer and use it in GitHub Desktop.
Html parsing
{-# OPTIONS_GHC -Wall #-}
{-# TemplateHaskell #-}
module Main where
import Control.Lens (makeLenses, (.~), (^.))
import Data.List.Split (chunksOf)
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as LBS (writeFile, toStrict)
import Data.Text (unpack)
import Data.Text.Encoding (decodeUtf8)
import qualified Network.Wreq as Wreq (get, responseBody)
import Text.HTML.TagSoup (Tag, fromAttrib, isTagCloseName,
parseTags, renderTags, (~/=))
data Event = Event
{ name :: String
, location :: String
, date :: String
, link :: String
}
deriving Show
makeLenses ''Event
url :: String
url = "http://us.battle.net/hearthstone/en/fireside-gatherings?country=RU#fireside-gatherings"
getHtml :: IO ByteString
getHtml = do
response <- Wreq.get url
return $ (response ^. Wreq.responseBody)
findEventsTable :: ByteString -> [Tag ByteString]
findEventsTable s = takeWhile (~/= "</div>") . drop 1 . dropWhile (~/= "</div>") . dropWhile (~/= "<div class=meetups-event-table>") . parseTags
splitEvents :: [Tag ByteString] -> [[Tag ByteString]]
splitEvents = chunksOf 32
parseEvent :: [Tag ByteString] -> Event
parseEvent tags = Event name location date link
where link = "http://us.battle.net/" ++ (fromAttrib "href" (head tags))
name = ""
location = ""
date = ""
test :: IO ()
test = do
tags <- (splitEvents . findEventsTable) <$> getHtml
LBS.writeFile "test.html" $ renderTags $ head $ tags
downloadHtml :: IO ()
downloadHtml = do
getHtml >>= (LBS.writeFile "downloaded.html")
main :: IO ()
main = return ()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment