Skip to content

Instantly share code, notes, and snippets.

@neongreen
Created March 17, 2019 10:47
Show Gist options
  • Save neongreen/cd7fcf98771e252410b6c538545b1b0d to your computer and use it in GitHub Desktop.
Save neongreen/cd7fcf98771e252410b6c538545b1b0d to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ApplicativeDo #-}
module Main where
import Data.Default
import Network.HTTP.Req
import Data.Time.Calendar
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import Data.Text (Text)
import Data.Text.Encoding
import Data.Foldable
import Fmt
import GHC.Generics
import Text.Regex.Applicative
import Data.Char
import Control.Monad
import Data.Maybe
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Tree
main :: IO ()
main = do
bs <- runReq def $
req GET (http "www.zib.de" /: "mathematics-calendar")
NoReqBody
bsResponse
mempty
let parsed = parseTree (decodeUtf8 (responseBody bs))
for_ (calendarEntries parsed) $ \entry -> do
pretty $ genericF $ parseCalendarEntry entry
putStrLn ""
data CalendarEntry = CalendarEntry
{ date :: Day
, author :: Text
, title :: Text
, datetime :: Text
, locationName :: Text
, locationAddress :: Text
} deriving (Eq, Show, Generic)
calendarEntries :: [TagTree Text] -> [[TagTree Text]]
calendarEntries t =
[ subtrees
| TagBranch _ attrs subtrees <- universeTree t
, ("class", "calendar-entry") `Prelude.elem` attrs
]
parseCalendarEntry :: [TagTree Text] -> CalendarEntry
parseCalendarEntry t = do
let getClass className =
innerText $ flattenTree $ concat $
[ subtrees
| TagBranch _ attrs subtrees <- universeTree t
, ("class", className) `Prelude.elem` attrs
]
CalendarEntry
{ date = parseDay $ getClass "date"
, author = getClass "author"
, title = getClass "title"
, datetime = getClass "datetime"
, locationAddress = getClass "location-address"
, locationName = getClass "location-name"
}
parseDay :: Text -> Day
parseDay = fromMaybe (error "bad Day") . match regex . Text.unpack
where
digit = psym isDigit
regex = do
d <- read <$> some digit <* "."
m <- read <$> some digit <* "."
y <- read <$> replicateM 4 digit
pure (fromGregorian y m d)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment