Skip to content

Instantly share code, notes, and snippets.

@danidiaz
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danidiaz/0002fc09cd35e3815cca to your computer and use it in GitHub Desktop.
Save danidiaz/0002fc09cd35e3815cca to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
import Control.Applicative
import Control.Lens
import Control.Monad
import qualified Data.Traversable as T
import qualified Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.Encoding as T
import Network.Wreq
import Text.HTML.TagSoup
getPageTags :: T.Text -> IO [Tag T.Text]
getPageTags url = parseTags . T.decodeUtf8 . view responseBody <$> get (T.unpack url)
partitionByClass :: T.Text -> [Tag T.Text] -> [[Tag T.Text]]
partitionByClass c = partitions $ flip (~==) $ TagOpen T.empty [("class",c)]
main :: IO ()
main = do
pageTags <- getPageTags "http://www.reddit.com/r/programming"
let topicUrlMap = M.fromList $ do
topicTags <- partitionByClass "entry unvoted" $ pageTags
_:TagText title:_ <- partitionByClass "title may-blank " topicTags
TagOpen "a" args:_ <- partitionByClass "comments may-blank" topicTags
("href",commentsUrl) <- args
return (title,commentsUrl)
commentTagsMap <- T.forM topicUrlMap getPageTags
let countHaskells comment = sum [ T.count "haskell" txt | TagText txt <- comment ]
tally = liftA2 (,) length (sum . map countHaskells)
. drop 1
. partitionByClass "usertext-body may-blank-within"
tallyMap = tally <$> commentTagsMap
flip M.traverseWithKey tallyMap $ \k (_,v) -> do
putStrLn $ "Topic title: " ++ show k
putStrLn $ "Haskell mentions: " ++ show v
let (commentCounts,haskellCounts) = unzip $ F.toList tallyMap
putStrLn $ "Total comments: " ++ show (sum commentCounts)
putStrLn $ "Total Haskell mentions: " ++ show (sum haskellCounts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment