Last active

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist
View counthaskell.hs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
{-# LANGUAGE OverloadedStrings #-}
 
import Control.Applicative
import Control.Lens
import Control.Monad
import qualified Data.Traversable as T
import qualified Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.Encoding as T
import Network.Wreq
import Text.HTML.TagSoup
 
getPageTags :: T.Text -> IO [Tag T.Text]
getPageTags url = parseTags . T.decodeUtf8 . view responseBody <$> get (T.unpack url)
 
partitionByClass :: T.Text -> [Tag T.Text] -> [[Tag T.Text]]
partitionByClass c = partitions $ flip (~==) $ TagOpen T.empty [("class",c)]
 
main :: IO ()
main = do
pageTags <- getPageTags "http://www.reddit.com/r/programming"
let topicUrlMap = M.fromList $ do
topicTags <- partitionByClass "entry unvoted" $ pageTags
_:TagText title:_ <- partitionByClass "title may-blank " topicTags
TagOpen "a" args:_ <- partitionByClass "comments may-blank" topicTags
("href",commentsUrl) <- args
return (title,commentsUrl)
commentTagsMap <- T.forM topicUrlMap getPageTags
let countHaskells comment = sum [ T.count "haskell" txt | TagText txt <- comment ]
tally = liftA2 (,) length (sum . map countHaskells)
. drop 1
. partitionByClass "usertext-body may-blank-within"
tallyMap = tally <$> commentTagsMap
flip M.traverseWithKey tallyMap $ \k (_,v) -> do
putStrLn $ "Topic title: " ++ show k
putStrLn $ "Haskell mentions: " ++ show v
let (commentCounts,haskellCounts) = unzip $ F.toList tallyMap
putStrLn $ "Total comments: " ++ show (sum commentCounts)
putStrLn $ "Total Haskell mentions: " ++ show (sum haskellCounts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.