Skip to content

Instantly share code, notes, and snippets.

@pedrofurla
Created December 10, 2021 08:13
Show Gist options
  • Save pedrofurla/d626590d00b2b1b86f79b1d61c5730bc to your computer and use it in GitHub Desktop.
Save pedrofurla/d626590d00b2b1b86f79b1d61c5730bc to your computer and use it in GitHub Desktop.
Extract Stackoverflow code blocks
{-# LANGUAGE LambdaCase #-}
{-
cabal v2-install --lib hxt
cabal v2-install --lib hxt-xpath
cabal v2-install --lib http-client
cabal v2-install --lib http-client-tls
-}
import Text.XML.HXT.Core (runX, readString, withParseHTML, withWarnings)
import Text.XML.HXT.XPath.XPathEval (getXPath)
import Text.XML.HXT.DOM.TypeDefs
import Data.Tree.NTree.TypeDefs
import Network.HTTP.Client (newManager, parseRequest, httpLbs, responseBody)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Text.Lazy.Encoding as T
import qualified Data.Text.Lazy as T
import Data.Foldable (for_)
-- Retrieves the Haskell code text from StackOverflow questions
soQuestionCode :: String -> IO [String]
soQuestionCode questionId =
do
man <- newManager tlsManagerSettings
req <- parseRequest $ "https://stackoverflow.com/questions/" <> questionId <> "/"
resp <- httpLbs req man
let body = T.unpack . T.decodeUtf8 . responseBody $ resp
-- writeFile ("tmp-so" <> questionId <> ".html") body
xml <- runX . readString [withParseHTML True, withWarnings False] $ body
-- let codeXml = getXPath "//pre[contains(@class,'lang-hs')]//text()" $ head $ xml
let codeXml = getXPath "//pre//code/text()" . head $ xml
blocks = textNodes codeXml
pure blocks
soInspect :: String -> Int -> IO ()
soInspect questionId length = blocks >>= (flip for_) putStrLn
where blocks =
(\n c -> "---- " <> show n <> "\n" <> (unlines . take length . lines $ c)) `zipWith` [0..] <$> soQuestionCode questionId
textNodes :: XmlTrees -> [String]
textNodes = (=<<) (\case (NTree (XText s) []) -> [s]; _ -> [])
-- (textNodes $ getXPath "//pre//code/text()" $ head $ xml) :: [String]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment