Created
December 16, 2013 18:21
-
-
Save anonymous/7991727 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
import Control.Monad | |
import Control.Monad.IO.Class | |
import Data.Char | |
import Data.Conduit | |
import Data.Conduit.Binary (sinkFile) | |
import Network.HTTP.Conduit | |
import Text.HTML.DOM | |
import Text.Printf | |
import Text.XML.Cursor | |
import qualified Data.ByteString as B | |
import qualified Data.ByteString.Lazy as L | |
import qualified Data.Text as T | |
-- | Find the target of the \"Download to Spreadsheet\" link in a parsed page.
--
-- Starting from @cursor@, looks through its descendants for an @a@ element
-- with a @strong@ child that in turn has a child whose text content contains
-- \"Download to Spreadsheet\".  From each such match it walks two parents
-- back up (to the @a@ element) and collects the @href@ attribute.
--
-- Returns the first @href@ found as a 'String', or 'Nothing' when there is
-- no match.
extractLink :: Cursor -> Maybe String
extractLink cursor =
    -- Pattern match instead of @null@ + @head@ so the function is total by
    -- construction.
    case links of
        (link : _) -> Just (T.unpack link)
        []         -> Nothing
  where
    -- Axis that keeps a node only when its concatenated text content
    -- contains @k@.
    contains k = check $ T.isInfixOf k . T.concat . content
    links = cursor
        $// element "a"
        &/ element "strong"
        &/ contains "Download to Spreadsheet"
        >=> parent
        >=> parent
        >=> attribute "href"
-- | Conduit stage that turns each incoming ticker symbol into a
-- @(symbol, Maybe link)@ pair.
--
-- For every symbol received from upstream it requests
-- @http:\/\/finance.yahoo.com\/q\/hp?s=\<encoded symbol\>@ through @manager@,
-- parses the response body with 'sinkDoc', and yields the symbol together
-- with the result of 'extractLink' on that document.  The stage finishes
-- when upstream has no more symbols.
getLink manager = do
    msym <- await
    case msym of
        Nothing -> return ()
        Just sym -> do
            -- Percent-encode every character from its code point so the
            -- symbol can be spliced into the query string.  Two hex digits
            -- only produce a valid escape for code points below 0x100.
            let sym' = concatMap (printf "%%%02x" . ord) sym
            let url = "http://finance.yahoo.com/q/hp?s=" ++ sym'
            req <- parseUrl url
            res <- http req manager
            xml <- responseBody res $$+- sinkDoc
            yield (sym, extractLink (fromDocument xml))
            -- Keep consuming: without this the stage would stop after the
            -- first symbol and silently ignore the rest of the stream.
            getLink manager
-- | Conduit sink that downloads every link it receives.
--
-- Consumes @(symbol, Maybe link)@ pairs from upstream.  For a pair that
-- carries a link, the link is fetched through @manager@ and the response
-- body is written to a file named after the symbol.  Pairs without a link
-- are skipped.  The sink finishes when upstream is exhausted.
downloadContent manager = do
    mx <- await
    case mx of
        Nothing -> return ()
        -- No link for this symbol: nothing to download, move on to the
        -- next pair instead of ending the whole sink.
        Just (_, Nothing) -> downloadContent manager
        Just (sym, Just link) -> do
            req <- parseUrl link
            res <- http req manager
            responseBody res $$+- sinkFile sym
            -- Keep consuming so that every upstream pair is handled, not
            -- only the first one.
            downloadContent manager
-- | Source of the ticker symbols to process; currently yields only \"VTI\".
sourceSyms = mapM_ yield ["VTI"]
-- | Entry point: creates an HTTP manager and runs the pipeline that feeds
-- 'sourceSyms' through 'getLink' into 'downloadContent'.
main :: IO ()
main = runResourceT . withManager $ \mgr -> do
    sourceSyms $= getLink mgr $$ downloadContent mgr
    return ()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment