Skip to content

Instantly share code, notes, and snippets.

@gatlin
Last active August 29, 2015 14:11
Show Gist options
  • Save gatlin/5e021498cc7a99211476 to your computer and use it in GitHub Desktop.
Save gatlin/5e021498cc7a99211476 to your computer and use it in GitHub Desktop.
Someone stole my bike! So I'm casting a wide net on Craigslist
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
import Prelude hiding (map)
import Pipes
import Pipes.Prelude (stdoutLn, map)
import Pipes.Interleave
import Network.Wreq
import qualified Data.ByteString.Lazy.Char8 as C
import qualified Data.ByteString.Char8 as SC
import Control.Lens hiding (indices, each)
import Control.Lens.TH
import Text.Feed.Import
import Text.Feed.Query
import Text.Feed.Types
import Text.RSS.Syntax
import Control.Monad.Stream (forever)
import Text.HTML.TagSoup
import Data.ByteString.Lazy.Search (indices)
import Data.Int
import Data.Maybe (fromJust)
import System.Environment (getArgs)
-- | Represents a listing
data Listing = Listing
{ _uri :: String
, _description :: String
, _title :: String
, _publishedDate :: DateString
, _rawData :: C.ByteString
}
instance Show Listing where
show (Listing u d t p r) = concat
[ "( ",p," ) ", t, "\n-----\n",d,"\n-----\n",u,"\n=====" ]
makeLenses ''Listing
-- Feed urls, I'll probably keep adding more
feedUrls = [ "http://dallas.craigslist.org/search/bia?format=rss"
, "http://sanantonio.craigslist.org/search/bia?format=rss"
, "http://houston.craigslist.org/search/bia?format=rss"
, "http://sanmarcos.craigslist.org/search/bia?format=rss"
, "http://austin.craigslist.org/search/bia?format=rss"
]
-- Given a feed URL produce RSS items
loadFeedItems:: String -> Producer Item IO ()
loadFeedItems url = do
res <- lift $ get url
let f = parseFeedString . C.unpack $ res ^. responseBody
case f of
Nothing -> return ()
Just f' -> for (each (getFeedItems f')) yield
-- Convert an item to a Listing
itemToListing :: Item -> Listing
itemToListing itm = Listing
{ _uri = fromJust $ getItemLink itm
, _description = fromJust $ getItemDescription itm
, _title = fromJust $ getItemTitle itm
, _publishedDate = fromJust $ getItemDate itm
, _rawData = C.empty
}
-- Download the complete listing text from Craigslist
loadListing :: Pipe Listing Listing IO ()
loadListing = forever $ do
listing <- await
res <- lift $ get $ listing ^. uri
yield $ listing & rawData .~ (res ^. responseBody)
searchListing :: Monad m => SC.ByteString -> Pipe Listing Listing m ()
searchListing needle = loop where
loop = do
l <- await
let tags = parseTags $ l ^. rawData
let text = innerText tags
let found = indices needle text
if ((length found) > 0)
then yield l >> loop
else loop
-- Merge the different pipelines into one
siftFeeds :: [String] -> SC.ByteString -> Producer Listing IO ()
siftFeeds fds ndl =
let ps = fmap (\fd -> loadFeedItems fd
>-> map itemToListing
>-> loadListing
>-> searchListing ndl ) fds
in interleave (\_ _ -> EQ) ps
-- Run the pipeline, make the listings presentable, and print them
main :: IO ()
main = do
args <- getArgs
let needle = SC.pack $ args !! 0
runEffect $ siftFeeds feedUrls needle
>-> map show
>-> forever stdoutLn
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment