Last active
August 29, 2015 14:11
-
-
Save gatlin/5e021498cc7a99211476 to your computer and use it in GitHub Desktop.
Someone stole my bike! So I'm casting a wide net on Craigslist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE TemplateHaskell #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
import Prelude hiding (map) | |
import Pipes | |
import Pipes.Prelude (stdoutLn, map) | |
import Pipes.Interleave | |
import Network.Wreq | |
import qualified Data.ByteString.Lazy.Char8 as C | |
import qualified Data.ByteString.Char8 as SC | |
import Control.Lens hiding (indices, each) | |
import Control.Lens.TH | |
import Text.Feed.Import | |
import Text.Feed.Query | |
import Text.Feed.Types | |
import Text.RSS.Syntax | |
import Control.Monad.Stream (forever) | |
import Text.HTML.TagSoup | |
import Data.ByteString.Lazy.Search (indices) | |
import Data.Int | |
import Data.Maybe (fromJust) | |
import System.Environment (getArgs) | |
-- | Represents a listing | |
data Listing = Listing | |
{ _uri :: String | |
, _description :: String | |
, _title :: String | |
, _publishedDate :: DateString | |
, _rawData :: C.ByteString | |
} | |
instance Show Listing where | |
show (Listing u d t p r) = concat | |
[ "( ",p," ) ", t, "\n-----\n",d,"\n-----\n",u,"\n=====" ] | |
makeLenses ''Listing | |
-- Feed urls, I'll probably keep adding more | |
feedUrls = [ "http://dallas.craigslist.org/search/bia?format=rss" | |
, "http://sanantonio.craigslist.org/search/bia?format=rss" | |
, "http://houston.craigslist.org/search/bia?format=rss" | |
, "http://sanmarcos.craigslist.org/search/bia?format=rss" | |
, "http://austin.craigslist.org/search/bia?format=rss" | |
] | |
-- Given a feed URL produce RSS items | |
loadFeedItems:: String -> Producer Item IO () | |
loadFeedItems url = do | |
res <- lift $ get url | |
let f = parseFeedString . C.unpack $ res ^. responseBody | |
case f of | |
Nothing -> return () | |
Just f' -> for (each (getFeedItems f')) yield | |
-- Convert an item to a Listing | |
itemToListing :: Item -> Listing | |
itemToListing itm = Listing | |
{ _uri = fromJust $ getItemLink itm | |
, _description = fromJust $ getItemDescription itm | |
, _title = fromJust $ getItemTitle itm | |
, _publishedDate = fromJust $ getItemDate itm | |
, _rawData = C.empty | |
} | |
-- Download the complete listing text from Craigslist | |
loadListing :: Pipe Listing Listing IO () | |
loadListing = forever $ do | |
listing <- await | |
res <- lift $ get $ listing ^. uri | |
yield $ listing & rawData .~ (res ^. responseBody) | |
searchListing :: Monad m => SC.ByteString -> Pipe Listing Listing m () | |
searchListing needle = loop where | |
loop = do | |
l <- await | |
let tags = parseTags $ l ^. rawData | |
let text = innerText tags | |
let found = indices needle text | |
if ((length found) > 0) | |
then yield l >> loop | |
else loop | |
-- Merge the different pipelines into one | |
siftFeeds :: [String] -> SC.ByteString -> Producer Listing IO () | |
siftFeeds fds ndl = | |
let ps = fmap (\fd -> loadFeedItems fd | |
>-> map itemToListing | |
>-> loadListing | |
>-> searchListing ndl ) fds | |
in interleave (\_ _ -> EQ) ps | |
-- Run the pipeline, make the listings presentable, and print them | |
main :: IO () | |
main = do | |
args <- getArgs | |
let needle = SC.pack $ args !! 0 | |
runEffect $ siftFeeds feedUrls needle | |
>-> map show | |
>-> forever stdoutLn |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment