Skip to content

Instantly share code, notes, and snippets.

@adatta02
Created July 25, 2011 03:26
Show Gist options
  • Save adatta02/1103509 to your computer and use it in GitHub Desktop.
Save adatta02/1103509 to your computer and use it in GitHub Desktop.
Craigslist RSS to email
<?php
/****
* Little PHP script to hit Craigslist RSS feeds, check new posts, and shoot you an email with them.
* Good for finding things ;)
* REQUIRES http://code.google.com/p/phpquery/
****/
/**
 * usort() comparator that orders result entries by how many posts they hold,
 * largest first.
 *
 * @param array $a Result entry containing a "posts" array.
 * @param array $b Result entry containing a "posts" array.
 * @return int -1 when $a holds more posts than $b, 1 when it holds fewer,
 *             0 when both hold the same number.
 */
function resultsSort($a, $b) {
    $countA = count($a["posts"]);
    $countB = count($b["posts"]);

    if ($countA > $countB) {
        return -1;
    }
    if ($countA < $countB) {
        return 1;
    }
    return 0;
}
require_once 'phpQuery-onefile.php';
// Load the links that earlier runs already recorded, so they are not reported again.
// $seenPosts is always an array after this block, whatever state the file is in.
$seenPostsFile = dirname(__FILE__) . "/seenPosts.json";
$seenPosts = array( );
if( !is_file( $seenPostsFile ) ){
    // First run: create the (empty) state file; it is filled in at the end of the script.
    touch( $seenPostsFile );
}else{
    $seenPostsRaw = file_get_contents( $seenPostsFile );
    $seenPostsDecoded = is_string( $seenPostsRaw ) ? json_decode( $seenPostsRaw, true ) : null;
    // json_decode() returns null for an empty or corrupt file (for example when a
    // previous run created the file but stopped before writing to it). Keep the
    // empty array in that case so the later in_array() lookups receive an array.
    if( is_array( $seenPostsDecoded ) ){
        $seenPosts = $seenPostsDecoded;
    }
}
// Search terms; every keyword is queried against every city and every URL template.
$keywords = array(
    "symfony"
);

// Craigslist city identifiers, substituted as the subdomain of each URL template.
$cities = array(
    "atlanta",
    "austin",
    "boston",
    "chicago",
    "dallas",
    "denver",
    "detroit",
    "houston",
    "lasvegas",
    "losangeles",
    "miami",
    "minneapolis",
    "newyork",
    "orangecounty",
    "philadelphia",
    "phoenix",
    "portland",
    "raleigh",
    "sacramento",
    "sandiego",
    "seattle",
    "sfbay",
    "washingtondc"
);

// RSS search URL templates for sprintf(): the first %s is the city, the second the keyword.
$baseUrls = array(
    "http://%s.craigslist.org/search/ggg?query=%s&catAbb=jjj&srchType=A&addThree=&format=rss",
    "http://%s.craigslist.org/search/?areaID=4&subAreaID=&query=%s&catAbb=ggg&format=rss",
);
// Fetch every keyword/city/template combination and collect the posts whose
// link has not been recorded in $seenPosts yet.
// $results gets one entry per fetched URL; $totalPosts counts all new posts.
$results = array( );
$totalPosts = 0;
foreach( $keywords as $kw ){
    foreach( $cities as $ct ){
        foreach( $baseUrls as $bu ){
            $posts = array( );
            // URL-encode the keyword so terms containing spaces or reserved
            // characters still produce a valid query string. Keywords must
            // therefore be listed in plain (un-encoded) form.
            $url = sprintf( $bu, $ct, urlencode( $kw ) );
            echo $url . "\n";

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_FAILONERROR, false);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array("User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.15) Gecko/20080623 Firefox/2.0.0.15") );
            $html = curl_exec($ch);
            // Read the error text before the handle is closed.
            $fetchError = curl_error($ch);
            curl_close($ch);

            // curl_exec() returns false on a transport failure (timeout, DNS, ...).
            // Do not hand that, or an empty body, to the XML parser; record the
            // feed with zero posts and move on to the next one.
            if( $html === false || $html === "" ){
                echo "  fetch failed: " . $fetchError . "\n";
                $results[] = array( "url" => $url, "posts" => $posts );
                continue;
            }

            $doc = phpQuery::newDocumentXML( $html );
            phpQuery::selectDocument($doc);
            foreach( pq("item") as $it ){
                $link = pq($it)->children("link:first")->text();
                // Turn <br> into newlines, then drop any remaining markup.
                $body = strip_tags( str_replace( "<br>", "\n", pq($it)->children("description:first")->text() ) );
                // Strict comparison: links are strings, so no type juggling is wanted.
                if( !in_array($link, $seenPosts, true) ){
                    $posts[] = array(
                        "title" => pq($it)->children("title:first")->text(),
                        "body" => $body,
                        "link" => $link
                    );
                    // Remember the link at once so the same post found through
                    // another city or template is not reported twice in this run.
                    $seenPosts[] = $link;
                }
            }
            $totalPosts += count( $posts );
            $results[] = array( "url" => $url, "posts" => $posts );
        }
    }
}
// Put the feeds with the most new posts first.
usort($results, 'resultsSort');

// Plain-text report: a total, then each feed URL with its post count,
// followed by link / title / body of every new post in that feed.
$emailBody = "Total Posts: " . $totalPosts . "\n\n";
foreach( $results as $obj ){
    $emailBody .= $obj["url"] . " ( " . count($obj["posts"]) . " )" . "\n\n";
    foreach( $obj["posts"] as $p ){
        $emailBody .= $p["link"] . "\n" . $p["title"] . "\n" . $p["body"] . "\n\n";
    }
    $emailBody .= "\n\n\n\n";
}

$headers = 'From: cl-bot@twitlabs.com' . "\r\n" .
    'Reply-To: cl-bot@twitlabs.com' . "\r\n" .
    'X-Mailer: PHP/' . phpversion();

// mail() returns false when the message was not accepted for delivery.
// Report that instead of hiding it behind "@".
$mailAccepted = mail( "[put in your email]", "CL Bot " . date("c"), $emailBody, $headers );
if( !$mailAccepted ){
    // Leave the state file untouched so the posts from this run are reported
    // again next time rather than being marked as seen without an email.
    echo "Could not send the report email; seenPosts.json was left unchanged.\n";
}else{
    $seenPostsJson = json_encode( $seenPosts );
    // Never overwrite the state file with a failed encoding result.
    if( $seenPostsJson === false
        || file_put_contents( dirname(__FILE__) . "/seenPosts.json", $seenPostsJson, LOCK_EX ) === false ){
        echo "Could not write seenPosts.json; already reported posts may be sent again.\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment