Skip to content

Instantly share code, notes, and snippets.

@Asbra
Created December 13, 2014 16:27
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Asbra/84b32d4b31a9914c4cf2 to your computer and use it in GitHub Desktop.
Save Asbra/84b32d4b31a9914c4cf2 to your computer and use it in GitHub Desktop.
PHP Craigslist job finder (scraper)
<?php
/**
* Craigslist job finder
* Scans a given Craigslist section, checks titles for keyword(s), then emails when found.
* @author Johan <johan@asbra.net>
* @date 2014-11-10
* @modified 2014-11-10
*/
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
// $keyword holds what to look for in each listing title. It may be:
//   - a plain word:          $keyword = 'programmer';
//   - a regular expression:  $keyword = '/(programm?er|coder)/i';
//   - an array of either:    $keyword = array('programmer', 'coder');
$keyword = array('model', 'actress');
// Craigslist city, i.e. the subdomain part of <city>.craigslist.org
$city = 'newyork';
// Craigslist section code to scan (jjj = jobs)
$section = 'jjj';
// Recipient address and subject line of the notification email
$email_to = 'john.smith@gmail.com';
$email_subject = 'CraigsList jobs found!';
///////////////////////////////////////////////////////////////////////////////
// Do not modify below this line
// Requires the author's cURL wrapper: http://asbra.net/php-curl-class-snippet-tutorial/
require 'curl.php';
// Download the listing page for the configured city and section
$url = sprintf('http://%s.craigslist.org/search/%s', $city, $section);
$curl = new cURL();
$curl->get($url);
// Capture every listing anchor: group 1 is the href, group 2 is the title text
$regex = '/<a[^>]*?href="([^"]+)"[^>]*?class="hdrlnk"[^>]*?>([^<]+)<\/a>/';
preg_match_all($regex, $curl->data, $matches);
$total = count($matches[0]);
echo "Found {$total} jobs total\r\n";
// Install a no-op handler so PHP warnings/notices raised while searching are
// discarded; it is removed again by restore_error_handler() further down.
set_error_handler(function () {
    /* ignore errors */
});
// Running count of matching listings; incremented inside search()
$found = 0;
/**
 * Searches the scraped listing titles for a single keyword.
 *
 * Reads the global $matches array filled by preg_match_all() (index 1 holds the
 * link hrefs, index 2 the link titles) and increments the global $found counter
 * once for every listing that matches.
 *
 * A listing matches when the keyword occurs in its title as a case-insensitive
 * substring, or when the keyword is a valid delimited regular expression that
 * matches the title.
 *
 * Every matching listing is reported; previously the function returned after
 * the first hit, so at most one job per keyword ever reached the email.
 *
 * NOTE(review): hrefs are emitted exactly as scraped. If the page uses relative
 * links they will not be clickable from an email - confirm against live markup.
 *
 * @param string $keyword Plain-text keyword or PCRE pattern (e.g. '/coder/i').
 * @return string|false One '<a href="...">title</a>' line per matching listing,
 *                      concatenated; false when nothing matched.
 */
function search($keyword)
{
    global $matches, $found;

    $keyword = (string) $keyword;

    // An empty keyword would match every title (or raise a warning, depending
    // on the PHP version); treat it, and an empty scrape, as "no match".
    if ($keyword === '' || empty($matches[1]) || empty($matches[2]))
    {
        return false;
    }

    // Decide once whether the keyword is usable as a regular expression.
    // preg_match() returns false (and raises a warning) for an invalid pattern,
    // so the probe runs under a temporary silent handler instead of depending
    // on a handler installed by the caller.
    set_error_handler(function () {
        return true;
    });
    $is_regex = preg_match($keyword, '') !== false;
    restore_error_handler();

    $html = '';
    foreach ($matches[1] as $i => $link)
    {
        $title = $matches[2][$i];
        echo "Searching for keyword {$keyword} in '{$title}'\r\n";

        $hit = stripos($title, $keyword) !== false
            || ($is_regex && preg_match($keyword, $title) === 1);
        if (!$hit)
        {
            continue;
        }

        $found++;
        echo "{$title}\r\n";
        $html .= '<a href="'.$link.'">'.$title.'</a>'."\r\n";
    }

    // Keep the original contract: false (not an empty string) when nothing matched.
    return $html === '' ? false : $html;
}
// Run the search for every configured keyword. Casting to array lets a single
// string keyword and an array of keywords share one code path.
$html = '';
foreach ((array) $keyword as $kw)
{
    // search() yields false when nothing matched; appending false adds nothing.
    $html .= search($kw);
}
if ($html !== '')
{
    // The body consists of HTML anchors, so declare it as HTML; without these
    // headers mail() sends text/plain and the recipient sees raw tags.
    // NOTE(review): charset assumes the scraped page is UTF-8 - confirm.
    $headers = "MIME-Version: 1.0\r\nContent-Type: text/html; charset=UTF-8";
    // nl2br() keeps one link per line once the body is rendered as HTML.
    if (mail($email_to, $email_subject, nl2br($html), $headers))
    {
        echo "Sent email\r\n{$html}\r\n";
    }
    else
    {
        // mail() returns false when the message was not accepted for delivery;
        // report that instead of claiming success.
        echo "Failed to send email\r\n{$html}\r\n";
    }
}
// Remove the no-op error handler installed before the search.
restore_error_handler();
echo "Found {$found} matching jobs\r\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment