Skip to content

Instantly share code, notes, and snippets.

@nicolas-t
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicolas-t/d698ab8bb8150193e4a6 to your computer and use it in GitHub Desktop.
Save nicolas-t/d698ab8bb8150193e4a6 to your computer and use it in GitHub Desktop.
Receive an email when a new item pops at Leboncoin.fr
<?php
/* ******************************************************************************* */
/* Script to track search results on leboncoin */
/* Receive an email when a new item pops up */
/* ******************************************************************************* */
/* ******************************************************************************* */
// - Add this cron job to check for new items every 30 minutes:
//   */30 * * * * curl http://yoursite.com/leboncoin.php
/* - Make the folder containing this script writable */
/* ******************************************************************************* */
/* ******************************************************************************* */
/* Add search pages here : */
/* ******************************************************************************* */
// Search result pages to watch, one URL per entry.
$targets = [
    'http://www.leboncoin.fr/annonces/offres/ile_de_france/occasions/?f=a&th=1&q=lapin+nain',
    'http://www.leboncoin.fr/annonces/offres/ile_de_france/occasions/?f=a&th=1&q=lapin+belier',
];
/* ******************************************************************************* */
/* Add/Remove proxies here (mainly because leboncoin blacklisted OVH) : */
/* ******************************************************************************* */
// HTTP proxies in "host:port" form, listed in the order they should be tried.
$proxies = [
    '62.210.56.250:8089',
    '62.210.56.250:7808',
    '62.210.56.250:3127',
    '188.165.244.5:8080',
    '195.154.231.43:3128',
];
/* ******************************************************************************* */
/* Add your email address here : */
/* ******************************************************************************* */
// Recipient of every notification e-mail sent by this script.
const EMAIL_ADDRESS = 'your.address@provider.com';
/* ******************************************************************************* */
/* Stop editing here (unless you know what you're doing) : */
/* ******************************************************************************* */
/**
 * Tell whether a proxy accepts TCP connections.
 *
 * @param string $proxy Proxy address in "host:port" form.
 * @return bool True when a TCP connection could be opened within the timeout;
 *              false otherwise, including when $proxy is not "host:port".
 */
function checkProxy($proxy){
    $timeout = 5; // seconds allowed for the connection attempt
    $parts = explode(':', (string) $proxy);

    // Anything that is not exactly "host:port" with a numeric port cannot be probed.
    if (count($parts) !== 2 || $parts[0] === '' || !preg_match('/^[0-9]+$/', $parts[1])) {
        return false;
    }

    // fsockopen() raises a warning on an unreachable host; failure is an expected
    // outcome here, so the warning is silenced and the return value checked instead.
    $socket = @fsockopen($parts[0], (int) $parts[1], $errorNumber, $errorMessage, $timeout);
    if ($socket === false) {
        return false;
    }

    // Only reachability matters: release the connection instead of leaking it.
    fclose($socket);
    return true;
}
/**
 * Download a URL with cURL, going through the first reachable proxy.
 *
 * Reads the global $proxies list. When it is non-empty, the proxies are probed
 * in order with checkProxy() and the first one that answers is used. When none
 * answers, a warning e-mail is sent through sendMail() and the request is made
 * without a proxy.
 *
 * @param string $url Address to download.
 * @return mixed Whatever curl_exec() returns for the configured handle.
 */
function file_get_contents_curl($url) {
    global $proxies;

    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_HEADER, 0);
    curl_setopt($handle, CURLOPT_AUTOREFERER, TRUE);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 60);

    if (count($proxies) > 0) {
        // Walk the list until one proxy accepts a connection.
        $workingProxy = null;
        foreach ($proxies as $candidate) {
            if (checkProxy($candidate)) {
                $workingProxy = $candidate;
                break;
            }
        }

        if ($workingProxy === null) {
            sendMail('Leboncoin — proxies are down', "Yo,\n\n Proxies to access leboncoin are down :(");
        } else {
            curl_setopt($handle, CURLOPT_PROXY, $workingProxy);
        }
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
/**
 * Send a plain-text notification e-mail to EMAIL_ADDRESS.
 *
 * @param string $subject Subject line; may contain non-ASCII characters.
 * @param string $body    Plain-text message body.
 * @return bool The result of mail(): true when the message was accepted for delivery.
 */
function sendMail($subject, $body){
    $to = EMAIL_ADDRESS;

    // Mail headers must be 7-bit ASCII, but the subjects used by this script
    // contain an em dash. Wrap such subjects in an RFC 2047 encoded-word.
    // Encoding anything outside printable ASCII also keeps CR/LF out of the header.
    // NOTE(review): assumes this file (and therefore its string literals) is
    // saved as UTF-8 - confirm. Subjects are short enough to fit one encoded-word.
    if (preg_match('/[^\x20-\x7E]/', $subject)) {
        $subject = '=?UTF-8?B?' . base64_encode($subject) . '?=';
    }

    // Declare the body charset so mail clients do not have to guess it.
    $headers = "MIME-Version: 1.0\r\n"
        . "Content-Type: text/plain; charset=UTF-8\r\n"
        . "Content-Transfer-Encoding: 8bit";

    return mail($to, $subject, $body, $headers);
}
/**
 * Fetch a search page and collect the links of the listings it shows.
 *
 * The page is downloaded with file_get_contents_curl(); the links are the href
 * attributes of the <a> elements that are direct children of any element
 * carrying the "list-lbc" class.
 *
 * @param string $src URL of the search page.
 * @return array List of href strings; empty when the download produced no
 *               content or no matching link was found.
 */
function getItems($src){
    $html = file_get_contents_curl($src);

    // A failed download gives false (or an empty string). On PHP 8,
    // DOMDocument::loadHTML() throws a ValueError for an empty source, which
    // the "@" operator cannot silence, so bail out before parsing.
    if (!is_string($html) || trim($html) === '') {
        return [];
    }

    // Real-world HTML is rarely valid: collect parser complaints internally
    // instead of emitting warnings, then restore the previous libxml setting.
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $parent = "list-lbc";
    $xpath = new DOMXPath($dom);
    // The concat/normalize-space idiom matches the class as a whole word.
    $anchors = $xpath->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' $parent ')]/a");

    $result = [];
    foreach ($anchors as $anchor) {
        $href = $anchor->getAttribute('href');
        // getAttribute() returns '' when the attribute is absent; an empty link is useless.
        if ($href !== '') {
            $result[] = $href;
        }
    }
    return $result;
}
/**
 * Load the list of already-seen links stored in a comma-separated file.
 *
 * @param string $file Path of the file to read.
 * @return array List of link strings; empty when the file is missing,
 *               unreadable or blank.
 */
function getKnown($file){
    // A missing file is the normal state before the first run; check for it
    // up front rather than silencing the read warning.
    if (!is_file($file) || !is_readable($file)) {
        return [];
    }

    $content = file_get_contents($file);
    if ($content === false) {
        return [];
    }

    // explode(',', '') would yield [''] - a bogus one-element list - so handle
    // blank content explicitly. Trimming tolerates a stray trailing newline.
    $content = trim($content);
    if ($content === '') {
        return [];
    }

    return explode(',', $content);
}
/**
 * E-mail a notification for every link that has not been seen before.
 *
 * @param array $items Links currently shown on the search page.
 * @param array $known Links recorded on the previous run.
 * @return void
 */
function detectNew($items, $known){
    // array_diff() compares the string form of each value exactly, whereas a
    // loose in_array() treats numeric-looking strings such as '1e3' and '1000'
    // as equal. array_unique() prevents two e-mails for a link listed twice.
    $fresh = array_unique(array_diff($items, $known));

    foreach ($fresh as $item) {
        sendMail('Leboncoin — New item', "Yo,\n\n New item yolo : \n\n ".$item);
    }
}
/**
 * Overwrite the known-links file with the given links, comma-separated.
 *
 * Terminates the script with "can't open file" when the file cannot be
 * opened for writing.
 *
 * @param string $file  Path of the file to write.
 * @param array  $items Links to store.
 * @return void
 */
function updateKnown($file, $items){
    $handle = fopen($file, 'w');
    if (!$handle) {
        die("can't open file");
    }

    fwrite($handle, implode(',', $items));
    fclose($handle);
}
// For each watched search page: fetch the current links, e-mail the ones not
// seen on the previous run, then store the current links for the next run.
foreach ($targets as $k => $url) {
    // One state file per search page, named after the page's index in $targets.
    $dataFile = 'known'.$k.'.txt';
    $items = getItems($url);

    // An empty result cannot be told apart from a failed download. Overwriting
    // the stored list with nothing would make the next successful run announce
    // every listing again, so keep the previous state and move on.
    if (count($items) === 0) {
        continue;
    }

    $known = getKnown($dataFile);
    detectNew($items, $known);
    updateKnown($dataFile, $items);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment