Skip to content

Instantly share code, notes, and snippets.

@carlosernestolopez
Created October 8, 2019 06:38
Show Gist options
  • Save carlosernestolopez/a594fedab6dac8a730258a86b9838b6a to your computer and use it in GitHub Desktop.
Save carlosernestolopez/a594fedab6dac8a730258a86b9838b6a to your computer and use it in GitHub Desktop.
getInfo todobusco.com using search
<?php
# getInfo todobusco.com using search
# celopez.ni1990@gmail.com
# Scrapes todobusco.com search results: for every country it reads the total
# number of listings from the first search page, walks every result page and
# appends the listing URLs found on each page to "<country>.txt".

# Country slugs used to build the search URLs.
$countries = array('costa-rica', 'nicaragua', 'dominicana');

# Listings shown per result page: 18 for the "venta" search, 20 for the "casa" search.
$per_page = 18;

foreach ($countries as $country) {
    # HOUSES (use this URL instead, and set $per_page to 20)
    #$url = 'https://todobusco.com/casa/'.$country.'/buscar/alquiler-venta-de-inmuebles-en-'.$country;
    # SALES
    $url = 'https://todobusco.com/venta/'.$country.'/buscar/alquiler-venta-de-clasificados-clasificados-en-'.$country;

    # Warnings are suppressed on purpose; failure is detected through the
    # return value so one unreachable country does not abort the whole run.
    $info = @file_get_contents($url);
    if ($info === false) {
        print "Could not retrieve ".$url.", skipping ".$country."\n";
        continue;
    }

    # The first <strong>…</strong> on the search page holds the total result count.
    if (preg_match('|<strong>(.*?) </strong>|', $info, $total_match) !== 1) {
        print "Could not find the total number of results for ".$country.", skipping\n";
        continue;
    }
    $total = (int) $total_match[1];
    $pages = (int) ceil($total / $per_page);

    for ($i = 1; $i <= $pages; $i++) {
        print "Retrieving page #".$i." / ".$pages." of ".$country." ";

        $info_page = @file_get_contents($url.'/pag/'.$i);
        if ($info_page === false) {
            # Skip the page instead of appending an empty line to the output file.
            print "Failed.\n";
            continue;
        }

        # Listing links are the hrefs that end in "-<numeric id>".
        preg_match_all('|href="(.*?-[0-9]+)"|', $info_page, $matches);
        $urls = array_unique($matches[1]);
        sort($urls);

        # Only touch the file when there is something to write.
        if (count($urls) > 0) {
            file_put_contents($country.'.txt', implode("\n", $urls)."\n", FILE_APPEND);
        }
        print "Done. ".count($urls)." found\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment