Created
October 8, 2019 06:38
-
-
Save carlosernestolopez/a594fedab6dac8a730258a86b9838b6a to your computer and use it in GitHub Desktop.
getInfo todobusco.com using search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
# getInfo todobusco.com using search
# celopez.ni1990@gmail.com
#
# For every country in $countries this script:
#   1. downloads the first search-results page and reads the total number
#      of results from the first "<strong>N </strong>" element on it;
#   2. walks result pages 1..ceil(total / results-per-page);
#   3. appends the unique, sorted listing URLs found on each page to
#      "<country>.txt" in the current working directory.
#
# The output file is opened in append mode, so running the script again
# adds to whatever a previous run already wrote.

$countries = array('costa-rica', 'nicaragua', 'dominicana');

# Results shown per page: 20 for houses (casa), 18 for sales (venta).
# Keep this in sync with whichever $url is enabled below.
$results_per_page = 18;

foreach ($countries as $country) {
    # HOUSES
    #$url = 'https://todobusco.com/casa/'.$country.'/buscar/alquiler-venta-de-inmuebles-en-'.$country;
    # SALES
    $url = 'https://todobusco.com/venta/'.$country.'/buscar/alquiler-venta-de-clasificados-clasificados-en-'.$country;

    # "@" hides PHP's own warning; the failure is reported explicitly instead.
    $info = @file_get_contents($url);
    if ($info === false) {
        print "Could not retrieve ".$url." - skipping ".$country."\n";
        continue;
    }

    # Only the first match is used as the total count.
    if (!preg_match('|<strong>(.*?) </strong>|', $info, $total_match)) {
        print "No result count found for ".$country." - skipping\n";
        continue;
    }

    # NOTE(review): the (int) cast stops at the first non-digit, so a count
    # rendered with a thousands separator would be truncated - confirm the
    # format the site actually uses.
    $total = (int)$total_match[1];
    $pages = (int)ceil($total / $results_per_page);

    for ($i = 1; $i <= $pages; $i++) {
        print "Retrieving page #".$i." / ".$pages." of ".$country." ";

        $info_page = @file_get_contents($url.'/pag/'.$i);
        if ($info_page === false) {
            # Keep going: one unreachable page should not abort the run.
            print "Failed. Page skipped\n";
            continue;
        }

        # Collect every href whose value ends in "-<digits>".
        preg_match_all('|href="(.*?-[0-9]+)"|', $info_page, $matches);
        $urls = array_unique($matches[1]);
        sort($urls);

        # Skip the write when nothing matched, otherwise a lone "\n" would
        # be appended and leave blank lines in the output file.
        if (count($urls) > 0) {
            $written = file_put_contents($country.'.txt', implode("\n", $urls)."\n", FILE_APPEND);
            if ($written === false) {
                print "Could not write to ".$country.".txt\n";
                continue;
            }
        }

        print "Done. ".count($urls)." found\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment