Created
March 18, 2012 21:17
-
-
Save monolo/2081381 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Pro\CrawlerBundle\Parser; | |
use Goutte\Client; | |
use Symfony\Component\DomCrawler\Crawler; | |
use Pro\CrawlerBundle\Entity\ZonaDeluxe as entity; | |
/**
 * Scrapes the Barcelona nightclub listing of zonadeluxe.com and stores the
 * scraped venues as ZonaDeluxe entities.
 *
 * getLocalBase() performs the crawl (one request for the listing page plus
 * two requests per venue); getLocalFlush() writes the result through the
 * Doctrine entity manager handed to the constructor.
 */
class zonaDeluxe
{
    /** @var string URL of the listing page that getLocalBase() crawls. */
    private $url;

    /** @var Client HTTP client shared by every request made by this parser. */
    private $client;

    /** @var object|null Doctrine entity manager; only needed by getLocalFlush(). */
    private $em;

    /**
     * @param object|null $entityManager Doctrine entity manager used by
     *                                   getLocalFlush(). Optional so existing
     *                                   callers that only scrape keep working.
     */
    public function __construct($entityManager = null)
    {
        $this->url = "http://www.zonadeluxe.com/discotecas/barcelona/";
        $this->client = new Client();
        $this->em = $entityManager;
    }

    /**
     * Crawls the listing page and collects the data of every venue linked
     * from it.
     *
     * @return array List of arrays with the keys "url", "title", "id",
     *               "image", "lat", "lng" and "comment". A value is null
     *               when it could not be extracted from the page.
     */
    public function getLocalBase()
    {
        $discotecas = array();
        $crawler = $this->client->request('GET', $this->url);
        $discos = $crawler->filter("#parte_discos")->filter(".cajaDisco2")->filter("a");
        $urls = $discos->extract(array("href"));
        $images = $discos->filter("img")->extract(array("title", "src"));

        foreach ($urls as $i => $url) {
            // $urls and $images are extracted separately, so an anchor
            // without an <img> leaves no matching image row.
            $title = isset($images[$i][0]) ? $images[$i][0] : null;
            $src = isset($images[$i][1]) ? $images[$i][1] : "";

            // The venue id is the run of digits that follows an underscore
            // in the logo file name.
            $id = preg_match("#_(\d+)#", $src, $match) ? $match[1] : null;

            $latlog = $this->getLatLog($url);

            $discoteca = array(
                "url"     => $url,
                "title"   => $title,
                "id"      => $id,
                "image"   => null,
                "lat"     => $latlog["lat"],
                "lng"     => $latlog["lng"],
                "comment" => null,
            );

            // Both the logo URL and the description request are keyed by
            // the id, so they are skipped when no id could be parsed.
            if (null !== $id) {
                $discoteca["image"] = "http://www.zonadeluxe.com/logos/" . $id . ".png";
                $localinfo = $this->getLocalInfo($id);
                $discoteca["comment"] = $localinfo["comment"];
            }

            $discotecas[] = $discoteca;
        }

        return $discotecas;
    }

    /**
     * Reads the coordinates of a venue from the inline JavaScript of its
     * detail page.
     *
     * Every <script> inside <body> is searched for the statement
     * "var point = new GLatLng(<lat>,<lng>"; the first match wins.
     *
     * @param string $url Detail page of the venue.
     *
     * @return array Array with the keys "lat" and "lng"; both are null when
     *               the page contains no usable GLatLng statement.
     */
    public function getLatLog($url)
    {
        $coordinates = array("lat" => null, "lng" => null);
        $crawler = $this->client->request('GET', $url);

        foreach ($crawler->filter("body")->filter("script") as $script) {
            if (!preg_match("#var point = new GLatLng\(([^\)]*)#is", $script->nodeValue, $match)) {
                continue;
            }

            $latlog = explode(",", $match[1]);
            if (count($latlog) >= 2) {
                // Trim so that "41.38, 2.17" does not yield " 2.17".
                $coordinates["lat"] = trim($latlog[0]);
                $coordinates["lng"] = trim($latlog[1]);
            }
            break;
        }

        return $coordinates;
    }

    /**
     * Fetches the description of a venue from the description dialog
     * endpoint.
     *
     * @param string $id Venue id as parsed by getLocalBase().
     *
     * @return array Array with the key "comment": the text of the first <p>
     *               of the response, or null when the response has none.
     */
    public function getLocalInfo($id)
    {
        $url = "http://www.zonadeluxe.com/dialogs/desc_disco.php";
        $crawler = $this->client->request('POST', $url, array("id" => $id));
        $paragraphs = $crawler->filter("p");

        // text() throws on an empty node list, so check the count first.
        $comment = count($paragraphs) > 0 ? $paragraphs->text() : null;

        return array("comment" => $comment);
    }

    /**
     * Crawls the site and writes every venue to the database, creating a new
     * entity when findOneByName() finds none for the venue title and updating
     * the found entity otherwise.
     *
     * @throws \LogicException When no entity manager was passed to the
     *                         constructor.
     */
    public function getLocalFlush()
    {
        // Fail before crawling: the scrape costs many HTTP requests.
        if (null === $this->em) {
            throw new \LogicException(
                "An entity manager must be passed to the constructor before calling getLocalFlush()."
            );
        }

        $repository = $this->em->getRepository('ProCrawlerBundle:ZonaDeluxe');

        foreach ($this->getLocalBase() as $discoteca) {
            // NOTE(review): nothing below stores the title in the field that
            // findOneByName() searches, so entities created here may never be
            // matched on a later run. Confirm whether the entity exposes a
            // setter for that field and call it here.
            $local = $repository->findOneByName($discoteca["title"]);

            if (!$local) {
                $local = new entity();
                $this->em->persist($local);
            }

            // getLocalBase() does not produce an "adress" key, so fall back
            // to null instead of reading an undefined index.
            $local->setAdress(isset($discoteca["adress"]) ? $discoteca["adress"] : null);
            $local->setComment($discoteca["comment"]);
            $local->setCreateAt(new \DateTime("now"));
            $local->setIdLocal($discoteca["id"]);
            $local->setUrl($discoteca["url"]);
            $local->setUrlImages($discoteca["image"]);
            $local->setLat($discoteca["lat"]);
            $local->setLng($discoteca["lng"]);

            // Flushed per venue so the next lookup queries a database that
            // already contains this one.
            $this->em->flush();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment