Last active
December 15, 2015 08:18
-
-
Save lyrixx/5229686 to your computer and use it in GitHub Desktop.
RATP crawler.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/composer.lock | |
/vendor/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "lyrixx/ratp", | |
"description": "class pour recuperer les horaires des prochains bus / metro", | |
"require": { | |
"fabpot/goutte": "dev-master" | |
}, | |
"license": "MIT", | |
"authors": [ | |
{ | |
"name": "Grégoire Pineau", | |
"email": "lyrixx@lyrixx.info" | |
} | |
], | |
"autoload" : { | |
"psr-0" : { "":"" } | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* @author Grégoire Pineau <lyrixx@lyrixx.info> | |
* | |
* Usage: | |
* | |
* $ratp = new Ratp(array( | |
* array('type' => 'bus', 'line' => '173', 'stop' => 'General Leclerc-Victor Hugo'), | |
* array('type' => 'metro', 'line' => '13', 'stop' => 'Porte de Clichy'), | |
* )); | |
* | |
* $enjoy = $ratp->fetch(); | |
*/ | |
use Goutte\Client; | |
/**
 * Fetches the next departures for a list of stops from the RATP WAP schedule pages.
 *
 * Each stop is an array with the keys "type" (see the TYPE_* constants),
 * "line" and "stop". Metro stops are queried once per direction ("A" and
 * "R"); every other type is queried once, without a direction.
 */
class Ratp
{
    const TYPE_METRO = 'metro';
    const TYPE_BUS = 'bus';

    private $client;
    private $stations;
    private $entrypoint;

    /**
     * @param array       $stations   List of stops, each an array with "type", "line" and "stop" keys
     * @param object|null $client     Client exposing setServerParameter(), request() and getCrawler();
     *                                a Goutte client is created when none (or a falsy value) is given
     * @param string|null $entrypoint Base URL of the schedule service; defaults to the RATP WAP endpoint
     */
    public function __construct(array $stations, $client = null, $entrypoint = null)
    {
        $this->stations = $stations;
        $this->client = $client ?: new Client();
        $this->entrypoint = $entrypoint ?: 'http://wap.ratp.fr/siv/schedule';
    }

    /**
     * Queries every configured stop and collects the departures found.
     *
     * The result is keyed by "<line>.<stop>"; each entry holds "name", "type",
     * "line" and "destinations" (heading => list of times). A stop for which no
     * departure could be extracted from any of its queries is left out.
     *
     * @return array
     */
    public function fetch()
    {
        $stations = array();
        $client = $this->client;
        $client->setServerParameter('HTTP_USER_AGENT', 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:19.0) Gecko/20100101 Firefox/19.0');

        foreach ($this->buildQueries($this->stations) as $stop) {
            $id = sprintf('%s.%s', $stop['line'], $stop['stop']);
            $client->request('GET', $stop['query']);
            $crawler = $client->getCrawler();

            try {
                // The text of the second ".bwhite" element is used as the station name.
                $name = $crawler->filter('.bwhite')->eq(1)->text();
            } catch (\InvalidArgumentException $e) {
                // No such element on the page: nothing usable for this query.
                continue;
            }

            $destinations = $this->extractDestinations($crawler);
            if (!$destinations) {
                // Several queries can share the same $id (one per metro
                // direction). An empty answer must not discard departures
                // already collected by a previous query for that station.
                continue;
            }

            $known = isset($stations[$id]) ? $stations[$id]['destinations'] : array();
            foreach ($destinations as $dir => $times) {
                // Append instead of using array union, so that times are not
                // lost when two queries report the same heading.
                $known[$dir] = isset($known[$dir]) ? array_merge($known[$dir], $times) : $times;
            }

            $stations[$id] = array(
                'name' => $name,
                'type' => $stop['type'],
                'line' => $stop['line'],
                'destinations' => $known,
            );
        }

        return $stations;
    }

    /**
     * Reads the heading => times map out of the ".bg1" / ".bg3" nodes of a page.
     *
     * @param object $crawler Crawler returned by the client for the current page
     *
     * @return array Map of heading to the list of times found for it
     */
    private function extractDestinations($crawler)
    {
        $destinations = array();

        foreach ($crawler->filter('.bg1, .bg3') as $child) {
            $value = (string) $child->nodeValue;

            // Hack to remove all service messages: only nodes whose text
            // starts with a line break are kept. The emptiness check avoids
            // reading offset 0 of an empty string.
            if ('' === $value || "\n" !== $value[0]) {
                continue;
            }

            // Remove some space and ">"
            $dir = trim($value, "\n > ");
            if ('' === $dir) {
                continue;
            }

            // The time is read from the following sibling node, which may be missing.
            $sibling = $child->nextSibling;
            if (null === $sibling) {
                continue;
            }

            $time = trim((string) $sibling->nodeValue);
            if ('' === $time) {
                continue;
            }

            $destinations[$dir][] = $time;
        }

        return $destinations;
    }

    /**
     * Expands the stops into one entry per request, each carrying a "query" URL.
     *
     * @param array $stops Stops as given to the constructor
     *
     * @return array The stops, duplicated per direction for metro, with a "query" key added
     */
    private function buildQueries(array $stops = array())
    {
        $queries = array();

        foreach ($stops as $stop) {
            // Metro is requested for both directions; other types carry no direction.
            $directions = static::TYPE_METRO === $stop['type'] ? array('A', 'R') : array(null);

            foreach ($directions as $dir) {
                $query = $this->buildUrl($stop['type'], $stop['line'], $stop['stop'], $dir);
                $queries[] = array_replace($stop, array('query' => $query));
            }
        }

        return $queries;
    }

    /**
     * Builds the schedule URL for one stop.
     *
     * @param string      $type Network type, also sent as the "reseau" parameter
     * @param string      $line Line identifier; prefixed with the upper-cased first letter of $type
     * @param string      $stop Station name
     * @param string|null $dir  Optional direction, sent upper-cased as "directionsens"
     *
     * @return string
     */
    private function buildUrl($type, $line, $stop, $dir = null)
    {
        $params = array(
            'service' => 'next',
            'reseau' => $type,
            // substr() instead of $type[0] so that an empty type does not trigger an offset error.
            'lineid' => strtoupper(substr($type, 0, 1)).$line,
            'stationname' => $stop,
        );

        if ($dir) {
            $params['directionsens'] = strtoupper($dir);
        }

        // Force "&" so the URL does not depend on the arg_separator.output ini setting.
        return sprintf('%s?%s', $this->entrypoint, http_build_query($params, '', '&'));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php | |
<?php | |
require __DIR__.'/vendor/autoload.php'; | |
// Stops to look up: one bus stop and one metro station.
$watchedStops = array(
    array('type' => 'bus', 'line' => '74', 'stop' => 'General Leclerc-Victor Hugo'),
    array('type' => 'metro', 'line' => '13', 'stop' => 'Porte de Clichy'),
);

$ratp = new Ratp($watchedStops);

// Print one header line per station, then one line per heading with its
// comma-separated times, followed by a blank line.
foreach ($ratp->fetch() as $station) {
    printf("%s, line %s, %s\n", $station['type'], $station['line'], $station['name']);

    if (isset($station['destinations'])) {
        foreach ($station['destinations'] as $heading => $departures) {
            printf(" %s: %s\n", $heading, implode(', ', $departures));
        }

        echo "\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment