Skip to content

Instantly share code, notes, and snippets.

@lyrixx
Last active December 15, 2015 08:18
Show Gist options
  • Save lyrixx/5229686 to your computer and use it in GitHub Desktop.
Save lyrixx/5229686 to your computer and use it in GitHub Desktop.
RATP crawler.
/composer.lock
/vendor/
{
"name": "lyrixx/ratp",
"description": "class pour récupérer les horaires des prochains bus / métro",
"require": {
"fabpot/goutte": "dev-master"
},
"license": "MIT",
"authors": [
{
"name": "Grégoire Pineau",
"email": "lyrixx@lyrixx.info"
}
],
"autoload" : {
"psr-0" : { "":"" }
}
}
<?php
/**
* @author Grégoire Pineau <lyrixx@lyrixx.info>
*
* Usage:
*
* $ratp = new Ratp(array(
* array('type' => 'bus', 'line' => '173', 'stop' => 'General Leclerc-Victor Hugo'),
* array('type' => 'metro', 'line' => '13', 'stop' => 'Porte de Clichy'),
* ));
*
* $enjoy = $ratp->fetch();
*/
use Goutte\Client;
/**
 * Scrapes the RATP WAP schedule pages and collects the next departures for a
 * configured list of stops.
 *
 * Each stop is an array with the keys 'type' (see the TYPE_* constants),
 * 'line' and 'stop'.
 */
class Ratp
{
    const TYPE_METRO = 'metro';
    const TYPE_BUS = 'bus';

    private $client;
    private $stations;
    private $entrypoint;

    /**
     * @param array       $stations   List of stops, each an array with 'type', 'line' and 'stop' keys
     * @param object|null $client     HTTP client exposing setServerParameter(), request() and getCrawler();
     *                                a new Goutte client is created when omitted
     * @param string|null $entrypoint Base URL of the schedule service; defaults to the RATP WAP endpoint
     */
    public function __construct(array $stations, $client = null, $entrypoint = null)
    {
        $this->stations = $stations;
        $this->client = $client ?: new Client();
        $this->entrypoint = $entrypoint ?: 'http://wap.ratp.fr/siv/schedule';
    }

    /**
     * Requests the schedule page of every configured stop and parses it.
     *
     * Pages on which the station name cannot be located are skipped. A station
     * is only reported when at least one of its pages produced a destination.
     *
     * @return array Map of "<line>.<stop>" to an array with the keys 'name',
     *               'type', 'line' and 'destinations' (direction => list of times)
     */
    public function fetch()
    {
        $stations = array();
        $client = $this->client;
        $client->setServerParameter('HTTP_USER_AGENT', 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:19.0) Gecko/20100101 Firefox/19.0');

        foreach ($this->buildQueries($this->stations) as $stop) {
            $id = sprintf('%s.%s', $stop['line'], $stop['stop']);
            $client->request('GET', $stop['query']);

            try {
                $crawler = $client->getCrawler();
                $name = $crawler->filter('.bwhite')->eq(1)->text();
            } catch (\InvalidArgumentException $e) {
                // The expected station-name node is missing: ignore this page.
                continue;
            }

            $stations[$id]['name'] = $name;
            $stations[$id]['type'] = $stop['type'];
            $stations[$id]['line'] = $stop['line'];
            if (!isset($stations[$id]['destinations'])) {
                $stations[$id]['destinations'] = array();
            }

            // Array union: a direction already collected from an earlier page
            // of the same station keeps its times.
            $stations[$id]['destinations'] += $this->parseDestinations($crawler);

            // Several pages can share one id (metro stops are queried once per
            // direction), so only drop the station when it has nothing at all;
            // an empty page must not erase what a previous page provided.
            if (!$stations[$id]['destinations']) {
                unset($stations[$id]);
            }
        }

        return $stations;
    }

    /**
     * Extracts the destinations and their waiting times from a schedule page.
     *
     * @param object $crawler Crawler whose filter() result can be iterated as DOM nodes
     *
     * @return array Map of direction label to the list of times found for it
     */
    private function parseDestinations($crawler)
    {
        $destinations = array();

        foreach ($crawler->filter('.bg1, .bg3') as $node) {
            $raw = (string) $node->nodeValue;

            // Hack to remove all service messages: only entries whose text
            // starts with a newline are kept. Empty nodes are skipped as well.
            if ('' === $raw || "\n" !== $raw[0]) {
                continue;
            }

            // Remove surrounding whitespace and the ">" marker.
            $direction = trim($raw, "\n > ");
            if ('' === $direction) {
                continue;
            }

            // The time is read from the node following the direction label.
            $timeNode = $node->nextSibling;
            if (null === $timeNode) {
                continue;
            }

            $time = trim((string) $timeNode->nodeValue);
            if ('' === $time) {
                continue;
            }

            $destinations[$direction][] = $time;
        }

        return $destinations;
    }

    /**
     * Expands every stop into the list of pages to request.
     *
     * Metro stops produce two entries (directions 'A' and 'R'); any other type
     * produces a single entry without a direction.
     *
     * @param array $stops Stops as given to the constructor
     *
     * @return array The stops, each completed with a 'query' key holding the URL
     */
    private function buildQueries(array $stops = array())
    {
        $queries = array();

        foreach ($stops as $stop) {
            $directions = static::TYPE_METRO === $stop['type'] ? array('A', 'R') : array(null);

            foreach ($directions as $direction) {
                $url = $this->buildUrl($stop['type'], $stop['line'], $stop['stop'], $direction);
                $queries[] = array_replace($stop, array('query' => $url));
            }
        }

        return $queries;
    }

    /**
     * Builds the schedule URL for one line/stop and optional direction.
     *
     * @param string      $type Network type, e.g. 'metro' or 'bus'
     * @param string      $line Line identifier; prefixed with the upper-cased first letter of $type
     * @param string      $stop Station name
     * @param string|null $dir  Direction code; sent upper-cased as 'directionsens' when given
     *
     * @return string
     */
    private function buildUrl($type, $line, $stop, $dir = null)
    {
        $params = array(
            'service' => 'next',
            'reseau' => $type,
            // substr() instead of $type[0] so an empty type does not trigger a warning.
            'lineid' => strtoupper((string) substr((string) $type, 0, 1)).$line,
            'stationname' => $stop,
        );

        if ($dir) {
            $params['directionsens'] = strtoupper($dir);
        }

        return sprintf('%s?%s', $this->entrypoint, http_build_query($params));
    }
}
#!/usr/bin/php
<?php
// Load the autoloader installed under vendor/ next to this script.
require __DIR__.'/vendor/autoload.php';

// Stops to look up: one bus stop and one metro station.
$watched = array(
    array('type' => 'bus', 'line' => '74', 'stop' => 'General Leclerc-Victor Hugo'),
    array('type' => 'metro', 'line' => '13', 'stop' => 'Porte de Clichy'),
);

$ratp = new Ratp($watched);
$results = $ratp->fetch();

// Print one header line per station, then one line per direction with its times.
foreach ($results as $station) {
    printf("%s, line %s, %s\n", $station['type'], $station['line'], $station['name']);

    if (isset($station['destinations'])) {
        foreach ($station['destinations'] as $direction => $departures) {
            printf(" %s: %s\n", $direction, implode(', ', $departures));
        }

        // Blank line separating this station from the next one.
        echo "\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment