Skip to content

Instantly share code, notes, and snippets.

@Bertware
Last active October 17, 2018 13:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Bertware/5255b8e6ad316415c697a1c6ace67e19 to your computer and use it in GitHub Desktop.
Save Bertware/5255b8e6ad316415c697a1c6ace67e19 to your computer and use it in GitHub Desktop.
Split GTFS files into one file per transport operator
<?php
// Directory the nationwide feed is downloaded to and extracted in.
const GTFS_PATH = '/tmp/gtfs/';
// Directory receiving one sub-directory and one zip archive per agency.
const TARGET_DIR = './splitGtfs/';

// Create the working directories. An already existing directory is fine, any
// other failure is fatal because every later step writes into them.
foreach ([GTFS_PATH, TARGET_DIR] as $directory) {
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        die('Could not create directory ' . $directory);
    }
}

// The archive is only downloaded when no local copy exists yet.
if (!file_exists(GTFS_PATH . 'sweden.zip')) {
    echo 'getting GTFS...';
    // NOTE(review): this API key is committed in the source. The
    // RESROBOT_API_KEY environment variable overrides it when set; consider
    // removing the fallback once callers provide their own key.
    $api_key = getenv('RESROBOT_API_KEY') ?: '94a585db-e249-4adc-b0c1-7c770427ae2e';
    $archive = file_get_contents('https://api.resrobot.se/gtfs/sweden.zip?key=' . rawurlencode($api_key));
    // Never persist a failed download: an empty sweden.zip would pass the
    // file_exists() check above on every later run and block a re-download.
    if ($archive === false || $archive === '') {
        die('Failed to download GTFS');
    }
    if (file_put_contents(GTFS_PATH . 'sweden.zip', $archive) === false) {
        die('Failed to store GTFS archive in ' . GTFS_PATH);
    }
    unset($archive);
}

// Unpack the archive next to itself so the individual .txt tables can be read.
$zip = new ZipArchive;
$res = $zip->open(GTFS_PATH . 'sweden.zip');
if ($res !== true) {
    die("Failed to get GTFS");
}
$extracted = $zip->extractTo(GTFS_PATH);
$zip->close();
if (!$extracted) {
    die('Failed to extract GTFS');
}

// Load every table of the feed into memory as a list of associative rows.
$gtfs_agency = deserializeCSV(GTFS_PATH . 'agency.txt');
$gtfs_calendar = deserializeCSV(GTFS_PATH . 'calendar.txt');
$gtfs_calendar_dates = deserializeCSV(GTFS_PATH . 'calendar_dates.txt');
$gtfs_feed_info = deserializeCSV(GTFS_PATH . 'feed_info.txt');
$gtfs_routes = deserializeCSV(GTFS_PATH . 'routes.txt');
$gtfs_stops = deserializeCSV(GTFS_PATH . 'stops.txt');
$gtfs_stop_times = deserializeCSV(GTFS_PATH . 'stop_times.txt');
$gtfs_transfers = deserializeCSV(GTFS_PATH . 'transfers.txt');
$gtfs_trips = deserializeCSV(GTFS_PATH . 'trips.txt');
// Build one stand-alone feed per agency: keep the agency's routes, then the
// trips on those routes, then everything those trips reference (stop times,
// services, stops), and finally write and zip the resulting tables.
foreach ($gtfs_agency as $agency) {
    $part_feed_info = $gtfs_feed_info;
    $part_agency = [$agency];
    echo PHP_EOL . 'parsing... ' . $agency['agency_name'] . PHP_EOL;

    // Routes operated by this agency. $used_routes is a set keyed by route_id.
    $part_routes = [];
    $used_routes = [];
    foreach ($gtfs_routes as $route) {
        if ($route['agency_id'] !== $agency['agency_id']) {
            continue;
        }
        $part_routes[] = $route;
        $used_routes[$route['route_id']] = true;
    }
    echo 'Kept ' . count($part_routes) . ' out of ' . count($gtfs_routes) . ' routes' . PHP_EOL;
    if (count($part_routes) === 0) {
        echo 'No routes, skipped!' . PHP_EOL;
        continue;
    }

    // Trips on the kept routes. The sets are keyed by id only: appending ids
    // with [] as well would create integer keys 0..n that falsely match small
    // numeric ids during the key lookups below.
    $part_trips = [];
    $used_services = [];
    $used_stops = [];
    $used_trips = [];
    foreach ($gtfs_trips as $trip) {
        if (!isset($used_routes[$trip['route_id']])) {
            continue;
        }
        $part_trips[] = $trip;
        $used_services[$trip['service_id']] = true;
        $used_trips[$trip['trip_id']] = true;
    }
    echo $agency['agency_name'] . ' has ' . count($part_trips) . ' trips' . PHP_EOL;

    // Stop times of the kept trips; they determine which stops are needed.
    $part_stop_times = [];
    foreach ($gtfs_stop_times as $stop_time) {
        if (!isset($used_trips[$stop_time['trip_id']])) {
            continue;
        }
        $part_stop_times[] = $stop_time;
        $used_stops[$stop_time['stop_id']] = true;
    }
    echo $agency['agency_name'] . ' has ' . count($part_stop_times) . ' stop times' . PHP_EOL;

    // Service calendars referenced by the kept trips.
    $part_calendar = [];
    foreach ($gtfs_calendar as $calendar) {
        if (isset($used_services[$calendar['service_id']])) {
            $part_calendar[] = $calendar;
        }
    }
    // Key lookup instead of in_array(): constant time per row and no loose
    // comparison between different-looking ids.
    $part_calendar_dates = [];
    foreach ($gtfs_calendar_dates as $calendar_date) {
        if (isset($used_services[$calendar_date['service_id']])) {
            $part_calendar_dates[] = $calendar_date;
        }
    }

    // Stops served by the kept stop times.
    $part_stops = [];
    foreach ($gtfs_stops as $stop) {
        if (isset($used_stops[$stop['stop_id']])) {
            $part_stops[] = $stop;
        }
    }
    echo $agency['agency_name'] . ' has ' . count($part_stops) . ' stops' . PHP_EOL;

    // A transfer is only kept when BOTH endpoints are part of this feed;
    // otherwise transfers.txt would reference stops missing from stops.txt.
    $part_transfers = [];
    foreach ($gtfs_transfers as $transfer) {
        if (isset($used_stops[$transfer['from_stop_id']]) && isset($used_stops[$transfer['to_stop_id']])) {
            $part_transfers[] = $transfer;
        }
    }

    echo 'exporting... ' . $agency['agency_name'] . PHP_EOL;
    // The agency name becomes a directory and file name, so path separators
    // inside it must not be interpreted as nested directories.
    $safe_name = str_replace(['/', '\\'], '_', $agency['agency_name']);
    $agency_dir = TARGET_DIR . $safe_name . '/';
    if (!is_dir($agency_dir) && !mkdir($agency_dir, 0777, true) && !is_dir($agency_dir)) {
        echo 'Could not create ' . $agency_dir . ', skipped!' . PHP_EOL;
        continue;
    }

    $tables = [
        'agency.txt' => $part_agency,
        'calendar.txt' => $part_calendar,
        'calendar_dates.txt' => $part_calendar_dates,
        'feed_info.txt' => $part_feed_info,
        'routes.txt' => $part_routes,
        'stops.txt' => $part_stops,
        'stop_times.txt' => $part_stop_times,
        'transfers.txt' => $part_transfers,
        'trips.txt' => $part_trips,
    ];
    // Tables without rows are left out: there is no row to take the column
    // headers from, so no valid file could be written for them.
    $written = [];
    foreach ($tables as $file_name => $rows) {
        if (count($rows) === 0) {
            echo 'no rows for ' . $file_name . ', file omitted' . PHP_EOL;
            // Remove a leftover file from an earlier run.
            if (is_file($agency_dir . $file_name)) {
                unlink($agency_dir . $file_name);
            }
            continue;
        }
        serializeCSV($agency_dir . $file_name, $rows);
        $written[] = $file_name;
    }
    unset($tables);

    $zip = new ZipArchive;
    $zip_path = TARGET_DIR . $safe_name . '.zip';
    echo 'Saving ' . $zip_path . '...' . PHP_EOL;
    // OVERWRITE starts from an empty archive, so entries from an earlier run
    // can never linger in the result.
    if ($zip->open($zip_path, ZipArchive::CREATE | ZipArchive::OVERWRITE) === true) {
        foreach ($written as $file_name) {
            $zip->addFile($agency_dir . $file_name, $file_name);
        }
        // All files are added, so close the zip file (this writes it to disk).
        if (!$zip->close()) {
            echo 'Could not write ' . $zip_path . PHP_EOL;
        }
    } else {
        echo 'Could not create ' . $zip_path . PHP_EOL;
    }
}
/**
 * Serialize a list of rows to a CSV file.
 *
 * The column headers are taken from the keys of the first row and written as
 * the first line. Every row is then written with its values in header order;
 * a header missing from a row yields an empty field. Fields containing a
 * comma, a double quote or a line break are enclosed in double quotes with
 * embedded quotes doubled, so such values do not shift the columns.
 *
 * Rows are streamed to disk one by one instead of building the whole file in
 * memory first. An empty $data results in an empty file. When the file cannot
 * be opened for writing, fopen() emits a warning and nothing is written.
 *
 * @param $path string File path of the CSV file to write
 * @param $data array List of rows, each an associative array (header => value)
 * @return void
 */
function serializeCSV($path, $data)
{
    echo 'serializing... ' . $path . ' with ' . count($data) . ' rows' . PHP_EOL;

    $handle = fopen($path, 'w');
    if ($handle === false) {
        return;
    }

    if (count($data) > 0) {
        // Take the headers from the first row without touching the array's
        // internal pointer (and without copying a potentially huge array).
        $headers = [];
        foreach ($data as $firstRow) {
            $headers = array_keys($firstRow);
            break;
        }

        // Turns a list of field values into one CSV line ending in PHP_EOL.
        $encodeLine = static function (array $fields): string {
            $encoded = [];
            foreach ($fields as $field) {
                $field = (string) $field;
                if (strpbrk($field, ",\"\r\n") !== false) {
                    $field = '"' . str_replace('"', '""', $field) . '"';
                }
                $encoded[] = $field;
            }
            return implode(',', $encoded) . PHP_EOL;
        };

        fwrite($handle, $encodeLine($headers));
        foreach ($data as $dataRow) {
            $fields = [];
            foreach ($headers as $header) {
                // Add the value if the key exists, otherwise leave the field empty
                $fields[] = array_key_exists($header, $dataRow) ? $dataRow[$header] : '';
            }
            fwrite($handle, $encodeLine($fields));
        }
    }

    fclose($handle);
}
/**
 * Serialize one record to a single CSV line.
 *
 * Values are written in the order given by $headers; a header that has no
 * entry in $station yields an empty field. Fields containing a comma, a
 * double quote or a line break are enclosed in double quotes with embedded
 * quotes doubled, so such values do not shift the columns.
 *
 * @param $headers array The headers which should be written, in column order
 * @param $station array The data as an associative array (header => value) to serialize to CSV
 * @return string CSV representation of the data, terminated by PHP_EOL
 */
function serializeCSVLine($headers, $station): string
{
    $fields = [];
    foreach ($headers as $header) {
        // Add the value if the key exists, otherwise leave the field empty
        $value = array_key_exists($header, $station) ? (string) $station[$header] : '';
        if (strpbrk($value, ",\"\r\n") !== false) {
            $value = '"' . str_replace('"', '""', $value) . '"';
        }
        $fields[] = $value;
    }
    // implode() places the separators, so there is no trailing comma
    return implode(',', $fields) . PHP_EOL;
}
/**
 * Load a CSV file and store it in an array with incremental keys.
 * Each line is stored as an associative array using column headers as key and the fields as value.
 *
 * The first line provides the column headers; a leading UTF-8 byte order mark
 * is ignored. Quoted fields are honoured, so a quoted value may contain
 * commas. Headers and values are trimmed. Blank lines are ignored, and rows
 * whose field count differs from the header count are skipped (their number
 * is reported on standard output). The script is terminated when the file
 * cannot be opened.
 *
 * @param $csvPath string File path leading to the CSV file
 * @return array the deserialized data
 */
function deserializeCSV($csvPath): array
{
    $fileReadHandle = fopen($csvPath, 'r');
    if (!$fileReadHandle) {
        die($csvPath . ' could not be opened!');
    }
    echo 'Deserializing ' . $csvPath . PHP_EOL;

    // Skip a UTF-8 byte order mark, which would otherwise become part of the
    // first header name and make that column unreachable by its real name.
    if (fread($fileReadHandle, 3) !== "\xEF\xBB\xBF") {
        rewind($fileReadHandle);
    }

    // Read the original headers; an empty file or blank header line means no data
    $headers = fgetcsv($fileReadHandle, 0, ',', '"', '\\');
    if ($headers === false || $headers === [null]) {
        fclose($fileReadHandle);
        return [];
    }
    // Trim tabs, newlines, ...
    $headers = array_map('trim', $headers);
    $columnCount = count($headers);

    $entries = [];
    $skipped = 0;
    // Go through all rows
    while (($fields = fgetcsv($fileReadHandle, 0, ',', '"', '\\')) !== false) {
        // fgetcsv() reports a blank line as an array holding a single null
        if ($fields === [null]) {
            continue;
        }
        // array_combine() requires equally sized arrays; a row that does not
        // line up with the headers cannot be mapped to columns reliably.
        if (count($fields) !== $columnCount) {
            $skipped++;
            continue;
        }
        $entries[] = array_combine($headers, array_map('trim', $fields));
    }
    fclose($fileReadHandle);

    if ($skipped > 0) {
        echo 'Skipped ' . $skipped . ' malformed rows in ' . $csvPath . PHP_EOL;
    }
    return $entries;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment