Skip to content

Instantly share code, notes, and snippets.

@Bertware
Last active November 20, 2018 16:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Bertware/ad3c5f16cf8602e8cb9193a8a24d5bd3 to your computer and use it in GitHub Desktop.
Save Bertware/ad3c5f16cf8602e8cb9193a8a24d5bd3 to your computer and use it in GitHub Desktop.
Prototype PHP script to convert a GTFS archive into a
<?php
/**
* Create a stations list, agencies list, stations list per agency, stops files from GTFS.
* Due to a lack of time this doesn't output 100% valid linked data (yet), but it's good enough for now
*
* Free to use, adapt, modify, redistribute however you want at your own responsibility.
*
* Requirements:
* php-zip
* allow_url_fopen = On (php.ini setting, needed to download the GTFS archive from a URL)
*
*/
// Constants, example GTFS from Sweden
// Location of the GTFS archive; downloadGTFS() reads it with file_get_contents(),
// so this can be a URL or (see the commented-out alternative) a local path.
const GTFS_ZIP = 'https://transitfeeds.com/p/trafiklab/50/latest/download';
//const GTFS_ZIP = '/home/bert/Desktop/splitGtfs/SJ.zip';
// Directory the archive is extracted into, and the local file name the archive is stored under.
const TMP_UNZIP_PATH = 'gtfs';
const TMP_ZIPFILE = 'sv-latest-gtfs.zip';
// Names of the GTFS files that downloadGTFS() moves from the extraction directory
// into the working directory. Each one is deleted again by the function that consumes it last.
const GTFS_AGENCIES = 'agency.txt';
const GTFS_STOP_TIMES = 'stop_times.txt';
const GTFS_TRIPS = 'trips.txt';
const GTFS_ROUTES = 'routes.txt';
const GTFS_STOPS = 'stops.txt';
const GTFS_CAL_DATES = 'calendar_dates.txt';
const GTFS_TRANSFER_TIMES = 'transfers.txt';
// CSV output names and headers.
// NOTE(review): none of the STATIONS_CSV / STOPS_CSV / CSV_* constants are referenced by the
// functions visible in this file (all output is JSON) — presumably left over from a CSV-based
// version of the script; confirm before removing.
const STATIONS_CSV = 'stations.csv';
const STOPS_CSV = 'stops.csv';
const CSV_HEADER_URI = 'URI';
const CSV_HEADER_NAME = 'name';
const CSV_HEADER_COUNTRY = 'country-code';
const CSV_HEADER_LONGITUDE = 'longitude';
const CSV_HEADER_LATITUDE = 'latitude';
const CSV_HEADER_AVG_STOP_TIMES = 'avg_stop_times';
const CSV_HEADER_TRANSFER_TIME = 'official_transfer_time';
const CSV_WRITE_HEADERS = [CSV_HEADER_URI, CSV_HEADER_NAME,
CSV_HEADER_COUNTRY, CSV_HEADER_LONGITUDE, CSV_HEADER_LATITUDE, CSV_HEADER_AVG_STOP_TIMES, CSV_HEADER_TRANSFER_TIME];
// URI prefixes used to mint identifiers for stops and agencies.
// BASE_URI_ROUTE is declared but not used by the functions visible in this file.
const BASE_URI = "http://se.lc.bertmarcelis.be/";
const BASE_URI_STOPS = BASE_URI . "stops/";
const BASE_URI_AGENCY = BASE_URI . "agency/";
const BASE_URI_ROUTE = BASE_URI . "route/";
// Replacement map for strtr(): maps accented letters to ASCII and removes spaces, '-', '&' and '/'.
// It is applied to agency names to build the slugs used in URIs and output file names.
// Both upper- and lower-case accented letters are listed, so the map also works on names whose
// accented characters were not lower-cased beforehand.
const UNWANTED_ARRAY = array('Š' => 'S', 'š' => 's', 'Ž' => 'Z', 'ž' => 'z', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'A', 'Ç' => 'C', 'È' => 'E', 'É' => 'E',
'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O', 'Ù' => 'U',
'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'B', 'ß' => 'Ss', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'a', 'ç' => 'c',
'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'o', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o',
'ö' => 'o', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ý' => 'y', 'þ' => 'b', 'ÿ' => 'y', ' ' => '', '-' => '', '&' => '', '/' => '');
// Human-readable labels for route_type codes, keyed by the code.
// The create*DataStructure()/createAgenciesStopLists() functions look up the route_type values
// collected from routes.txt in this table to fill the 'routeType' and 'dcat:keyword' output fields.
// NOTE(review): the table only covers codes 100-1702 (these look like the extended GTFS route
// types); a feed that uses the basic route types 0-12 would find no label here — confirm the
// feed only uses the extended codes.
const GTFS_TYPES = [
'100' => 'Railway Service',
'101' => 'High Speed Rail Service',
'102' => 'Long Distance Trains',
'103' => 'Inter Regional Rail Service',
'104' => 'Car Transport Rail Service',
'105' => 'Sleeper Rail Service',
'106' => 'Regional Rail Service',
'107' => 'Tourist Railway Service',
'108' => 'Rail Shuttle (Within Complex)',
'109' => 'Suburban Railway',
'110' => 'Replacement Rail Service',
'111' => 'Special Rail Service',
'112' => 'Lorry Transport Rail Service',
'113' => 'All Rail Services',
'114' => 'Cross-Country Rail Service',
'115' => 'Vehicle Transport Rail Service',
'116' => 'Rack and Pinion Railway',
'117' => 'Additional Rail Service',
'200' => 'Coach Service',
'201' => 'International Coach Service',
'202' => 'National Coach Service',
'203' => 'Shuttle Coach Service',
'204' => 'Regional Coach Service',
'205' => 'Special Coach Service',
'206' => 'Sightseeing Coach Service',
'207' => 'Tourist Coach Service',
'208' => 'Commuter Coach Service',
'209' => 'All Coach Services',
'300' => 'Suburban Railway Service',
'400' => 'Urban Railway Service',
'401' => 'Metro Service',
'402' => 'Underground Service',
'403' => 'Urban Railway Service',
'404' => 'All Urban Railway Services',
'405' => 'Monorail',
'500' => 'Metro Service',
'600' => 'Underground Service',
'700' => 'Bus Service',
'701' => 'Regional Bus Service',
'702' => 'Express Bus Service',
'703' => 'Stopping Bus Service',
'704' => 'Local Bus Service',
'705' => 'Night Bus Service',
'706' => 'Post Bus Service',
'707' => 'Special Needs Bus',
'708' => 'Mobility Bus Service',
'709' => 'Mobility Bus for Registered Disabled',
'710' => 'Sightseeing Bus',
'711' => 'Shuttle Bus',
'712' => 'School Bus',
'713' => 'School and Public Service Bus',
'714' => 'Rail Replacement Bus Service',
'715' => 'Demand and Response Bus Service',
'716' => 'All Bus Services',
'800' => 'Trolleybus Service',
'900' => 'Tram Service',
'901' => 'City Tram Service',
'902' => 'Local Tram Service',
'903' => 'Regional Tram Service',
'904' => 'Sightseeing Tram Service',
'905' => 'Shuttle Tram Service',
'906' => 'All Tram Services',
'1000' => 'Water Transport Service',
'1001' => 'International Car Ferry Service',
'1002' => 'National Car Ferry Service',
'1003' => 'Regional Car Ferry Service',
'1004' => 'Local Car Ferry Service',
'1005' => 'International Passenger Ferry Service',
'1006' => 'National Passenger Ferry Service',
'1007' => 'Regional Passenger Ferry Service',
'1008' => 'Local Passenger Ferry Service',
'1009' => 'Post Boat Service',
'1010' => 'Train Ferry Service',
'1011' => 'Road-Link Ferry Service',
'1012' => 'Airport-Link Ferry Service',
'1013' => 'Car High-Speed Ferry Service',
'1014' => 'Passenger High-Speed Ferry Service',
'1015' => 'Sightseeing Boat Service',
'1016' => 'School Boat',
'1017' => 'Cable-Drawn Boat Service',
'1018' => 'River Bus Service',
'1019' => 'Scheduled Ferry Service',
'1020' => 'Shuttle Ferry Service',
'1021' => 'All Water Transport Services',
'1100' => 'Air Service',
'1101' => 'International Air Service',
'1102' => 'Domestic Air Service',
'1103' => 'Intercontinental Air Service',
'1104' => 'Domestic Scheduled Air Service',
'1105' => 'Shuttle Air Service',
'1106' => 'Intercontinental Charter Air Service',
'1107' => 'International Charter Air Service',
'1108' => 'Round-Trip Charter Air Service',
'1109' => 'Sightseeing Air Service',
'1110' => 'Helicopter Air Service',
'1111' => 'Domestic Charter Air Service',
'1112' => 'Schengen-Area Air Service',
'1113' => 'Airship Service',
'1114' => 'All Air Services',
'1200' => 'Ferry Service',
'1300' => 'Telecabin Service',
'1301' => 'Telecabin Service',
'1302' => 'Cable Car Service',
'1303' => 'Elevator Service',
'1304' => 'Chair Lift Service',
'1305' => 'Drag Lift Service',
'1306' => 'Small Telecabin Service',
'1307' => 'All Telecabin Services',
'1400' => 'Funicular Service',
'1401' => 'Funicular Service',
'1402' => 'All Funicular Service',
'1500' => 'Taxi Service',
'1501' => 'Communal Taxi Service',
'1502' => 'Water Taxi Service',
'1503' => 'Rail Taxi Service',
'1504' => 'Bike Taxi Service',
'1505' => 'Licensed Taxi Service',
'1506' => 'Private Hire Service Vehicle',
'1507' => 'All Taxi Services',
'1600' => 'Self Drive',
'1601' => 'Hire Car',
'1602' => 'Hire Van',
'1603' => 'Hire Motorbike',
'1604' => 'Hire Cycle',
'1700' => 'Miscellaneous Service',
'1701' => 'Cable Car',
'1702' => 'Horse-drawn Carriage'];
/*
 * Country identifiers attached to every stop.
 *
 * The HTTP_SWS_GEONAMES_ORG_* constants are the GeoNames resource URIs written to the 'country'
 * field (declared as an '@id' in the JSON-LD contexts), the CC_* constants are the matching
 * two-letter codes written to 'countryCode'. The pairing used by the stop builders is:
 *   660013 <-> FI, 2623032 <-> DK, 3144096 <-> NO, 2661886 <-> SE (also the default).
 *
 * All URIs use the same form, with a trailing slash, so the emitted identifiers are consistent
 * with each other and with the 'dct:spatial' GeoNames URI used in the master catalog.
 */
const HTTP_SWS_GEONAMES_ORG_660013 = "http://sws.geonames.org/660013/";
const HTTP_SWS_GEONAMES_ORG_2623032 = "http://sws.geonames.org/2623032/";
const HTTP_SWS_GEONAMES_ORG_3144096 = "http://sws.geonames.org/3144096/";
const HTTP_SWS_GEONAMES_ORG_2661886 = "http://sws.geonames.org/2661886/";
const CC_SE = "SE";
const CC_NO = "NO";
const CC_DK = "DK";
const CC_FI = "FI";
/*
 * Script entry point.
 *
 * The order of the calls below matters: the GTFS files are deleted by the functions that read
 * them last (getStopTimes() removes calendar_dates.txt, trips.txt and stop_times.txt,
 * parseTransferTimes() removes transfers.txt, getGTFSStops() removes stops.txt), so
 * getProviderAndTransportTypeInfo() has to run before getStopTimes().
 */
echo 'Gathering resources...' . PHP_EOL;
downloadGTFS();
// $routes is returned for completeness; the output builders below only need providers and stop data.
[$providers, $routes, $stopdata] = getProviderAndTransportTypeInfo();

echo 'Calculating derivates...' . PHP_EOL;
// Derived figures: how often each stop is served, over how many days, and the transfer times.
[$handledDaysCount, $stopFrequencies] = getStopTimes();
$transferTimes = parseTransferTimes();
// Stop master data, keyed by stop URI.
$gtfsStations = getGTFSStops();

// Every builder receives the same inputs and writes its own JSON file(s) to disk.
echo 'Compiling JSON-LD file...' . PHP_EOL;
createStationsDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
createAgenciesStopLists($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
createAgenciesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
createRoutesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
/**
 * Download and extract the latest GTFS data set.
 *
 * The archive is only downloaded when TMP_ZIPFILE does not exist yet; the downloaded archive is
 * kept on disk so later runs reuse it. The GTFS files this script needs are moved from the
 * extraction directory into the working directory, after which the extraction directory and
 * anything left in it are removed.
 *
 * Terminates the script when the archive cannot be downloaded, opened or extracted.
 */
function downloadGTFS(): void
{
    echo 'Downloading data...' . PHP_EOL;
    if (!file_exists(TMP_ZIPFILE)) {
        // Download zip file with GTFS data. A failed download must not be written to disk:
        // an empty TMP_ZIPFILE would be treated as a valid cached archive on every later run.
        $archive = file_get_contents(GTFS_ZIP);
        if ($archive === false) {
            die('Could not download GTFS data from ' . GTFS_ZIP);
        }
        file_put_contents(TMP_ZIPFILE, $archive);
    }

    echo 'Extracting data...' . PHP_EOL;
    // Load the zip file. ZipArchive::open() returns true on success and an integer error code otherwise.
    $zip = new ZipArchive();
    if ($zip->open(TMP_ZIPFILE) !== true) {
        die('Could not extract downloaded GTFS data');
    }
    // Extract the zip file.
    $extracted = $zip->extractTo(TMP_UNZIP_PATH);
    $zip->close();
    if (!$extracted) {
        die('Could not extract downloaded GTFS data');
    }

    // Get the files we need.
    $neededFiles = [
        GTFS_STOP_TIMES,
        GTFS_ROUTES,
        GTFS_AGENCIES,
        GTFS_TRIPS,
        GTFS_CAL_DATES,
        GTFS_STOPS,
        GTFS_TRANSFER_TIMES,
    ];
    foreach ($neededFiles as $gtfsFile) {
        $extractedFile = TMP_UNZIP_PATH . '/' . $gtfsFile;
        if (file_exists($extractedFile)) {
            rename($extractedFile, $gtfsFile);
        } else {
            // Not every archive ships every file; report it instead of failing inside rename().
            echo 'Warning: ' . $gtfsFile . ' is not present in the GTFS archive' . PHP_EOL;
        }
    }

    echo 'Cleaning up resources...' . PHP_EOL;
    // Remove temporary data.
    $tmpfiles = scandir(TMP_UNZIP_PATH);
    if ($tmpfiles !== false) {
        foreach (array_diff($tmpfiles, ['.', '..']) as $file) {
            // Remove all remaining extracted files.
            unlink(TMP_UNZIP_PATH . '/' . $file);
        }
    }
    // Remove the empty folder.
    rmdir(TMP_UNZIP_PATH);
}
/**
 * Collect the agencies and routes of the feed, and determine for every stop which agencies
 * serve it with which route types.
 *
 * Reads agency.txt, routes.txt and trips.txt through deserializeCSV() and streams stop_times.txt
 * row by row. None of these files is deleted here.
 *
 * @return array A three element array:
 *               [0] providers: agency_id => ['name' => ..., 'url' => ...]
 *               [1] routes:    route_id  => ['name' => ..., 'number' => ..., 'provider' => agency_id, 'route_type' => ...]
 *               [2] stopdata:  stop_id   => [agency_id => list of distinct route_type values]
 */
function getProviderAndTransportTypeInfo()
{
    echo 'Gathering agencies...' . PHP_EOL;
    // Link Providers to stop ids
    $providers = [];
    foreach (deserializeCSV(GTFS_AGENCIES) as $provider) {
        $providers[$provider['agency_id']] = ['name' => $provider['agency_name'], 'url' => $provider['agency_url']];
    }

    echo 'Gathering routes...' . PHP_EOL;
    $routes = [];
    foreach (deserializeCSV(GTFS_ROUTES) as $route) {
        $routes[$route['route_id']] = ['name' => $route['route_long_name'], 'number' => $route['route_short_name'], 'provider' => $route['agency_id'], 'route_type' => $route['route_type']];
    }

    echo 'Gathering trips...' . PHP_EOL;
    $trips = [];
    foreach (deserializeCSV(GTFS_TRIPS) as $trip) {
        $trips[$trip['trip_id']] = ['route_id' => $trip['route_id']];
    }
    echo(count($trips) . " trips " . PHP_EOL);

    echo 'Gathering stop times...' . PHP_EOL;
    $stopdata = [];
    echo "reading GTFS_STOP_TIMES streaming" . PHP_EOL;
    // stop_times.txt is streamed instead of loaded through deserializeCSV(): only one row is
    // kept in memory at a time.
    $handle = @fopen(GTFS_STOP_TIMES, "r");
    if ($handle) {
        $fields = array();
        while (($row = fgetcsv($handle)) !== false) {
            if (empty($fields)) {
                // First row holds the column names. Drop a UTF-8 byte order mark, otherwise the
                // first column would never be found under its real name.
                if (isset($row[0]) && strncmp($row[0], "\xEF\xBB\xBF", 3) === 0) {
                    $row[0] = substr($row[0], 3);
                }
                $fields = $row;
                continue;
            }
            // Start from an empty record so a short row cannot inherit values from the previous one.
            $stoptime = [];
            foreach ($row as $k => $value) {
                if (isset($fields[$k])) {
                    $stoptime[$fields[$k]] = $value;
                }
            }
            if (!isset($stoptime['trip_id'], $stoptime['stop_id'])) {
                continue; // Blank or malformed line
            }
            // Stop times that refer to an unknown trip or route cannot be attributed to an agency.
            $routeId = $trips[$stoptime['trip_id']]['route_id'] ?? null;
            if ($routeId === null || !isset($routes[$routeId])) {
                continue;
            }
            $agencyId = $routes[$routeId]['provider'];
            $routeType = $routes[$routeId]['route_type'];
            if (!isset($stopdata[$stoptime['stop_id']][$agencyId])) {
                $stopdata[$stoptime['stop_id']][$agencyId] = [];
            }
            // Only store each route type once per stop and agency. This keeps the list in order
            // of first appearance without first accumulating one entry per stop time.
            if (!in_array($routeType, $stopdata[$stoptime['stop_id']][$agencyId], true)) {
                $stopdata[$stoptime['stop_id']][$agencyId][] = $routeType;
            }
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    } else {
        echo "Error: could not open " . GTFS_STOP_TIMES . PHP_EOL;
    }
    echo "read GTFS_STOP_TIMES" . PHP_EOL;

    return array($providers, $routes, $stopdata);
}
/**
 * Read the minimum transfer times from transfers.txt.
 *
 * Only rows describing a transfer within one and the same stop (from_stop_id equals to_stop_id)
 * are used. When a stop occurs more than once, the last row wins. transfers.txt is deleted
 * afterwards.
 *
 * @return array stop URI (BASE_URI_STOPS followed by the stop id) => min_transfer_time
 */
function parseTransferTimes(): array
{
    // Columns: from_stop_id,to_stop_id,transfer_type,min_transfer_time,from_trip_id,to_trip_id
    $minimumTransferByUri = [];
    foreach (deserializeCSV(GTFS_TRANSFER_TIMES) as $transferRow) {
        // Transfers between two different stops are of no interest here.
        if ($transferRow['from_stop_id'] === $transferRow['to_stop_id']) {
            $stopUri = BASE_URI_STOPS . $transferRow['from_stop_id'];
            $minimumTransferByUri[$stopUri] = $transferRow['min_transfer_time'];
        }
    }
    // The file has served its purpose.
    unlink(GTFS_TRANSFER_TIMES);
    return $minimumTransferByUri;
}
/**
 * Get the number of stops made on each station, as well as the number of days which were handled.
 * This can be used to calculate both the stop times per station and the average stop times per station.
 *
 * Works in three passes:
 *  1. calendar_dates.txt: count on how many dates each service runs, and collect the distinct dates.
 *  2. trips.txt: every trip inherits the count of its service.
 *  3. stop_times.txt: every stop accumulates the counts of the trips calling there.
 *
 * Columns are located through the header row of each file, so the result does not depend on the
 * column order and quoted fields are parsed correctly. All three files are deleted once read.
 * Terminates the script when a file cannot be opened or lacks a required column.
 *
 * @return array [number of distinct service dates, stop URI => number of calls at that stop]
 */
function getStopTimes(): array
{
    echo 'Creating service id frequency table...' . PHP_EOL;
    // Number of dates on which each service runs.
    $serviceFrequency = [];
    // The dates we've handled.
    $isDateHandled = [];
    $calendarRows = streamGtfsRows(GTFS_CAL_DATES, ['service_id', 'date'], GTFS_CAL_DATES . ' could not be opened!');
    foreach ($calendarRows as $row) {
        // exception_type 2 means the service is REMOVED on that date, so it is no service day.
        if (trim($row['exception_type'] ?? '1') === '2') {
            continue;
        }
        $serviceId = $row['service_id'];
        $serviceFrequency[$serviceId] = ($serviceFrequency[$serviceId] ?? 0) + 1;
        $isDateHandled[$row['date']] = 1;
    }
    // We don't need this file anymore. Cleanup.
    unlink(GTFS_CAL_DATES);

    // Use the calendar frequencies to calculate the frequency of each trip
    echo 'Creating trip id frequency table...' . PHP_EOL;
    $tripFrequencies = [];
    $tripRows = streamGtfsRows(GTFS_TRIPS, ['service_id', 'trip_id'], GTFS_TRIPS . ' could not be opened!');
    foreach ($tripRows as $row) {
        // Same as the service frequency; a service without any listed date never runs.
        $tripFrequencies[$row['trip_id']] = $serviceFrequency[$row['service_id']] ?? 0;
    }
    // We don't need this file anymore. Cleanup.
    unlink(GTFS_TRIPS);

    // Use the trip frequencies to calculate how often each stop is served
    echo 'Creating frequency table...' . PHP_EOL;
    $stopFrequencies = [];
    $stopTimeRows = streamGtfsRows(GTFS_STOP_TIMES, ['trip_id', 'stop_id'], 'GTFS stop times file could not be opened!');
    foreach ($stopTimeRows as $row) {
        $uri = BASE_URI_STOPS . $row['stop_id'];
        // The amount of times this trip is made; 0 for a stop time of an unknown trip.
        $tripFrequency = $tripFrequencies[$row['trip_id']] ?? 0;
        $stopFrequencies[$uri] = ($stopFrequencies[$uri] ?? 0) + $tripFrequency;
    }
    unlink(GTFS_STOP_TIMES);

    // Get the number of days that were handled. We need this to calculate the average later on.
    $handledDaysCount = count($isDateHandled);
    return [$handledDaysCount, $stopFrequencies];
}

/**
 * Stream the data rows of a GTFS CSV file as associative arrays (column name => value).
 *
 * The file is closed as soon as the last row has been consumed, so the caller may delete it
 * right after its foreach loop.
 *
 * @param string $csvPath          File to read
 * @param array  $requiredColumns  Column names that must be present in the header row
 * @param string $openErrorMessage Message to terminate the script with when the file cannot be opened
 * @return Generator yields one associative array per non-blank data row
 */
function streamGtfsRows(string $csvPath, array $requiredColumns, string $openErrorMessage): Generator
{
    $handle = fopen($csvPath, 'r');
    if (!$handle) {
        die($openErrorMessage);
    }
    try {
        $header = fgetcsv($handle);
        if ($header === false || $header === [null]) {
            return; // Empty file: nothing to yield
        }
        // A UTF-8 byte order mark would otherwise become part of the first column name.
        if (strncmp((string)$header[0], "\xEF\xBB\xBF", 3) === 0) {
            $header[0] = substr($header[0], 3);
        }
        $header = array_map('trim', $header);
        foreach ($requiredColumns as $column) {
            if (!in_array($column, $header, true)) {
                die($csvPath . ' has no ' . $column . ' column!');
            }
        }
        while (($row = fgetcsv($handle)) !== false) {
            if ($row === [null]) {
                continue; // Blank line
            }
            $record = [];
            foreach ($header as $index => $column) {
                $record[$column] = $row[$index] ?? '';
            }
            yield $record;
        }
    } finally {
        // Close this handle. Important: the caller deletes the file afterwards.
        fclose($handle);
    }
}
/**
 * Load the stops from stops.txt, sorted by name, then stop id, then platform code.
 *
 * stops.txt is deleted afterwards. When a stop id occurs more than once, the row that sorts
 * last is the one that is kept.
 *
 * @return array stop URI (BASE_URI_STOPS followed by the stop id) => the stop's row as an
 *               associative array of column name => value
 */
function getGTFSStops(): array
{
    // CSV Header:
    // stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,platform_code
    $parsedCsv = deserializeCSV(GTFS_STOPS);
    // usort() expects an integer <0, 0 or >0. Returning the boolean result of '>' (as was done
    // before) cannot express "less than", which is deprecated and sorts incorrectly on PHP 8.
    usort($parsedCsv, function ($a, $b) {
        return ($a['stop_name'] <=> $b['stop_name'])
            ?: ($a['stop_id'] <=> $b['stop_id'])
            // platform_code is an optional column
            ?: (($a['platform_code'] ?? '') <=> ($b['platform_code'] ?? ''));
    });
    $gtfsStations = [];
    // Go through all stops.
    foreach ($parsedCsv as $csvRow) {
        $uri = BASE_URI_STOPS . $csvRow['stop_id'];
        $gtfsStations[$uri] = $csvRow;
    }
    unlink(GTFS_STOPS);
    return $gtfsStations;
}
/**
 * Load a CSV file into a list of rows.
 * The first line is taken as the header. Every following line is stored, in file order and under
 * a sequential numeric index, as an associative array using the column headers as keys and the
 * fields as values.
 *
 * A UTF-8 byte order mark in front of the header is ignored, blank lines are skipped, and fields
 * beyond the last header column are dropped. When the file cannot be opened an error is printed
 * and an empty array is returned.
 *
 * @param $csvPath string File path leading to the CSV file
 * @return array the deserialized data
 */
function deserializeCSV($csvPath): array
{
    echo "reading $csvPath" . PHP_EOL;
    $records = [];
    $handle = @fopen($csvPath, "r");
    if ($handle) {
        $fields = [];
        while (($row = fgetcsv($handle)) !== false) {
            if ($row === [null]) {
                continue; // fgetcsv() reports a blank line as an array holding a single null
            }
            if (empty($fields)) {
                // Header row. Without this, a byte order mark would become part of the first
                // column name and lookups of that column would fail.
                if (strncmp((string)$row[0], "\xEF\xBB\xBF", 3) === 0) {
                    $row[0] = substr($row[0], 3);
                }
                $fields = $row;
                continue;
            }
            $record = [];
            foreach ($row as $k => $value) {
                if (isset($fields[$k])) {
                    $record[$fields[$k]] = $value;
                }
            }
            $records[] = $record;
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    } else {
        echo "Error: could not open $csvPath" . PHP_EOL;
    }
    echo "read $csvPath" . PHP_EOL;
    return $records;
}
/**
 * Write one JSON-LD file per stop (stops/<stop id>.json) and one file holding all stops
 * (stations.json).
 *
 * Stops whose URI contains "_" or "S8" are skipped. The country of a stop is derived from the
 * first two characters of its stop id; anything unrecognised is treated as Sweden.
 *
 * @param $gtfsStations     array stop URI => stops.txt row
 * @param $stopdata         array stop id => [agency id => list of route types]
 * @param $providers        array agency id => ['name' => ..., 'url' => ...]
 * @param $stopFrequencies  array stop URI => number of calls at the stop
 * @param $handledDaysCount int   number of days the call counts were accumulated over
 * @param $transferTimes    array stop URI => minimum transfer time
 * @return array the structure written to stations.json
 */
function createStationsDataStructure(&$gtfsStations, &$stopdata, &$providers, &$stopFrequencies, $handledDaysCount, $transferTimes): array
{
    echo 'Saving...' . PHP_EOL;
    $jsonLd = [
        'version' => 0.1,
        'timestamp' => time(),
        '@context' => [
            'dct' => 'http://purl.org/dc/terms/',
            'longitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#long',
            'latitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
            'country' => [
                '@type' => '@id',
                '@id' => 'http://www.geonames.org/ontology#parentCountry'
            ],
            'gtfs' => 'http://vocab.gtfs.org/terms#',
            'stop' => 'gtfs:Stop',
            'hafasCode' => 'gtfs:code',
            'official_transfer_time' => 'gtfs:minimumTransferTime',
            'name' => 'foaf:name',
            'foaf' => 'http://xmlns.com/foaf/0.1/'
        ],
        '@graph' => []
    ];
    // Context of the per-stop files. It is the same for every stop, so it is built only once.
    $stopContext = [
        'longitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#long',
        'latitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
        'dct' => 'http://purl.org/dc/terms/',
        'country' => [
            '@type' => '@id',
            '@id' => 'http://www.geonames.org/ontology#parentCountry'
        ],
        'foaf' => 'http://xmlns.com/foaf/0.1/',
        'gtfs' => 'http://vocab.gtfs.org/terms#',
        'stop' => 'gtfs:Stop',
        'hafasCode' => 'gtfs:code',
        'official_transfer_time' => 'gtfs:minimumTransferTime',
        'name' => 'foaf:name'
    ];
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

    @mkdir('stops');
    foreach ($gtfsStations as $uri => $gtfsStation) {
        if (empty($uri) || strpos($uri, "_") !== false || strpos($uri, "S8") !== false) {
            continue; // Invalid data
        }
        $ldValue = [];
        $ldValue['@context'] = $stopContext;
        $ldValue['@id'] = $uri;
        $ldValue['@type'] = 'stop';
        $ldValue['latitude'] = $gtfsStation['stop_lat'];
        $ldValue['longitude'] = $gtfsStation['stop_lon'];
        $ldValue['name'] = $gtfsStation['stop_name'];
        $ldValue['hafasCode'] = $gtfsStation['stop_id'];
        switch (substr($ldValue['hafasCode'], 0, 2)) {
            case '10':
                $ldValue['countryCode'] = CC_FI;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_660013;
                break;
            case '86':
                $ldValue['countryCode'] = CC_DK;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2623032;
                break;
            case '76':
                $ldValue['countryCode'] = CC_NO;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_3144096;
                break;
            case '74':
            default:
                $ldValue['countryCode'] = CC_SE;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2661886;
                break;
        }
        // A stop that no trip calls at (e.g. a parent station) has no entry in $stopdata.
        foreach ($stopdata[$ldValue['hafasCode']] ?? [] as $agency => $transporttypes) {
            $readableTransportTypes = array();
            foreach ($transporttypes as $value) {
                // Kept positionally aligned with 'gtfs:routeType'; null marks a code without label.
                $readableTransportTypes[] = GTFS_TYPES[$value] ?? null;
            }
            $agencyName = $providers[$agency]['name'] ?? '';
            $ldValue['operatedBy'][] = [
                '@id' => BASE_URI_AGENCY . strtr(strtolower($agencyName), UNWANTED_ARRAY),
                'name' => $agencyName,
                'gtfs:routeType' => $transporttypes,
                'routeType' => $readableTransportTypes
            ];
        }
        // Stops without any call count as 0; without handled days there is nothing to average over.
        $ldValue['avg_stop_times'] = $handledDaysCount > 0
            ? round(($stopFrequencies[$uri] ?? 0) / $handledDaysCount, 4)
            : 0.0;
        // null when transfers.txt has no entry for this stop
        $ldValue['official_transfer_time'] = $transferTimes[$uri] ?? null;
        file_put_contents("stops/" . $ldValue['hafasCode'] . ".json", json_encode($ldValue, $jsonFlags));
        // Inside the combined file the context is only given once, at the top level.
        unset($ldValue['@context']);
        $jsonLd['@graph'][] = $ldValue;
    }
    file_put_contents("stations.json", json_encode($jsonLd, $jsonFlags));
    return $jsonLd;
}
/**
 * Write one JSON-LD description per agency (agency/<slug>.json) and a DCAT master catalog
 * (master-catalog.json) listing, for every agency, a connections dataset and a stops dataset.
 *
 * The slug of an agency is its lower-cased name passed through UNWANTED_ARRAY.
 *
 * @param $gtfsStations     array stop URI => stops.txt row
 * @param $stopdata         array stop id => [agency id => list of route types]
 * @param $providers        array agency id => ['name' => ..., 'url' => ...]
 * @param $stopFrequencies  array unused here; accepted so all builders share one signature
 * @param $handledDaysCount int   unused here
 * @param $transferTimes    array unused here
 */
function createAgenciesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes)
{
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;
    $masterCatalog = [
        '@context' => [
            'xsd' => 'http://www.w3.org/2001/XMLSchema#',
            'dcat' => 'http://www.w3.org/ns/dcat#',
            'dct' => 'http://purl.org/dc/terms/',
            'foaf' => 'http://xmlns.com/foaf/0.1/',
            'owl' => 'http://www.w3.org/2002/07/owl#',
            'schema' => 'http://schema.org/',
            'dct:modified' => [
                '@type' => 'xsd:dateTime'
            ],
            'dct:issued' => [
                '@type' => 'xsd:dateTime'
            ],
            'dct:spatial' => [
                '@type' => '@id'
            ],
            'dct:license' => [
                '@type' => '@id'
            ],
            'dct:conformsTo' => [
                '@type' => '@id'
            ],
            'dcat:mediaType' => [
                '@type' => 'xsd:string'
            ],
            'schema:startDate' => [
                '@type' => 'xsd:dateTime'
            ],
            'schema:endDate' => [
                '@type' => 'xsd:dateTime'
            ],
            'minLatitude' => [
                '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMinLatitude',
                '@type' => '@id'
            ],
            'maxLatitude' => [
                '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMaxLatitude',
                '@type' => '@id'
            ],
            'minLongitude' => [
                '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMinLongitude',
                '@type' => '@id'
            ],
            'maxLongitude' => [
                '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMaxLongitude',
                '@type' => '@id'
            ],
        ],
        '@id' => 'http://se.lc.bertmarcelis.be/catalog',
        '@type' => 'dcat:Catalog',
        'dct:title' => 'Catalog of Samtrafiken datasets',
        'dct:description' => 'Catalog of datasets published by Samtrafiken / Trafiklab',
        'dct:modified' => '2018-11-01T10:00:00.000+01:00',
        'dct:license' => 'http://creativecommons.org/publicdomain/zero/1.0/',
        'dct:rights' => 'public',
        'dct:publisher' => [
            '@id' => 'http://samtrafiken.se',
            '@type' => 'foaf:Organization',
            'foaf:name' => 'Samtrafiken'
        ],
        'dcat:dataset' => [
        ]
    ];

    @mkdir('agency');
    foreach ($providers as $agencyId => $provider) {
        // Identifier-safe and display-safe variants of the agency name, computed once.
        $slug = strtr(strtolower($provider['name']), UNWANTED_ARRAY);
        $plainName = strtr($provider['name'], UNWANTED_ARRAY);

        // One pass over the stops served by this agency collects their URIs, the bounding box
        // and the labels of the route types the agency runs.
        // The bounding box keeps its 999 / -999 sentinels when the agency serves no stop at all.
        $stations = [];
        $minLat = 999;
        $maxLat = -999;
        $minLong = 999;
        $maxLong = -999;
        $readableTransportTypes = [];
        foreach ($gtfsStations as $gtfsStation) {
            $stopId = $gtfsStation['stop_id'];
            // isset() instead of key_exists(): stops without any stop time have no entry in
            // $stopdata, and passing that missing entry to key_exists() is a TypeError on PHP 8.
            if (!isset($stopdata[$stopId][$agencyId])) {
                continue;
            }
            $stations[] = BASE_URI_STOPS . $stopId;
            $minLat = min($gtfsStation['stop_lat'], $minLat);
            $maxLat = max($gtfsStation['stop_lat'], $maxLat);
            $minLong = min($gtfsStation['stop_lon'], $minLong);
            $maxLong = max($gtfsStation['stop_lon'], $maxLong);
            foreach ($stopdata[$stopId][$agencyId] as $value) {
                $label = GTFS_TYPES[$value] ?? null;
                if ($label !== null) {
                    $readableTransportTypes[] = $label;
                }
            }
        }
        $readableTransportTypes = array_values(array_unique($readableTransportTypes));

        $jsonLd = [
            'version' => 0.1,
            'timestamp' => time(),
            '@context' => [
                'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#',
                'dct' => 'http://purl.org/dc/terms/',
                'foaf' => 'http://xmlns.com/foaf/0.1/',
                // These IRIs used to read 'http=>//...', which is not a valid IRI.
                "minLatitude" => [
                    '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMinLatitude',
                    '@type' => '@id'
                ],
                'maxLatitude' => [
                    '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMaxLatitude',
                    '@type' => '@id'
                ],
                'minLongitude' => [
                    '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMinLongitude',
                    '@type' => '@id'
                ],
                'maxLongitude' => [
                    '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMaxLongitude',
                    '@type' => '@id'
                ], 'country' => [
                    '@type' => '@id',
                    '@id' => 'http://www.geonames.org/ontology#parentCountry'
                ]
            ],
            '@id' => BASE_URI_AGENCY . $slug,
            "dct:publisher" => [
                '@id' => 'http://samtrafiken.se',
                '@type' => 'foaf:Organization',
                'foaf:name' => 'Samtrafiken'
            ],
            'foaf:name' => $provider['name'],
            'foaf:homepage' => $provider['url'],
            'linkedconnections' => 'https://se.lc.bertmarcelis.be/' . $slug . '/connections',
            'minLatitude' => $minLat,
            'maxLatitude' => $maxLat,
            'minLongitude' => $minLong,
            'maxLongitude' => $maxLong,
            'stations' => $stations
        ];
        file_put_contents("agency/" . $slug . ".json", json_encode($jsonLd, $jsonFlags));

        $catalogEntry = [
            '@id' => 'http://se.lc.bertmarcelis.be/datasets/' . $slug . '/connections',
            '@type' => 'dcat:Dataset',
            'dct:description' => 'Linked Connections dataset for ' . $plainName,
            'dct:title' => 'Linked Connections - ' . $plainName,
            //'dct:spatial' => 'http://sws.geonames.org/2673722/',
            'minLatitude' => $minLat,
            'maxLatitude' => $maxLat,
            'minLongitude' => $minLong,
            'maxLongitude' => $maxLong,
            'dcat:keyword' => $readableTransportTypes,
            'dct:conformsTo' => 'http://linkedconnections.org/specification/1-0',
            'dct:accessRights' => 'public',
            'dcat:distribution' => [
                [
                    '@id' => 'http://se.lc.bertmarcelis.be/' . $slug . '/connections',
                    '@type' => 'dcat:Distribution',
                    'dcat:accessURL' => 'http://se.lc.bertmarcelis.be/' . $slug . '/connections',
                    'dct:license' => 'http://creativecommons.org/publicdomain/zero/1.0/',
                    'dcat:mediaType' => 'application/ld+json',
                    'dct:issued' => (new DateTime())->format(DATE_RFC3339),
                ]
            ]
        ];
        $stopsCatalogEntry = [
            '@id' => 'http://se.lc.bertmarcelis.be/datasets/' . $slug . '/stops',
            '@type' => 'dcat:Dataset',
            'dct:description' => 'Stop locations dataset for ' . $plainName,
            'dct:title' => 'Stop locations - ' . $plainName,
            'dct:spatial' => 'http://sws.geonames.org/2673722/',
            'minLatitude' => $minLat,
            'maxLatitude' => $maxLat,
            'minLongitude' => $minLong,
            'maxLongitude' => $maxLong,
            'dcat:keyword' => $readableTransportTypes,
            'dct:accessRights' => 'public',
            'dcat:distribution' => [
                [
                    // NOTE(review): there is no '/' between 'stops' and the slug — kept as is,
                    // confirm whether 'stops/<slug>' was intended.
                    '@id' => 'http://se.lc.bertmarcelis.be/stops' . $slug,
                    '@type' => 'dcat:Distribution',
                    'dcat:accessURL' => 'http://se.lc.bertmarcelis.be/stations-' . $slug . '.json',
                    'dct:license' => 'http://creativecommons.org/publicdomain/zero/1.0/',
                    'dcat:mediaType' => 'application/ld+json',
                    'dct:issued' => (new DateTime())->format(DATE_RFC3339),
                ]
            ]
        ];
        $masterCatalog['dcat:dataset'][] = $catalogEntry;
        $masterCatalog['dcat:dataset'][] = $stopsCatalogEntry;
    }
    file_put_contents("master-catalog.json", json_encode($masterCatalog, $jsonFlags));
}
/**
 * Write, for every agency, a JSON-LD file (stations-<slug>.json) listing the stops it serves.
 *
 * Each stop entry names all agencies calling at that stop, not only the agency the file is
 * written for. The country of a stop is derived from the first two characters of its stop id;
 * anything unrecognised is treated as Sweden.
 *
 * @param $gtfsStations     array stop URI => stops.txt row
 * @param $stopdata         array stop id => [agency id => list of route types]
 * @param $providers        array agency id => ['name' => ..., 'url' => ...]
 * @param $stopFrequencies  array stop URI => number of calls at the stop
 * @param $handledDaysCount int   number of days the call counts were accumulated over
 * @param $transferTimes    array stop URI => minimum transfer time
 */
function createAgenciesStopLists($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes)
{
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;
    foreach ($providers as $agencyId => $provider) {
        $jsonLd = [
            'version' => 0.1,
            'timestamp' => time(),
            '@context' => [
                'dct' => 'http://purl.org/dc/terms/',
                'longitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#long',
                'latitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
                'country' => [
                    '@type' => '@id',
                    '@id' => 'http://www.geonames.org/ontology#parentCountry'
                ],
                'gtfs' => 'http://vocab.gtfs.org/terms#',
                'stop' => 'gtfs:Stop',
                'hafasCode' => 'gtfs:code',
                'official_transfer_time' => 'gtfs:minimumTransferTime',
                'name' => 'foaf:name',
                'foaf' => 'http://xmlns.com/foaf/0.1/'
            ],
            '@graph' => []
        ];
        foreach ($gtfsStations as $gtfsStation) {
            $stopId = $gtfsStation['stop_id'];
            // isset() instead of key_exists(): stops without any stop time have no entry in
            // $stopdata, and passing that missing entry to key_exists() is a TypeError on PHP 8.
            if (!isset($stopdata[$stopId][$agencyId])) {
                continue;
            }
            $ldValue = [];
            $uri = BASE_URI_STOPS . $stopId;
            $ldValue['@id'] = $uri;
            $ldValue['@type'] = 'stop';
            $ldValue['latitude'] = $gtfsStation['stop_lat'];
            $ldValue['longitude'] = $gtfsStation['stop_lon'];
            $ldValue['name'] = $gtfsStation['stop_name'];
            $ldValue['hafasCode'] = $stopId;
            switch (substr($ldValue['hafasCode'], 0, 2)) {
                case '10':
                    $ldValue['countryCode'] = CC_FI;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_660013;
                    break;
                case '86':
                    $ldValue['countryCode'] = CC_DK;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2623032;
                    break;
                case '76':
                    $ldValue['countryCode'] = CC_NO;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_3144096;
                    break;
                case '74':
                default:
                    $ldValue['countryCode'] = CC_SE;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2661886;
                    break;
            }
            foreach ($stopdata[$stopId] as $agency => $transporttypes) {
                $readableTransportTypes = array();
                foreach ($transporttypes as $value) {
                    // Kept positionally aligned with 'gtfs:routeType'; null marks a code without label.
                    $readableTransportTypes[] = GTFS_TYPES[$value] ?? null;
                }
                $agencyName = $providers[$agency]['name'] ?? '';
                $ldValue['agency'][] = [
                    '@id' => BASE_URI_AGENCY . strtr(strtolower($agencyName), UNWANTED_ARRAY),
                    'name' => $agencyName,
                    'gtfs:routeType' => $transporttypes,
                    'routeType' => $readableTransportTypes
                ];
            }
            // Stops without any call count as 0; without handled days there is nothing to average over.
            $ldValue['avg_stop_times'] = $handledDaysCount > 0
                ? round(($stopFrequencies[$uri] ?? 0) / $handledDaysCount, 4)
                : 0.0;
            // null when transfers.txt has no entry for this stop
            $ldValue['official_transfer_time'] = $transferTimes[$uri] ?? null;
            $jsonLd['@graph'][] = $ldValue;
        }
        file_put_contents("stations-" . strtr(strtolower($provider['name']), UNWANTED_ARRAY) . ".json", json_encode($jsonLd, $jsonFlags));
    }
}
/**
 * Placeholder: the body is empty, so calling this function has no effect and writes no output.
 * It accepts the same arguments as the other create* builders.
 *
 * @param $gtfsStations
 * @param $stopdata
 * @param $providers
 * @param $stopFrequencies
 * @param $handledDaysCount
 * @param $transferTimes
 */
function createRoutesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes)
{
}
<?php
// Route indexing for Linked Connections, outputting Linked Data Fragments.
//
// Usage: pass one or more agency slugs as command line arguments. For every agency the
// connection pages are followed through their 'hydra:next' links, the connections are grouped
// per trip, and one JSON-LD file per trip is written. The file path is the path part of the
// trip's identifier (the 'gtfs:trip' value) with ".json" appended.

// Time window to index. Both values use the same fixed-width UTC notation, so they can be
// compared as plain strings.
$firstDepartureTime = '2018-11-05T03:00:00.000Z';
$lastDepartureTime = '2018-12-01T01:00:00.000Z';
$jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

for ($i = 1; $i < count($argv); $i++) {
    $agency = $argv[$i];
    $trips = [];
    $page = "http://se.lc.bertmarcelis.be/$agency/connections?departureTime=$firstDepartureTime";
    while (!empty($page)) {
        // Stop at the end of the window. Only the departureTime parameter is compared: comparing
        // whole URLs (as was done before) never ends the loop when the page URL and the bound
        // use different schemes, because "http:" always sorts before "https".
        parse_str((string)parse_url($page, PHP_URL_QUERY), $query);
        if (isset($query['departureTime']) && $query['departureTime'] >= $lastDepartureTime) {
            break;
        }
        echo "page " . $page . PHP_EOL;
        $response = file_get_contents($page);
        $connections = $response === false ? null : json_decode($response, true);
        if (!is_array($connections)) {
            echo "Error: could not load " . $page . PHP_EOL;
            break;
        }
        foreach ($connections['@graph'] ?? [] as $connection) {
            $trips[$connection['gtfs:trip']][] = [
                'departureStop' => $connection['departureStop'],
                'arrivalStop' => $connection['arrivalStop'],
                'connection' => $connection['@id'],
                'page' => $page
            ];
        }
        // A page without a next link is the last one.
        $page = $connections['hydra:next'] ?? null;
        //echo "next: " . $page . PHP_EOL;
    }

    @mkdir('vehicle');
    @mkdir('vehicle/' . $agency);
    foreach ($trips as $id => $trip) {
        $id = (string)$id; // purely numeric array keys are handed back as integers
        $tripLd = [
            "@context" => [
                "xsd" => "http://www.w3.org/2001/XMLSchema#",
                "lc" => "http://semweb.mmlab.be/ns/linkedconnections#",
                "hydra" => "http://www.w3.org/ns/hydra/core#",
                "gtfs" => "http://vocab.gtfs.org/terms#",
                "Connection" => "lc:Connection",
                "arrivalStop" => [
                    "@type" => "@id",
                    "@id" => "lc:arrivalStop"
                ],
                "departureStop" => [
                    "@type" => "@id",
                    "@id" => "lc:departureStop"
                ],
                "@id" => [
                    "@type" => "@id",
                    "@id" => "lc:Connection"
                ]
            ],
            '@id' => $id,
            '@graph' => $trip
        ];
        // The output path is everything after the first '/' that follows the scheme and host
        // of the trip identifier (searching from offset 10 skips the "http://" part).
        $pathStart = strlen($id) > 10 ? strpos($id, '/', 10) : false;
        if ($pathStart === false) {
            echo "Error: cannot derive a file name from trip id " . $id . PHP_EOL;
            continue;
        }
        $file = substr($id, $pathStart + 1);
        $folder = substr($file, 0, strlen($file) - strlen(basename($file)));
        if ($folder !== '' && !is_dir($folder)) {
            // Recursive: the trip path can be nested deeper than the folders created above.
            @mkdir($folder, 0777, true);
        }
        file_put_contents($file . ".json", json_encode($tripLd, $jsonFlags));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment