Skip to content

Instantly share code, notes, and snippets.

Created April 12, 2016 13:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/e1776b620768f5a22da4f3bc2749562f to your computer and use it in GitHub Desktop.
Save anonymous/e1776b620768f5a22da4f3bc2749562f to your computer and use it in GitHub Desktop.
<?php
//
// A little stupid script that uses PHP's list of time zone IDs to query
// the Olson TZ database and finally outputs a "date_time_zonespec.csv"
// file for use with boost's tz_database...
//
// Instructions:
//
// 1) Download tzdata* and tzcode* from https://www.iana.org/time-zones
// 2) Extract to some folder
// 3) Change Makefile to include CFLAGS=-m32
// (workaround for https://bugs.launchpad.net/ubuntu/+source/eglibc/+bug/737872)
// 4) make TOPDIR=$HOME/tzdir install
// 5) php -n make_boost_zonespec.php > date_time_zonespec-`date '+%Y-%m-%d'`.csv
//
// Use UTC as the default time zone; $ZONE_UTC is the zone object handed to
// the date parser when the "UT" timestamps printed by zdump are read.
date_default_timezone_set('UTC');
$ZONE_UTC = new DateTimeZone('UTC');
/**
 * Shell command that invokes the zdump utility. It is passed to the shell
 * as-is, with the options and the zone ID appended as escaped arguments.
 */
define('TZDUMP_CMD', '~/tzdir/etc/zdump');
/**
 * Determines which occurrence of its weekday $dt is within its month.
 *
 * The first, second, ... occurrence yields 1, 2, ...; the final occurrence
 * of that weekday in the month (e.g. the last Sunday) yields -1 instead,
 * even when it is also the fourth or fifth one.
 *
 * KNOWN BUG: the answer is derived from the one specific year $dt lies in.
 * This means that -1 may be returned although it really should be 4 (when
 * the month happens to contain only four such weekdays in that year).
 *
 * @param DateTimeInterface $dt
 * @return int
 */
function weekday_in_month(DateTimeInterface $dt) {
    $day_of_month = (int)$dt->format('j');
    $days_in_month = (int)$dt->format('t');
    // The same weekday recurs every 7 days: if no further occurrence fits
    // into the month, $dt is the last one.
    if ($day_of_month + 7 > $days_in_month) {
        return -1;
    }
    // Days 1-7 are the first occurrence, 8-14 the second, and so on.
    return (int)floor(($day_of_month - 1) / 7) + 1;
}
/**
 * Formats $seconds as a signed "+hh:mm:ss" string according to boost's CSV
 * specs.
 *
 * The value is truncated to whole minutes (the seconds field is always
 * "00") and the hour part wraps at 24. The sign is derived from $seconds
 * itself rather than from the truncated hour part, so negative values of
 * less than one hour keep their sign (e.g. -1800 => "-00:30:00").
 *
 * @param int $seconds
 * @return string
 */
function format_offset($seconds) {
    $magnitude = abs((int)$seconds);
    $hours = (int)floor($magnitude / 3600) % 24;
    $minutes = (int)floor(($magnitude % 3600) / 60);
    // Only print "-" when something non-zero follows; never emit "-00:00:00".
    $sign = ($seconds < 0 && ($hours !== 0 || $minutes !== 0)) ? '-' : '+';
    return sprintf('%s%02d:%02d:%02d', $sign, $hours, $minutes, 0);
}
//
// Main script: collect zone IDs from PHP, query zdump for each of them,
// derive one CSV row per zone (plus alias rows) and print everything as CSV.
//
$result = [];
// Column layout of the output file, see
// http://www.boost.org/doc/libs/1_60_0/doc/html/date_time/local_time.html#tz_database_datafile
$FIELD_LIST = [ 'ID', 'STD ABBR', 'STD NAME', 'DST ABBR', 'DST NAME', 'GMT offset', 'DST adjustment', 'DST Start Date rule', 'Start time', 'DST End date rule', 'End time' ];
$all_ids = [];
$aliases = [];
// get list of timezone IDs + create some aliases for our CSV file:
foreach (DateTimeZone::listAbbreviations() as $rows) {
    foreach ($rows as $row) {
        $id = $row['timezone_id'];
        // take only "locations" (entries without an ID or without a "/" are skipped):
        if ($id === null || strpos($id, '/') === false) {
            continue;
        }
        $all_ids[] = $id;
        // alias A/B/C by A/C (first and last path segment):
        $aliases[$id][] = preg_replace('~^(\w+)(?:/\w+)*(/\w+)$~', '\1\2', $id);
        // alias A-B by A_B:
        $aliases[$id][] = str_replace('-', '_', $id);
    }
}
$all_ids = array_unique($all_ids);
// use information for this year (the range is the same for every zone):
$date_range = date('Y') . ',' . (date('Y') + 1);
foreach ($all_ids as $id) {
    $row = array_fill_keys($FIELD_LIST, null);
    $row['ID'] = $id;
    // shell_exec() does not return a string when the command fails or prints
    // nothing; the cast turns that into "no lines" instead of passing null on.
    $output = shell_exec(TZDUMP_CMD . ' -c ' . escapeshellarg($date_range) . ' -v ' . escapeshellarg($id));
    $dump = preg_split('~\R~', (string)$output, 0, PREG_SPLIT_NO_EMPTY);
    // Replace every parseable line by an object; unparseable or skipped lines
    // stay strings and are dropped by the array_filter() below.
    // NOTE(review): the abbreviation pattern accepts 2-6 ASCII letters only, so
    // lines with a numeric abbreviation such as "+03" would be reported as bad
    // lines - confirm against the tzdata release in use.
    foreach ($dump as $i => $line) {
        if (!preg_match('~^([\w/-]+)\s+(.+?) UT = (.+?) ([A-Za-z]{2,6}) isdst=([01]) gmtoff=(-?[0-9]+)$~', $line, $match)) {
            // fwrite() instead of printf(): printf() has no stream parameter,
            // and $line must not be interpreted as a format string.
            fwrite(STDERR, 'BAD LINE from tzdump: ' . $line . PHP_EOL);
            continue;
        }
        // $match[0] (whole line), $match[1] (zone ID) and $match[3] (local time) are not needed:
        list (, , $utc_str, , $abbr, $isdst, $gmtoff) = $match;
        // skip 'zzz' entries (presumably zdump's placeholder for "no data" - verify):
        if ($abbr === 'zzz') {
            continue;
        }
        $dump[$i] = (object)[
            'utc' => DateTimeImmutable::createFromFormat('D M d H:i:s Y', $utc_str, $ZONE_UTC),
            'dst' => (bool)(int)$isdst,
            'gmtoff' => (int)$gmtoff,
            'abbr' => $abbr,
        ];
    }
    $dump = array_filter($dump, 'is_object');
    $has_dst = false;
    foreach ($dump as $entry) {
        if ($entry->utc->format('Y') < 1910) {
            continue;
        }
        $has_dst = $has_dst || $entry->dst;
        // simply take the last entry (probably for the year 2038). This is good enough for this information:
        if ($entry->dst) {
            $row['DST ABBR'] = $entry->abbr;
        } else {
            $row['STD ABBR'] = $entry->abbr;
            $row['GMT offset'] = format_offset($entry->gmtoff);
        }
    }
    if (!$has_dst) {
        $row['DST adjustment'] = '+00:00:00';
        $row['End time'] = '+00:00:00';
    } else {
        $before_dst = null;
        $begin_dst = null;
        $before_dst_end = null;
        $begin_std = null;
        // Classify the entries by DST flag and by the seconds part of their UTC
        // time: a seconds value of 0 is treated as the first instant after a
        // transition, anything else as the last instant before one. Later
        // entries overwrite earlier ones.
        foreach ($dump as $entry) {
            if ($entry->utc->format('Y') < 1910 || $entry->utc->format('Y') >= 2038) {
                continue;
            }
            if ($entry->dst && $entry->utc->format('s') == 0) {
                $begin_dst = $entry;
            } elseif (!$entry->dst && $entry->utc->format('s') == 0) {
                $begin_std = $entry;
            } elseif ($entry->dst && $entry->utc->format('s') != 0) {
                $before_dst_end = $entry;
            } elseif (!$entry->dst && $entry->utc->format('s') != 0) {
                $before_dst = $entry;
            }
        }
        assert(!in_array(null, [ $before_dst, $begin_dst, $before_dst_end, $begin_std ], true));
        // these assertions could fail if the tz database contained some change info for the future:
        assert($before_dst->abbr === $row['STD ABBR']);
        assert($begin_dst->abbr === $row['DST ABBR']);
        assert($before_dst_end->abbr === $row['DST ABBR']);
        assert($begin_std->abbr === $row['STD ABBR']);
        $row['DST adjustment'] = format_offset($begin_dst->gmtoff - $begin_std->gmtoff);
        // DST start, expressed with the standard offset applied. Hours AND minutes
        // are kept so that transitions at e.g. 02:45 are not truncated to 02:00.
        $start_time = $begin_dst->utc->modify($begin_std->gmtoff . ' seconds');
        $row['Start time'] = format_offset((int)$start_time->format('H') * 3600 + (int)$start_time->format('i') * 60);
        // DST end, expressed with the DST offset applied; this instant also
        // supplies the date for the end rule.
        $time_for_end_rule = $begin_std->utc->modify($begin_dst->gmtoff . ' seconds');
        $row['End time'] = format_offset((int)$time_for_end_rule->format('H') * 3600 + (int)$time_for_end_rule->format('i') * 60);
        // Rules have the form "<n-th weekday or -1 for last>;<weekday 0-6>;<month 1-12>":
        $time_for_start_rule = $before_dst->utc->modify($begin_dst->gmtoff . ' seconds');
        $row['DST Start Date rule'] = weekday_in_month($time_for_start_rule) . ';' . $time_for_start_rule->format('w;n');
        $row['DST End date rule'] = weekday_in_month($time_for_end_rule) . ';' . $time_for_end_rule->format('w;n');
    }
    // the long names are simply the abbreviations:
    $row['STD NAME'] = $row['STD ABBR'];
    $row['DST NAME'] = $row['DST ABBR'];
    $result[] = $row;
    // emit a copy of the row for every alias that is not itself a known zone ID:
    foreach (array_diff(array_unique($aliases[$id]), $all_ids) as $alias) {
        $row['ID'] = $alias;
        $result[] = $row;
    }
}
//
// sort results:
//
usort($result, function(array $a, array $b) {
    return strnatcasecmp($a['ID'], $b['ID']);
});
//
// output in CSV format.
//
// add header:
echo '"' . join('","', $FIELD_LIST) . '"' . PHP_EOL;
// walk:
foreach ($result as $row) {
    $cells = [];
    foreach ($FIELD_LIST as $field) {
        // fields that were never filled are null and are written as empty strings:
        $cells[] = '"' . addslashes((string)$row[$field]) . '"';
    }
    echo join(',', $cells) . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment