Created
April 12, 2016 13:06
-
-
Save anonymous/e1776b620768f5a22da4f3bc2749562f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
//
// A little stupid script that uses PHP's list of time zone IDs to query
// the Olson TZ database and finally outputs a "date_time_zonespec.csv"
// file for use with boost's tz_database...
//
// Instructions:
//
// 1) Download tzdata* and tzcode* from https://www.iana.org/time-zones
// 2) Extract to some folder
// 3) Change Makefile to include CFLAGS=-m32
//    (workaround for https://bugs.launchpad.net/ubuntu/+source/eglibc/+bug/737872)
// 4) make TOPDIR=$HOME/tzdir install
// 5) php -n make_boost_zonespec.php > date_time_zonespec-`date '+%Y-%m-%d'`.csv
//
/**
 * Path to the zdump utility.
 *
 * The value is concatenated unquoted into the command line handed to
 * shell_exec(), so the leading "~" is expanded by the shell.
 */
define('TZDUMP_CMD', '~/tzdir/etc/zdump');

// All date handling in this script happens in UTC; $ZONE_UTC is used when
// parsing the UT timestamps printed by zdump.
date_default_timezone_set('UTC');
$ZONE_UTC = new DateTimeZone('UTC');
/**
 * Returns the 1-based index of $dt's weekday within $dt's month
 * (e.g. 2 for the second Sunday of the month), or -1 if $dt is the
 * last occurrence of that weekday (e.g. the last Sunday) in that month.
 *
 * KNOWN BUG: the index is taken from one specific year. This means that
 * we may e.g. return -1 although the underlying rule really is "4th".
 *
 * @param DateTimeInterface $dt
 * @return int 1..4, or -1 for the last occurrence in the month
 */
function weekday_in_month(DateTimeInterface $dt) {
    $day = (int)$dt->format('j');
    $days_in_month = (int)$dt->format('t');

    // No further occurrence of this weekday fits into the month, so this is the last one:
    if ($day + 7 > $days_in_month) {
        return -1;
    }

    // Days 1-7 hold the first occurrence, 8-14 the second, and so on:
    return (int)(($day - 1) / 7) + 1;
}
/**
 * Formats $seconds according to boost's CSV specs, i.e. as "{+|-}hh:mm:ss".
 *
 * The hour part wraps around at 24. The sign is tracked separately from the
 * hour value so that offsets between -1h and 0 (e.g. -00:30:00) keep their
 * minus sign; a value that formats to all zeros is always printed with "+".
 *
 * @param int $seconds offset in seconds, may be negative
 * @return string
 */
function format_offset($seconds) {
    $seconds = (int)$seconds;
    $abs = abs($seconds);

    $hours = ((int)($abs / 3600)) % 24;
    $minutes = (int)(($abs % 3600) / 60);
    $secs = $abs % 60;

    $is_negative = $seconds < 0 && ($hours !== 0 || $minutes !== 0 || $secs !== 0);

    return sprintf('%s%02d:%02d:%02d', $is_negative ? '-' : '+', $hours, $minutes, $secs);
}
/**
 * Returns the time of day of $time as a number of seconds since midnight.
 *
 * @param DateTimeInterface $time
 * @return int
 */
function seconds_since_midnight(DateTimeInterface $time) {
    return (int)$time->format('H') * 3600 + (int)$time->format('i') * 60 + (int)$time->format('s');
}

$result = [];

// Column order of the CSV file, see
// http://www.boost.org/doc/libs/1_60_0/doc/html/date_time/local_time.html#tz_database_datafile
$FIELD_LIST = [
    'ID', 'STD ABBR', 'STD NAME', 'DST ABBR', 'DST NAME',
    'GMT offset', 'DST adjustment',
    'DST Start Date rule', 'Start time', 'DST End date rule', 'End time',
];

$all_ids = [];
$aliases = [];

// get list of timezone IDs + create some aliases for our CSV file:
foreach (DateTimeZone::listAbbreviations() as $rows) {
    foreach ($rows as $row) {
        $id = $row['timezone_id'];

        // take only "locations" (IDs containing a slash); entries without a zone ID are skipped, too:
        if (!is_string($id) || strpos($id, '/') === false) {
            continue;
        }

        $all_ids[] = $id;

        // alias A/B/C by A/C:
        $aliases[$id][] = preg_replace('~^(\w+)(?:/\w+)*(/\w+)$~', '\1\2', $id);

        // alias A-B by A_B:
        $aliases[$id][] = str_replace('-', '_', $id);
    }
}

$all_ids = array_unique($all_ids);

foreach ($all_ids as $id) {
    $row = array_fill_keys($FIELD_LIST, null);
    $row['ID'] = $id;

    // use information for this year:
    $date_range = date('Y') . ',' . (date('Y') + 1);
    $command = TZDUMP_CMD . ' -c ' . escapeshellarg($date_range) . ' -v ' . escapeshellarg($id);

    // shell_exec() yields null if the command fails or prints nothing, hence the cast:
    $dump = preg_split('~\R~', (string)shell_exec($command), 0, PREG_SPLIT_NO_EMPTY);

    // Replace every parseable line by an object; everything else is dropped below.
    foreach ($dump as $i => $line) {
        // The abbreviation may be alphabetic ("CEST") or numeric ("+03", "-05"):
        if (!preg_match('~^([\w/+-]+)\s+(.+?) UT = (.+?) ([A-Za-z0-9+-]{2,6}) isdst=([01]) gmtoff=(-?[0-9]+)$~', $line, $match)) {
            fwrite(STDERR, 'BAD LINE from tzdump: ' . $line . PHP_EOL);
            continue;
        }

        // $match[1] is the zone ID and $match[3] the local time string; neither is needed here.
        list (, , $utc_str, , $abbr, $isdst, $gmtoff) = $match;

        if ($abbr === 'zzz') {
            continue;
        }

        $utc = DateTimeImmutable::createFromFormat('D M d H:i:s Y', $utc_str, $ZONE_UTC);
        if ($utc === false) {
            fwrite(STDERR, 'BAD DATE from tzdump: ' . $line . PHP_EOL);
            continue;
        }

        $dump[$i] = (object)[
            'utc' => $utc,
            'dst' => (bool)(int)$isdst,
            'gmtoff' => (int)$gmtoff,
            'abbr' => $abbr,
        ];
    }

    $dump = array_filter($dump, 'is_object');

    $has_dst = false;
    foreach ($dump as $entry) {
        if ($entry->utc->format('Y') < 1910) {
            continue;
        }

        $has_dst = $has_dst || $entry->dst;

        // simply take the last matching entry of the dump. This is good enough for this information:
        if ($entry->dst) {
            $row['DST ABBR'] = $entry->abbr;
        } else {
            $row['STD ABBR'] = $entry->abbr;
            $row['GMT offset'] = format_offset($entry->gmtoff);
        }
    }

    if (!$has_dst) {
        $row['DST adjustment'] = '+00:00:00';
        $row['End time'] = '+00:00:00';
    } else {
        // Entries whose UT seconds are zero are taken as the instant right at a
        // transition, all others as the second right before one. The last match wins.
        $before_dst = null;
        $begin_dst = null;
        $before_dst_end = null;
        $begin_std = null;

        foreach ($dump as $entry) {
            if ($entry->utc->format('Y') < 1910 || $entry->utc->format('Y') >= 2038) {
                continue;
            }

            $on_boundary = $entry->utc->format('s') == 0;

            if ($entry->dst && $on_boundary) {
                $begin_dst = $entry;
            } elseif (!$entry->dst && $on_boundary) {
                $begin_std = $entry;
            } elseif ($entry->dst) {
                $before_dst_end = $entry;
            } else {
                $before_dst = $entry;
            }
        }

        assert(!in_array(null, [ $before_dst, $begin_dst, $before_dst_end, $begin_std ], true));

        // these assertions could fail if the tz database contained some change info for the future:
        assert($before_dst->abbr === $row['STD ABBR']);
        assert($begin_dst->abbr === $row['DST ABBR']);
        assert($before_dst_end->abbr === $row['DST ABBR']);
        assert($begin_std->abbr === $row['STD ABBR']);

        $row['DST adjustment'] = format_offset($begin_dst->gmtoff - $begin_std->gmtoff);

        // Start time is the wall-clock time of the DST start expressed in standard time,
        // End time the wall-clock time of the DST end expressed in DST. Minutes are kept
        // so that transitions such as 02:45 are not rounded down to the full hour.
        $dst_start_local = $begin_dst->utc->modify($begin_std->gmtoff . ' seconds');
        $dst_end_local = $begin_std->utc->modify($begin_dst->gmtoff . ' seconds');

        $row['Start time'] = format_offset(seconds_since_midnight($dst_start_local));
        $row['End time'] = format_offset(seconds_since_midnight($dst_end_local));

        $time_for_start_rule = $before_dst->utc->modify($begin_dst->gmtoff . ' seconds');
        $row['DST Start Date rule'] = weekday_in_month($time_for_start_rule) . ';' . $time_for_start_rule->format('w;n');

        $time_for_end_rule = $dst_end_local;
        $row['DST End date rule'] = weekday_in_month($time_for_end_rule) . ';' . $time_for_end_rule->format('w;n');
    }

    // no long names are available, so the abbreviations are reused:
    $row['STD NAME'] = $row['STD ABBR'];
    $row['DST NAME'] = $row['DST ABBR'];

    $result[] = $row;

    // emit a copy of the row for every alias that is not a real zone ID itself:
    foreach (array_diff(array_unique($aliases[$id]), $all_ids) as $alias) {
        $row['ID'] = $alias;
        $result[] = $row;
    }
}

//
// sort results:
//
usort($result, function(array $a, array $b) {
    return strnatcasecmp($a['ID'], $b['ID']);
});

//
// output in CSV format.
//

// add header:
echo '"' . join('","', $FIELD_LIST) . '"' . PHP_EOL;

// walk:
foreach ($result as $row) {
    $line = '';
    foreach ($FIELD_LIST as $field) {
        // fields that were never filled are null and are written as empty strings
        $line .= '"' . addslashes((string)$row[$field]) . '",';
    }
    echo rtrim($line, ',') . PHP_EOL;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment