/make_boost_zonespec.php

## make_boost_zonespec.php
<?php
//
// A little stupid script that uses PHP's list of time zone IDs to query
// the Olson TZ database and finally outputs a "date_time_zonespec.csv"
// file for use with boost's tz_database...
//
// Instructions:
//
// 1) Download tzdata* and tzcode* from https://www.iana.org/time-zones
// 2) Extract to some folder
// 3) Change Makefile to include CFLAGS=-m32
//    (workaround for https://bugs.launchpad.net/ubuntu/+source/eglibc/+bug/737872)
// 4) make TOPDIR=$HOME/tzdir install
// 5) php -n make_boost_zonespec.php > date_time_zonespec-`date '+%Y-%m-%d'`.csv
//

/**
 * Shell command used to invoke the zdump utility. The leading "~" is left
 * for the shell to expand when the command is executed.
 */
define('TZDUMP_CMD', '~/tzdir/etc/zdump');

// Shared UTC zone object, handed to the date parser for zdump's "UT" timestamps.
$ZONE_UTC = new DateTimeZone('UTC');

// Make date() and every DateTime created without an explicit zone operate in UTC.
date_default_timezone_set($ZONE_UTC->getName());

/**
 * Determines which occurrence of its weekday $dt is within its month.
 *
 * The first, second, ... occurrence yields 1, 2, ...; if no later day of the
 * same month falls on that weekday, -1 is returned instead (so the last
 * Sunday of a month gives -1, never 4 or 5).
 *
 * KNOWN BUG: the answer is derived from the calendar of $dt's own year only.
 * This means that we may e.g. return -1 although it really should be 4...
 *
 * @param DateTimeInterface $dt
 * @return int
 */
function weekday_in_month(DateTimeInterface $dt) {
    $day_of_month = (int)$dt->format('j');
    $days_in_month = (int)$dt->format('t');

    // Equal weekdays are exactly 7 days apart: if one more week no longer
    // fits into the month, $dt is the last occurrence of its weekday.
    if ($day_of_month + 7 > $days_in_month) {
        return -1;
    }

    // The number of full weeks preceding $dt gives its 0-based position.
    return (int)floor(($day_of_month - 1) / 7) + 1;
}

/**
 * Formats $seconds according to boost's CSV specs, i.e. as a signed
 * "hh:mm:ss" string such as "+01:00:00" or "-03:30:00".
 *
 * The sign is taken from $seconds itself rather than from the hour part, so
 * negative offsets of less than one hour keep their "-" (-1800 becomes
 * "-00:30:00"). Hours wrap at 24 and the seconds part is always written as
 * "00"; a sub-minute remainder is dropped.
 *
 * @param int $seconds
 * @return string
 */
function format_offset($seconds) {
    $sign = $seconds < 0 ? '-' : '+';
    $magnitude = abs((int)$seconds);

    $hours = (int)floor($magnitude / 3600) % 24;
    $minutes = (int)floor(($magnitude % 3600) / 60);

    return sprintf('%s%02d:%02d:%02d', $sign, $hours, $minutes, 0);
}

$result = [];

// Column order of boost's tz_database CSV file, see
// http://www.boost.org/doc/libs/1_60_0/doc/html/date_time/local_time.html#tz_database_datafile
$FIELD_LIST = [ 'ID', 'STD ABBR', 'STD NAME', 'DST ABBR', 'DST NAME', 'GMT offset', 'DST adjustment', 'DST Start Date rule', 'Start time', 'DST End date rule', 'End time' ];

$all_ids = [];
$aliases = [];

// get list of timezone IDs + create some aliases for our CSV file:
foreach (DateTimeZone::listAbbreviations() as $rows) {
    foreach ($rows as $row) {
        $id = $row['timezone_id'];

        // take only "locations" (IDs containing a "/"). Rows without a zone
        // ID (null) are skipped explicitly instead of being handed to
        // strpos(), which would emit a deprecation notice on PHP >= 8.1:
        if (!is_string($id) || strpos($id, '/') === false) {
            continue;
        }

        $all_ids[] = $id;

        // alias A/B/C by A/B (first and last path component only):
        $aliases[$id][] = preg_replace('~^(\w+)(?:/\w+)*(/\w+)$~', '\1\2', $id);

        // alias A-B by A_B:
        $aliases[$id][] = str_replace('-', '_', $id);
    }
}

// the same zone ID is listed under several abbreviations; keep each ID once:
$all_ids = array_unique($all_ids);

// use information for this year and the next one; the range is the same for every zone:
$date_range = date('Y') . ',' . (date('Y') + 1);

// Build one CSV row (plus one row per alias) for every zone ID from zdump's verbose output.
foreach ($all_ids as $id) {
    $row = array_fill_keys($FIELD_LIST, null);
    $row['ID'] = $id;

    // shell_exec() does not return a string when the command fails or prints
    // nothing; cast so that preg_split() always receives a string.
    $zdump_output = (string)shell_exec(TZDUMP_CMD . ' -c ' . escapeshellarg($date_range) . ' -v ' . escapeshellarg($id));
    $dump = preg_split('~\R~', $zdump_output, 0, PREG_SPLIT_NO_EMPTY);

    foreach ($dump as $i => $line) {
        // NOTE(review): the abbreviation is matched as 2-6 letters only, so a
        // numeric abbreviation such as "+03" is reported as a bad line --
        // confirm whether such abbreviations should be accepted.
        if (!preg_match('~^([\w/-]+)\s+(.+?) UT = (.+?) ([A-Za-z]{2,6}) isdst=([01]) gmtoff=(-?[0-9]+)$~', $line, $match)) {
            // Diagnostics go to STDERR so that they never end up in the CSV on STDOUT.
            fwrite(STDERR, 'BAD LINE from tzdump: ' . $line . PHP_EOL);

            continue;
        }

        // $match: 1 = zone, 2 = UT time, 3 = local time (unused), 4 = abbreviation, 5 = isdst, 6 = gmtoff
        list (, , $utc_str, , $abbr, $isdst, $gmtoff) = $match;

        if ($abbr === 'zzz') {
            continue;
        }

        $dump[$i] = (object)[
            'utc' => DateTimeImmutable::createFromFormat('D M d H:i:s Y', $utc_str, $ZONE_UTC),
            'dst' => (bool)(int)$isdst,
            'gmtoff' => (int)$gmtoff,
            'abbr' => $abbr,
        ];
    }

    // Lines skipped above are still plain strings; keep the parsed entries only.
    $dump = array_filter($dump, 'is_object');

    $has_dst = false;

    foreach ($dump as $entry) {
        if ($entry->utc->format('Y') < 1910) {
            continue;
        }

        $has_dst = $has_dst || $entry->dst;

        // simply take the last entry (probably for the year 2038). This is good enough for this information:
        if ($entry->dst) {
            $row['DST ABBR'] = $entry->abbr;
        } else {
            $row['STD ABBR'] = $entry->abbr;
            $row['GMT offset'] = format_offset($entry->gmtoff);
        }
    }

    if (!$has_dst) {
        $row['DST adjustment'] = '+00:00:00';
        $row['End time'] = '+00:00:00';
    } else {
        $before_dst = null;
        $begin_dst = null;
        $before_dst_end = null;
        $begin_std = null;

        // Classify every entry by its DST flag and by whether its UTC time
        // has a zero seconds part. Judging by the variable names, an entry
        // with non-zero seconds is the last instant before a switch and one
        // with zero seconds is the first instant after it. Later entries
        // overwrite earlier ones.
        foreach ($dump as $entry) {
            if ($entry->utc->format('Y') < 1910 || $entry->utc->format('Y') >= 2038) {
                continue;
            }

            $on_full_minute = ((int)$entry->utc->format('s') === 0);

            if ($entry->dst && $on_full_minute) {
                $begin_dst = $entry;
            } elseif (!$entry->dst && $on_full_minute) {
                $begin_std = $entry;
            } elseif ($entry->dst) {
                $before_dst_end = $entry;
            } else {
                $before_dst = $entry;
            }
        }

        assert(!in_array(null, [ $before_dst, $begin_dst, $before_dst_end, $begin_std ], true));

        // these assertions could fail if the tz database contained some change info for the future:
        assert($before_dst->abbr === $row['STD ABBR']);
        assert($begin_dst->abbr === $row['DST ABBR']);
        assert($before_dst_end->abbr === $row['DST ABBR']);
        assert($begin_std->abbr === $row['STD ABBR']);

        $row['DST adjustment'] = format_offset($begin_dst->gmtoff - $begin_std->gmtoff);

        // Start time: hour of the DST begin, shifted from UTC by the standard offset.
        $row['Start time'] = format_offset($begin_dst->utc->modify($begin_std->gmtoff . 'seconds')->format('H') * 3600);
        // End time: hour of the DST end, shifted from UTC by the DST offset.
        $row['End time'] = format_offset($begin_std->utc->modify($begin_dst->gmtoff . ' seconds')->format('H') * 3600);

        // Date rules have the form "<weekday index>;<weekday number>;<month>".
        $time_for_start_rule = $before_dst->utc->modify($begin_dst->gmtoff . 'seconds');
        $row['DST Start Date rule'] = weekday_in_month($time_for_start_rule) . ';' . $time_for_start_rule->format('w;n');

        $time_for_end_rule = $begin_std->utc->modify($begin_dst->gmtoff . ' seconds');
        $row['DST End date rule'] = weekday_in_month($time_for_end_rule) . ';' . $time_for_end_rule->format('w;n');
    }

    // No long names are available, so the abbreviations are reused as names.
    $row['STD NAME'] = $row['STD ABBR'];
    $row['DST NAME'] = $row['DST ABBR'];

    $result[] = $row;

    // Emit the same data under every alias that is not itself a real zone ID.
    foreach (array_diff(array_unique($aliases[$id]), $all_ids) as $alias) {
        $row['ID'] = $alias;
        $result[] = $row;
    }
}

//
// sort results by ID (natural order, case-insensitive):
//
usort($result, function(array $a, array $b) {
    return strnatcasecmp($a['ID'], $b['ID']);
});

//
// output in CSV format.
//

// add header:
echo '"' . join('","', $FIELD_LIST) . '"' . PHP_EOL;

// walk:
foreach ($result as $row) {
    $cells = [];

    foreach ($FIELD_LIST as $field) {
        // Fields that were never filled are null. Cast them to '' explicitly:
        // passing null to addslashes() raises a deprecation notice on
        // PHP >= 8.1, which would be printed into the CSV output.
        $cells[] = '"' . addslashes((string)$row[$field]) . '"';
    }

    echo implode(',', $cells) . PHP_EOL;
}
	<?php
	//
	// A little stupid script that uses PHP's list of time zone IDs to query
	// the Olson TZ database and finally outputs a "date_time_zonespec.csv"
	// file for use with boost's tz_database...
	//
	// Instructions:
	//
	// 1) Download tzdata* and tzcode* from https://www.iana.org/time-zones
	// 2) Extract to some folder
	// 3) Change Makefile to include CFLAGS=-m32
	// (workaround for https://bugs.launchpad.net/ubuntu/+source/eglibc/+bug/737872)
	// 4) make TOPDIR=$HOME/tzdir install
	// 5) php -n make_boost_zonespec.php > date_time_zonespec-`date '+%Y-%m-%d'`.csv
	//

	/**
	 * Shell command used to invoke the zdump utility. The leading "~" is left
	 * for the shell to expand when the command is executed.
	 */
	define('TZDUMP_CMD', '~/tzdir/etc/zdump');

	// Shared UTC zone object, handed to the date parser for zdump's "UT" timestamps.
	$ZONE_UTC = new DateTimeZone('UTC');

	// Make date() and every DateTime created without an explicit zone operate in UTC.
	date_default_timezone_set($ZONE_UTC->getName());

	/**
	 * Returns the index of $dt's weekday in $dt's month (1 for the first
	 * occurrence, 2 for the second, ...) or -1 if it's the last such weekday
	 * (e.g. the last Sunday) in that month.
	 *
	 * The position is computed from the day of month and the month length
	 * alone, so no intermediate date has to be parsed.
	 *
	 * KNOWN BUG: the index is taken from one specific year. This means that
	 * we may e.g. return -1 although it really should be 4...
	 *
	 * @param DateTimeInterface $dt
	 * @return int
	 */
	function weekday_in_month(DateTimeInterface $dt) {
	    $day_of_month = (int)$dt->format('j');
	    $days_in_month = (int)$dt->format('t');

	    // Equal weekdays are exactly 7 days apart: if one more week no longer
	    // fits into the month, $dt is the last occurrence of its weekday.
	    if ($day_of_month + 7 > $days_in_month) {
	        return -1;
	    }

	    // The number of full weeks preceding $dt gives its 0-based position.
	    return (int)floor(($day_of_month - 1) / 7) + 1;
	}

	/**
	 * Formats $seconds according to boost's CSV specs, i.e. as a signed
	 * "hh:mm:ss" string such as "+01:00:00" or "-03:30:00".
	 *
	 * The sign is taken from $seconds itself rather than from the hour part,
	 * so negative offsets of less than one hour keep their "-" (-1800 becomes
	 * "-00:30:00"). Hours wrap at 24 and the seconds part is always written
	 * as "00"; a sub-minute remainder is dropped.
	 *
	 * @param int $seconds
	 * @return string
	 */
	function format_offset($seconds) {
	    $sign = $seconds < 0 ? '-' : '+';
	    $magnitude = abs((int)$seconds);

	    $hours = (int)floor($magnitude / 3600) % 24;
	    $minutes = (int)floor(($magnitude % 3600) / 60);

	    return sprintf('%s%02d:%02d:%02d', $sign, $hours, $minutes, 0);
	}

	$result = [];

	// Column order of boost's tz_database CSV file, see
	// http://www.boost.org/doc/libs/1_60_0/doc/html/date_time/local_time.html#tz_database_datafile
	$FIELD_LIST = [ 'ID', 'STD ABBR', 'STD NAME', 'DST ABBR', 'DST NAME', 'GMT offset', 'DST adjustment', 'DST Start Date rule', 'Start time', 'DST End date rule', 'End time' ];

	$all_ids = [];
	$aliases = [];

	// get list of timezone IDs + create some aliases for our CSV file:
	foreach (DateTimeZone::listAbbreviations() as $rows) {
	    foreach ($rows as $row) {
	        $id = $row['timezone_id'];

	        // take only "locations" (IDs containing a "/"). Rows without a zone
	        // ID (null) are skipped explicitly instead of being handed to
	        // strpos(), which would emit a deprecation notice on PHP >= 8.1:
	        if (!is_string($id) || strpos($id, '/') === false) {
	            continue;
	        }

	        $all_ids[] = $id;

	        // alias A/B/C by A/B (first and last path component only):
	        $aliases[$id][] = preg_replace('~^(\w+)(?:/\w+)*(/\w+)$~', '\1\2', $id);

	        // alias A-B by A_B:
	        $aliases[$id][] = str_replace('-', '_', $id);
	    }
	}

	// the same zone ID is listed under several abbreviations; keep each ID once:
	$all_ids = array_unique($all_ids);

	// use information for this year and the next one; the range is the same for every zone:
	$date_range = date('Y') . ',' . (date('Y') + 1);

	// Build one CSV row (plus one row per alias) for every zone ID from zdump's verbose output.
	foreach ($all_ids as $id) {
	    $row = array_fill_keys($FIELD_LIST, null);
	    $row['ID'] = $id;

	    // shell_exec() does not return a string when the command fails or prints
	    // nothing; cast so that preg_split() always receives a string.
	    $zdump_output = (string)shell_exec(TZDUMP_CMD . ' -c ' . escapeshellarg($date_range) . ' -v ' . escapeshellarg($id));
	    $dump = preg_split('~\R~', $zdump_output, 0, PREG_SPLIT_NO_EMPTY);

	    foreach ($dump as $i => $line) {
	        // NOTE(review): the abbreviation is matched as 2-6 letters only, so a
	        // numeric abbreviation such as "+03" is reported as a bad line --
	        // confirm whether such abbreviations should be accepted.
	        if (!preg_match('~^([\w/-]+)\s+(.+?) UT = (.+?) ([A-Za-z]{2,6}) isdst=([01]) gmtoff=(-?[0-9]+)$~', $line, $match)) {
	            // Diagnostics go to STDERR so that they never end up in the CSV on STDOUT.
	            fwrite(STDERR, 'BAD LINE from tzdump: ' . $line . PHP_EOL);

	            continue;
	        }

	        // $match: 1 = zone, 2 = UT time, 3 = local time (unused), 4 = abbreviation, 5 = isdst, 6 = gmtoff
	        list (, , $utc_str, , $abbr, $isdst, $gmtoff) = $match;

	        if ($abbr === 'zzz') {
	            continue;
	        }

	        $dump[$i] = (object)[
	            'utc' => DateTimeImmutable::createFromFormat('D M d H:i:s Y', $utc_str, $ZONE_UTC),
	            'dst' => (bool)(int)$isdst,
	            'gmtoff' => (int)$gmtoff,
	            'abbr' => $abbr,
	        ];
	    }

	    // Lines skipped above are still plain strings; keep the parsed entries only.
	    $dump = array_filter($dump, 'is_object');

	    $has_dst = false;

	    foreach ($dump as $entry) {
	        if ($entry->utc->format('Y') < 1910) {
	            continue;
	        }

	        $has_dst = $has_dst || $entry->dst;

	        // simply take the last entry (probably for the year 2038). This is good enough for this information:
	        if ($entry->dst) {
	            $row['DST ABBR'] = $entry->abbr;
	        } else {
	            $row['STD ABBR'] = $entry->abbr;
	            $row['GMT offset'] = format_offset($entry->gmtoff);
	        }
	    }

	    if (!$has_dst) {
	        $row['DST adjustment'] = '+00:00:00';
	        $row['End time'] = '+00:00:00';
	    } else {
	        $before_dst = null;
	        $begin_dst = null;
	        $before_dst_end = null;
	        $begin_std = null;

	        // Classify every entry by its DST flag and by whether its UTC time
	        // has a zero seconds part. Judging by the variable names, an entry
	        // with non-zero seconds is the last instant before a switch and one
	        // with zero seconds is the first instant after it. Later entries
	        // overwrite earlier ones.
	        foreach ($dump as $entry) {
	            if ($entry->utc->format('Y') < 1910 || $entry->utc->format('Y') >= 2038) {
	                continue;
	            }

	            $on_full_minute = ((int)$entry->utc->format('s') === 0);

	            if ($entry->dst && $on_full_minute) {
	                $begin_dst = $entry;
	            } elseif (!$entry->dst && $on_full_minute) {
	                $begin_std = $entry;
	            } elseif ($entry->dst) {
	                $before_dst_end = $entry;
	            } else {
	                $before_dst = $entry;
	            }
	        }

	        assert(!in_array(null, [ $before_dst, $begin_dst, $before_dst_end, $begin_std ], true));

	        // these assertions could fail if the tz database contained some change info for the future:
	        assert($before_dst->abbr === $row['STD ABBR']);
	        assert($begin_dst->abbr === $row['DST ABBR']);
	        assert($before_dst_end->abbr === $row['DST ABBR']);
	        assert($begin_std->abbr === $row['STD ABBR']);

	        $row['DST adjustment'] = format_offset($begin_dst->gmtoff - $begin_std->gmtoff);

	        // Start time: hour of the DST begin, shifted from UTC by the standard offset.
	        $row['Start time'] = format_offset($begin_dst->utc->modify($begin_std->gmtoff . 'seconds')->format('H') * 3600);
	        // End time: hour of the DST end, shifted from UTC by the DST offset.
	        $row['End time'] = format_offset($begin_std->utc->modify($begin_dst->gmtoff . ' seconds')->format('H') * 3600);

	        // Date rules have the form "<weekday index>;<weekday number>;<month>".
	        $time_for_start_rule = $before_dst->utc->modify($begin_dst->gmtoff . 'seconds');
	        $row['DST Start Date rule'] = weekday_in_month($time_for_start_rule) . ';' . $time_for_start_rule->format('w;n');

	        $time_for_end_rule = $begin_std->utc->modify($begin_dst->gmtoff . ' seconds');
	        $row['DST End date rule'] = weekday_in_month($time_for_end_rule) . ';' . $time_for_end_rule->format('w;n');
	    }

	    // No long names are available, so the abbreviations are reused as names.
	    $row['STD NAME'] = $row['STD ABBR'];
	    $row['DST NAME'] = $row['DST ABBR'];

	    $result[] = $row;

	    // Emit the same data under every alias that is not itself a real zone ID.
	    foreach (array_diff(array_unique($aliases[$id]), $all_ids) as $alias) {
	        $row['ID'] = $alias;
	        $result[] = $row;
	    }
	}

	//
	// sort results by ID (natural order, case-insensitive):
	//
	usort($result, function(array $a, array $b) {
	    return strnatcasecmp($a['ID'], $b['ID']);
	});

	//
	// output in CSV format.
	//

	// add header:
	echo '"' . join('","', $FIELD_LIST) . '"' . PHP_EOL;

	// walk:
	foreach ($result as $row) {
	    $cells = [];

	    foreach ($FIELD_LIST as $field) {
	        // Fields that were never filled are null. Cast them to '' explicitly:
	        // passing null to addslashes() raises a deprecation notice on
	        // PHP >= 8.1, which would be printed into the CSV output.
	        $cells[] = '"' . addslashes((string)$row[$field]) . '"';
	    }

	    echo implode(',', $cells) . PHP_EOL;
	}