Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Created January 28, 2017 16:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lifthrasiir/004c214c6c406dfd79217c6e1b8e10de to your computer and use it in GitHub Desktop.
Save lifthrasiir/004c214c6c406dfd79217c6e1b8e10de to your computer and use it in GitHub Desktop.
Example implementation for the proposed Temporenc byte format
// Example implementation for the proposed Temporenc byte format
// Kang Seonghoon, 2017-01-29, public domain
#include <stdint.h>
#include <stddef.h>
#include <string.h>
// A decoded Temporenc value.
// Each `has_xxx` flag tells whether the matching `xxx` field carries a value;
// a field whose flag is clear must not be interpreted.
struct temporenc {
    // Presence flags. They are declared `unsigned int` on purpose: whether a
    // plain `int` bit-field is signed is implementation-defined, and a signed
    // 1-bit field can only hold 0 and -1, so a stored 1 would not compare
    // equal to 1 when read back.
    unsigned int has_date: 1, has_year: 1, has_mon: 1, has_day: 1;
    unsigned int has_time: 1, has_hour: 1, has_min: 1, has_sec: 1, has_nsec: 1;
    // `utcoff_unknown` is set together with `has_utcoff` when an offset is
    // present in the input but its value is not known.
    unsigned int has_utcoff: 1, utcoff_unknown: 1;
    int32_t year; // ISO 8601 year
    uint32_t mon, day; // 1-based
    uint32_t hour, min, sec, nsec; // 0-based
    int32_t utcoff; // in seconds, e.g. +09:00 == +32400
};
// Helper that unpacks the time-of-day component into `out`.
// Returns 1 on success and 0 when a field holds a value that is neither in
// range nor the "absent" marker for that field.
//
// Bit layout of `second` (least significant 19 bits):
//   ...HHH HHNNNNNN SSSSSSXY
// where X flags a UTC offset and Y flags a sub-second part.
static int temporenc_read_time(struct temporenc *out, uint32_t second) {
    const uint32_t hh = (second >> 14) & 31;
    const uint32_t mm = (second >> 8) & 63;
    const uint32_t ss = (second >> 2) & 63;

    out->has_time = 1;
    out->hour = hh;
    out->min = mm;
    out->sec = ss;
    out->has_nsec = second & 1;
    out->has_utcoff = (second >> 1) & 1;

    // hour: 0..23 is a real value, 27 means "no hour", anything else is invalid
    if (hh < 24) {
        out->has_hour = 1;
    } else if (hh != 27) {
        return 0;
    }

    // minute: 0..59 is a real value, 63 means "no minute"
    if (mm < 60) {
        out->has_min = 1;
    } else if (mm != 63) {
        return 0;
    }

    // second: 0..60 is a real value (60 is accepted, presumably for leap
    // seconds), 63 means "no second"
    if (ss < 61) {
        out->has_sec = 1;
    } else if (ss != 63) {
        return 0;
    }

    return 1;
}
// Decodes one Temporenc value from the first `len` bytes of `buf`.
//
// Returns the number of bytes read (<= `len`), or 0 if anything goes wrong
// (NULL argument, truncated input, reserved leading byte, out-of-range field
// or non-zero padding bits).
// `*out` is written only on success; on failure it is left untouched.
// `out->xxx` is meaningful only if the corresponding `out->has_xxx` is set,
// with the exception that `out->nsec` is zero if `has_sec && !has_nsec`.
size_t temporenc_read(struct temporenc *out, const char *buf, size_t len) {
    if (out == NULL || buf == NULL || len < 3) return 0;
    struct temporenc dt = {0};
    size_t read = 0;
    // Bytes are widened to uint32_t before any shift so that `<< 24` never
    // operates on a promoted signed int.
#define BUF(i) ((uint32_t) (uint8_t) buf[i])

    // decode first three bytes
    // first: YYYYYYYY YYYYMMMM DDDDDHHH (continues...)
    const uint32_t first = (BUF(0) << 16) | (BUF(1) << 8) | BUF(2);
    read += 3;
    switch (BUF(0)) {
    case 0xfc: case 0xfd: // time-only, shift `first` to match the other case
        if (!temporenc_read_time(&dt, first << 2)) return 0; // assume XY bits to be 00
        goto okay;
    case 0xfe: case 0xff: // reserved
        return 0;
    default: // date, optionally followed by a time
        break;
    }

    dt.has_date = 1;
    dt.year = (int32_t) (first >> 12);
    dt.mon = ((first >> 8) & 15) + 1;
    dt.day = ((first >> 3) & 31) + 1;
    // Leading bytes 0xfc..0xff were handled above, so the year field is at
    // most 4031 here; 4031 itself marks an absent year.
    if (dt.year < 4031) dt.has_year = 1;
    // month 16 (raw 15) marks an absent month; 13..15 are invalid
    if (dt.mon < 13) dt.has_mon = 1; else if (dt.mon != 16) return 0;

    uint32_t ndays = 31; // the conservative upper bound if year and month are absent
    if (dt.has_year && dt.has_mon) {
        static const uint8_t NDAYSINMON[13] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
        ndays = NDAYSINMON[dt.mon];
        if (dt.mon == 2) {
            // Gregorian rule: every 4th year is a leap year, except century
            // years, which are leap years only when divisible by 400.
            const int leap = (dt.year % 4 == 0) &&
                             (dt.year % 100 != 0 || dt.year % 400 == 0);
            if (leap) ndays += 1;
        }
    }
    // day 32 (raw 31) marks an absent day
    if (dt.day <= ndays) dt.has_day = 1; else if (dt.day != 32) return 0;
    if ((first & 7) == 7) goto okay; // date-only

    // decode next two bytes; the low three bits of `first` are the top of the hour
    if (len < 5) return 0;
    const uint32_t second = ((first & 7) << 16) | (BUF(3) << 8) | BUF(4);
    read += 2;
    if (!temporenc_read_time(&dt, second)) return 0;

    // decode the sub-second component if any
    if (dt.has_nsec) {
        if (!dt.has_sec) return 0;
        // the tag byte itself must be inside the buffer before it is inspected
        if (len < read + 1) return 0;
        uint32_t tag = BUF(5) >> 6;
        // 2, 3 or 4 bytes for tags 0..2; tag 3 needs 4 bytes plus the first
        // byte of the chunk that follows
        if (len < 7 + tag) return 0;
        switch (tag) {
        case 0: // millisecond
            dt.nsec = (BUF(5) << 8) | BUF(6);
            read += 2;
            if (dt.nsec & 0x0f) return 0; // no undefined padding
            dt.nsec = (dt.nsec >> 4) & 0x3ff;
            if (dt.nsec >= 1000) return 0;
            dt.nsec *= 1000000;
            break;
        case 1: // microsecond
            dt.nsec = (BUF(5) << 16) | (BUF(6) << 8) | BUF(7);
            read += 3;
            if (dt.nsec & 0x03) return 0; // no undefined padding
            dt.nsec = (dt.nsec >> 2) & 0xfffff;
            if (dt.nsec >= 1000000) return 0;
            dt.nsec *= 1000;
            break;
        case 2: case 3: // nsec and more
            dt.nsec = (BUF(5) << 24) | (BUF(6) << 16) | (BUF(7) << 8) | BUF(8);
            read += 4;
            dt.nsec = dt.nsec & 0x3fffffff;
            if (dt.nsec >= 1000000000) return 0;
            if (tag == 3) { // skip additional precision if present
                // nit: we don't check the validity for those bits
                do {
                    // BUF(read) is in bounds: the previous length check
                    // always covered one byte beyond a tag-3 chunk.
                    tag = BUF(read) >> 6;
                    const size_t required = read + 2 + tag;
                    if (len < required) return 0;
                    // A chunk occupies 2, 3, 4 or 4 bytes for tags 0..3, the
                    // same as the first chunk above. For tag 3 the fifth
                    // byte counted in `required` is the next chunk's tag
                    // byte and must not be consumed here.
                    read += (tag == 3) ? 4 : 2 + tag;
                } while (tag == 3);
            }
            break;
        }
    }

    // decode the UTC offset if any
    if (dt.has_utcoff) {
        if (len < read + 1) return 0;
        const uint32_t qutcoff = BUF(read);
        read += 1;
        if (qutcoff & 0x80) return 0; // no undefined padding
        switch (qutcoff) {
        case 126: dt.utcoff_unknown = 1; break;
        case 127: dt.has_utcoff = 0; break; // explicitly "no offset"
        // offset is stored in quarter hours, biased by 64
        default: dt.utcoff = ((int32_t) qutcoff - 64) * 15 * 60;
        }
    }

#undef BUF // keep the helper macro from leaking into the rest of the file
okay:
    *out = dt;
    return read;
}
#ifndef IMPL_ONLY
#include <stdio.h>
// A quick and dirty formatter for a decoded value.
//
// Writes an ISO 8601-like rendering of `dt` into `buf`, which has room for
// `len` bytes including the terminating NUL. Fields whose `has_xxx` flag is
// clear are shown as underscores; a NULL `dt` is rendered as "(null)".
// Returns the sum of the snprintf() results, i.e. the length the full text
// would have had with unlimited room, or a negative value if snprintf() fails.
int temporenc_format(char *buf, size_t len, const struct temporenc *dt) {
    if (dt == NULL) return snprintf(buf, len, "(null)");
    int written = 0;
    // Appends one formatted piece and advances `buf`/`len` past it. After a
    // truncation `len` drops to 0, so later pieces are counted but not stored.
    // Plain __VA_ARGS__ is enough because a format string is always passed;
    // the `, ##__VA_ARGS__` form is a GNU extension and not valid ISO C.
#define FMT(...) do { \
        int ret = snprintf(buf, len, __VA_ARGS__); \
        if (ret < 0) return ret; \
        written += ret; \
        if ((size_t) ret > len) ret = (int) len; \
        len -= (size_t) ret; \
        buf += ret; \
    } while (0)

    if (dt->has_date) {
        if (dt->has_year) FMT("%04d", (int) dt->year); else FMT("____");
        if (dt->has_mon) FMT("-%02d", (int) dt->mon); else FMT("-__");
        if (dt->has_day) FMT("-%02d", (int) dt->day); else FMT("-__");
    }
    if (dt->has_time) {
        if (dt->has_hour) FMT("T%02d", (int) dt->hour); else FMT("T__");
        if (dt->has_min) FMT(":%02d", (int) dt->min); else FMT(":__");
        if (dt->has_sec) FMT(":%02d", (int) dt->sec); else FMT(":__");
        if (dt->has_nsec) FMT(".%09d", (int) dt->nsec);
    }
    if (dt->has_utcoff) {
        if (dt->utcoff_unknown) {
            FMT("+__:__");
        } else if (dt->utcoff < 0) {
            // print the magnitude; the sign is part of the format string
            FMT("-%02d:%02d", (int) (-dt->utcoff / 60 / 60), (int) (-dt->utcoff / 60 % 60));
        } else {
            FMT("+%02d:%02d", (int) (dt->utcoff / 60 / 60), (int) (dt->utcoff / 60 % 60));
        }
    }
#undef FMT
    return written;
}
// test driver
int main() {
struct test { size_t line, len; const char buf[16]; const char *expected; };
#define T(buf, fmt) { __LINE__, sizeof(buf) - 1, (buf), (fmt) }
static const struct test TESTS[] = {
T("", "(null)"),
T("\000", "(null)"),
T("\000\000", "(null)"),
T("\000\000\000", "(null)"),
T("\000\000\007", "0000-01-01"),
T("\000\000\017", "0000-01-02"),
T("\176\020\347", "2017-01-29"),
T("\176\020\377", "2017-01-__"),
T("\176\021\337", "2017-02-28"),
T("\176\021\347", "(null)"), // 2017-02-29
T("\176\001\347", "2016-02-29"),
T("\175\001\347", "2000-02-29"),
T("\176\037\067", "2017-__-07"),
T("\373\353\367", "4030-12-31"),
T("\373\373\367", "____-12-31"),
T("\373\377\377", "____-__-__"),
T("\374\000\000", "T00:00:00"),
T("\374\020\203", "T01:02:03"),
T("\375\176\374", "T23:59:60"),
T("\375\176\375", "(null)"), // T23:60:61
T("\375\177\074", "(null)"), // T23:60:60
T("\375\216\374", "(null)"), // T24:59:60
T("\374\020\277", "T01:02:__"),
T("\374\037\303", "T01:__:03"),
T("\375\260\203", "T__:02:03"),
T("\375\277\377", "T__:__:__"),
T("\377\377\377", "(null)"),
T("\176\020\343", "(null)"),
T("\176\020\343\042", "(null)"),
T("\176\020\343\042\340", "2017-01-29T12:34:56"),
T("\176\020\343\042\341", "(null)"),
T("\176\020\343\042\341\076", "(null)"),
T("\176\020\343\042\341\076\160", "2017-01-29T12:34:56.999000000"),
T("\176\020\343\042\341\076\161", "(null)"), // invalid padding
T("\176\020\343\042\341\076\200", "(null)"), // 2017-01-29T12:34:56.(1000)000000
T("\176\020\343\042\341\175", "(null)"),
T("\176\020\343\042\341\175\010", "(null)"),
T("\176\020\343\042\341\175\010\374", "2017-01-29T12:34:56.999999000"),
T("\176\020\343\042\341\175\010\375", "(null)"), // invalid padding
T("\176\020\343\042\341\175\011\000", "(null)"), // 2017-01-29T12:34:56.(1000000)000
T("\176\020\343\042\341\273", "(null)"),
T("\176\020\343\042\341\273\232", "(null)"),
T("\176\020\343\042\341\273\232\311", "(null)"),
T("\176\020\343\042\341\273\232\311\377", "2017-01-29T12:34:56.999999999"),
T("\176\020\343\042\341\273\232\312\000", "(null)"), // 2017-01-29T12:34:56.(1000000000)
T("\176\020\343\042\341\373\232\311\377", "(null)"),
T("\176\020\343\042\341\373\232\311\377\076\160", "2017-01-29T12:34:56.999999999"),
T("\176\020\343\042\341\373\232\311\377\373\232", "(null)"),
T("\176\020\343\042\374", "2017-01-29T12:34:__"),
T("\176\020\343\042\375\076\160", "(null)"), // 2017-01-29T12:34:__.999000000
T("\373\377\376\377\374", "____-__-__T__:__:__"),
T("\176\020\343\042\342", "(null)"),
T("\176\020\343\042\342\000", "2017-01-29T12:34:56-16:00"),
T("\176\020\343\042\342\001", "2017-01-29T12:34:56-15:45"),
T("\176\020\343\042\342\100", "2017-01-29T12:34:56+00:00"),
T("\176\020\343\042\342\144", "2017-01-29T12:34:56+09:00"),
T("\176\020\343\042\342\175", "2017-01-29T12:34:56+15:15"),
T("\176\020\343\042\342\176", "2017-01-29T12:34:56+__:__"),
T("\176\020\343\042\342\177", "2017-01-29T12:34:56"), // no offset available
T("\176\020\343\042\343\273\232\311\377\144", "2017-01-29T12:34:56.999999999+09:00"),
{ 0 }, // sentinel
};
int failed = 0;
for (const struct test *it = TESTS; it->line > 0; ++it) {
char actual[256] = {0};
struct temporenc dt;
size_t read = temporenc_read(&dt, it->buf, it->len);
temporenc_format(actual, sizeof(actual), read > 0 && read == it->len ? &dt : NULL);
if (strcmp(it->expected, actual) != 0) {
printf("test failed on line %d: expected `%s` != actual `%s`\n",
(int) it->line, it->expected, actual);
failed = 1;
}
}
return failed;
}
#endif // !defined(IMPL_ONLY)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment