Created
January 28, 2017 16:33
-
-
Save lifthrasiir/004c214c6c406dfd79217c6e1b8e10de to your computer and use it in GitHub Desktop.
Example implementation for the proposed Temporenc byte format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example implementation for the proposed Temporenc byte format
// Kang Seonghoon, 2017-01-29, public domain
#include <stdint.h> | |
#include <stddef.h> | |
#include <string.h> | |
// A decoded Temporenc value.
//
// Every `has_xxx` flag tells whether the matching component was present in the
// encoded input; the value of a field whose flag is clear is not meaningful.
struct temporenc {
    // Presence flags. They are declared `unsigned int` because the signedness of a
    // plain `int` bit-field is implementation-defined: a signed 1-bit field can only
    // hold 0 and -1, so a set flag would not compare equal to 1.
    unsigned int has_date: 1, has_year: 1, has_mon: 1, has_day: 1;
    unsigned int has_time: 1, has_hour: 1, has_min: 1, has_sec: 1, has_nsec: 1;
    unsigned int has_utcoff: 1, utcoff_unknown: 1;
    int32_t year; // ISO 8601 year
    uint32_t mon, day; // 1-based
    uint32_t hour, min, sec, nsec; // 0-based
    int32_t utcoff; // in seconds, e.g. +09:00 == +32400
};
// Helper that unpacks the time-of-day bits of `second` into `out`.
// Yields 1 on success and 0 as soon as a field holds an invalid value.
//
// All raw fields and the two trailing flag bits are stored first; the `has_hour`,
// `has_min` and `has_sec` flags are only ever raised here, never cleared, so the
// caller is expected to pass a zero-initialized structure.
static int temporenc_read_time(struct temporenc *out, uint32_t second) {
    // bit layout of `second`: ...HHH HHNNNNNN SSSSSSXY
    const uint32_t hour_bits = (second >> 14) & 31;
    const uint32_t min_bits = (second >> 8) & 63;
    const uint32_t sec_bits = (second >> 2) & 63;

    out->has_time = 1;
    out->hour = hour_bits;
    out->min = min_bits;
    out->sec = sec_bits;
    out->has_nsec = second & 1;          // bit Y
    out->has_utcoff = (second >> 1) & 1; // bit X

    // hour: 0..23 is a real value, 27 leaves the hour absent, anything else is rejected
    if (hour_bits < 24) {
        out->has_hour = 1;
    } else if (hour_bits != 27) {
        return 0;
    }

    // minute: 0..59 is a real value, 63 leaves the minute absent
    if (min_bits < 60) {
        out->has_min = 1;
    } else if (min_bits != 63) {
        return 0;
    }

    // second: 0..60 is a real value (60 is accepted), 63 leaves the second absent
    if (sec_bits < 61) {
        out->has_sec = 1;
    } else if (sec_bits != 63) {
        return 0;
    }

    return 1;
}
// returns the number of bytes read (<= `len`), or 0 if anything goes wrong. | |
// if it succeeds it will update `dt`; otherwise the value of `dt` is unknown. | |
// `dt.xxx` value is still invalid if the corresponding `dt.has_xxx` is false, | |
// with an exception that `dt.nsec` is zero if `dt.has_sec && !dt.has_nsec`. | |
size_t temporenc_read(struct temporenc *out, const char *buf, size_t len) { | |
if (out == NULL || buf == NULL || len < 3) return 0; | |
struct temporenc dt = {0}; | |
size_t read = 0; | |
#define BUF(i) ((uint32_t) (uint8_t) buf[i]) | |
// decode first three bytes | |
// first: YYYYYYYY YYYYMMMM DDDDDHHH (continues...) | |
const uint32_t first = (BUF(0) << 16) | (BUF(1) << 8) | BUF(2); | |
read += 3; | |
switch (BUF(0)) { | |
case 0xfc: case 0xfd: // time-only, shift `first` to match the other case | |
if (!temporenc_read_time(&dt, first << 2)) return 0; // assume XY bits to be 00 | |
goto okay; | |
case 0xfe: case 0xff: // reserved | |
return 0; | |
} | |
dt.has_date = 1; | |
dt.year = (int32_t) (first >> 12); | |
dt.mon = ((first >> 8) & 15) + 1; | |
dt.day = ((first >> 3) & 31) + 1; | |
if (dt.year < 4031) dt.has_year = 1; | |
if (dt.mon < 13) dt.has_mon = 1; else if (dt.mon != 16) return 0; | |
uint32_t ndays = 31; // the conservative upper bound if year and month are absent | |
if (dt.has_year && dt.has_mon) { | |
const uint8_t NDAYSINMON[13] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | |
ndays = NDAYSINMON[dt.mon]; | |
if (dt.mon == 2) ndays += !(dt.year % 4) && (!(dt.year % 100) || (dt.year % 400)); | |
} | |
if (dt.day <= ndays) dt.has_day = 1; else if (dt.day != 32) return 0; | |
if ((first & 7) == 7) goto okay; // date-only | |
// decode next two bytes | |
if (len < 5) return 0; | |
const uint32_t second = ((first & 7) << 16) | (BUF(3) << 8) | BUF(4); | |
read += 2; | |
if (!temporenc_read_time(&dt, second)) return 0; | |
// decode the sub-second component if any | |
if (dt.has_nsec) { | |
if (!dt.has_sec) return 0; | |
uint32_t tag = BUF(5) >> 6; | |
if (len < 7 + tag) return 0; | |
switch (tag) { | |
case 0: // millisecond | |
dt.nsec = (BUF(5) << 8) | BUF(6); | |
read += 2; | |
if (dt.nsec & 0x0f) return 0; // no undefined padding | |
dt.nsec = (dt.nsec >> 4) & 0x3ff; | |
if (dt.nsec >= 1000) return 0; | |
dt.nsec *= 1000000; | |
break; | |
case 1: // microsecond | |
dt.nsec = (BUF(5) << 16) | (BUF(6) << 8) | BUF(7); | |
read += 3; | |
if (dt.nsec & 0x03) return 0; // no undefined padding | |
dt.nsec = (dt.nsec >> 2) & 0xfffff; | |
if (dt.nsec >= 1000000) return 0; | |
dt.nsec *= 1000; | |
break; | |
case 2: case 3: // nsec and more | |
dt.nsec = (BUF(5) << 24) | (BUF(6) << 16) | (BUF(7) << 8) | BUF(8); | |
read += 4; | |
dt.nsec = dt.nsec & 0x3fffffff; | |
if (dt.nsec >= 1000000000) return 0; | |
if (tag == 3) { // skip additional precision if present | |
// nit: we don't check the validity for those bits | |
do { | |
tag = BUF(read) >> 6; | |
const size_t required = read + 2 + tag; | |
if (len < required) return 0; | |
read = required; | |
} while (tag == 3); | |
} | |
break; | |
} | |
} | |
// decode the UTC offset if any | |
if (dt.has_utcoff) { | |
if (len < read + 1) return 0; | |
const uint32_t qutcoff = BUF(read); | |
read += 1; | |
if (qutcoff & 0x80) return 0; // no undefined padding | |
switch (qutcoff) { | |
case 126: dt.utcoff_unknown = 1; break; | |
case 127: dt.has_utcoff = 0; break; | |
default: dt.utcoff = ((int32_t) qutcoff - 64) * 15 * 60; | |
} | |
} | |
okay: | |
*out = dt; | |
return read; | |
} | |
#ifndef IMPL_ONLY | |
#include <stdio.h> | |
// a quick and dirty formatting | |
int temporenc_format(char *buf, size_t len, const struct temporenc *dt) { | |
if (dt == NULL) return snprintf(buf, len, "(null)"); | |
int written = 0; | |
#define FMT(...) do { \ | |
int ret = snprintf(buf, len, ##__VA_ARGS__); \ | |
if (ret < 0) return ret; \ | |
written += ret; \ | |
if ((size_t) ret > len) ret = (int) len; \ | |
len -= ret; \ | |
buf += ret; \ | |
} while (0) | |
if (dt->has_date) { | |
if (dt->has_year) FMT("%04d", (int) dt->year); else FMT("____"); | |
if (dt->has_mon) FMT("-%02d", (int) dt->mon); else FMT("-__"); | |
if (dt->has_day) FMT("-%02d", (int) dt->day); else FMT("-__"); | |
} | |
if (dt->has_time) { | |
if (dt->has_hour) FMT("T%02d", (int) dt->hour); else FMT("T__"); | |
if (dt->has_min) FMT(":%02d", (int) dt->min); else FMT(":__"); | |
if (dt->has_sec) FMT(":%02d", (int) dt->sec); else FMT(":__"); | |
if (dt->has_nsec) FMT(".%09d", (int) dt->nsec); | |
} | |
if (dt->has_utcoff) { | |
if (dt->utcoff_unknown) { | |
FMT("+__:__"); | |
} else if (dt->utcoff < 0) { | |
FMT("-%02d:%02d", (int) (-dt->utcoff / 60 / 60), (int) (-dt->utcoff / 60 % 60)); | |
} else { | |
FMT("+%02d:%02d", (int) (dt->utcoff / 60 / 60), (int) (dt->utcoff / 60 % 60)); | |
} | |
} | |
return written; | |
} | |
// test driver | |
int main() { | |
struct test { size_t line, len; const char buf[16]; const char *expected; }; | |
#define T(buf, fmt) { __LINE__, sizeof(buf) - 1, (buf), (fmt) } | |
static const struct test TESTS[] = { | |
T("", "(null)"), | |
T("\000", "(null)"), | |
T("\000\000", "(null)"), | |
T("\000\000\000", "(null)"), | |
T("\000\000\007", "0000-01-01"), | |
T("\000\000\017", "0000-01-02"), | |
T("\176\020\347", "2017-01-29"), | |
T("\176\020\377", "2017-01-__"), | |
T("\176\021\337", "2017-02-28"), | |
T("\176\021\347", "(null)"), // 2017-02-29 | |
T("\176\001\347", "2016-02-29"), | |
T("\175\001\347", "2000-02-29"), | |
T("\176\037\067", "2017-__-07"), | |
T("\373\353\367", "4030-12-31"), | |
T("\373\373\367", "____-12-31"), | |
T("\373\377\377", "____-__-__"), | |
T("\374\000\000", "T00:00:00"), | |
T("\374\020\203", "T01:02:03"), | |
T("\375\176\374", "T23:59:60"), | |
T("\375\176\375", "(null)"), // T23:60:61 | |
T("\375\177\074", "(null)"), // T23:60:60 | |
T("\375\216\374", "(null)"), // T24:59:60 | |
T("\374\020\277", "T01:02:__"), | |
T("\374\037\303", "T01:__:03"), | |
T("\375\260\203", "T__:02:03"), | |
T("\375\277\377", "T__:__:__"), | |
T("\377\377\377", "(null)"), | |
T("\176\020\343", "(null)"), | |
T("\176\020\343\042", "(null)"), | |
T("\176\020\343\042\340", "2017-01-29T12:34:56"), | |
T("\176\020\343\042\341", "(null)"), | |
T("\176\020\343\042\341\076", "(null)"), | |
T("\176\020\343\042\341\076\160", "2017-01-29T12:34:56.999000000"), | |
T("\176\020\343\042\341\076\161", "(null)"), // invalid padding | |
T("\176\020\343\042\341\076\200", "(null)"), // 2017-01-29T12:34:56.(1000)000000 | |
T("\176\020\343\042\341\175", "(null)"), | |
T("\176\020\343\042\341\175\010", "(null)"), | |
T("\176\020\343\042\341\175\010\374", "2017-01-29T12:34:56.999999000"), | |
T("\176\020\343\042\341\175\010\375", "(null)"), // invalid padding | |
T("\176\020\343\042\341\175\011\000", "(null)"), // 2017-01-29T12:34:56.(1000000)000 | |
T("\176\020\343\042\341\273", "(null)"), | |
T("\176\020\343\042\341\273\232", "(null)"), | |
T("\176\020\343\042\341\273\232\311", "(null)"), | |
T("\176\020\343\042\341\273\232\311\377", "2017-01-29T12:34:56.999999999"), | |
T("\176\020\343\042\341\273\232\312\000", "(null)"), // 2017-01-29T12:34:56.(1000000000) | |
T("\176\020\343\042\341\373\232\311\377", "(null)"), | |
T("\176\020\343\042\341\373\232\311\377\076\160", "2017-01-29T12:34:56.999999999"), | |
T("\176\020\343\042\341\373\232\311\377\373\232", "(null)"), | |
T("\176\020\343\042\374", "2017-01-29T12:34:__"), | |
T("\176\020\343\042\375\076\160", "(null)"), // 2017-01-29T12:34:__.999000000 | |
T("\373\377\376\377\374", "____-__-__T__:__:__"), | |
T("\176\020\343\042\342", "(null)"), | |
T("\176\020\343\042\342\000", "2017-01-29T12:34:56-16:00"), | |
T("\176\020\343\042\342\001", "2017-01-29T12:34:56-15:45"), | |
T("\176\020\343\042\342\100", "2017-01-29T12:34:56+00:00"), | |
T("\176\020\343\042\342\144", "2017-01-29T12:34:56+09:00"), | |
T("\176\020\343\042\342\175", "2017-01-29T12:34:56+15:15"), | |
T("\176\020\343\042\342\176", "2017-01-29T12:34:56+__:__"), | |
T("\176\020\343\042\342\177", "2017-01-29T12:34:56"), // no offset available | |
T("\176\020\343\042\343\273\232\311\377\144", "2017-01-29T12:34:56.999999999+09:00"), | |
{ 0 }, // sentinel | |
}; | |
int failed = 0; | |
for (const struct test *it = TESTS; it->line > 0; ++it) { | |
char actual[256] = {0}; | |
struct temporenc dt; | |
size_t read = temporenc_read(&dt, it->buf, it->len); | |
temporenc_format(actual, sizeof(actual), read > 0 && read == it->len ? &dt : NULL); | |
if (strcmp(it->expected, actual) != 0) { | |
printf("test failed on line %d: expected `%s` != actual `%s`\n", | |
(int) it->line, it->expected, actual); | |
failed = 1; | |
} | |
} | |
return failed; | |
} | |
#endif // !defined(IMPL_ONLY) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment