Skip to content

Instantly share code, notes, and snippets.

@jefgen
Created March 2, 2021 06:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jefgen/814cb7adeb45e3923308a159f50b7b02 to your computer and use it in GitHub Desktop.
Save jefgen/814cb7adeb45e3923308a159f50b7b02 to your computer and use it in GitHub Desktop.
ICU Parsing/Formatting ISO 8601 date times
#include <windows.h>
#include <icu.h>
#include <memory>
#include <functional>
// Owning smart pointer for opaque ICU objects: a std::unique_ptr whose deleter is a
// std::function supplied at construction time (for example, a lambda that calls the
// matching ICU close function).
template <class Handle>
using deleted_unique_ptr = std::unique_ptr<Handle, std::function<void(Handle*)>>;
int wmain()
{
wprintf(L"Hello World!\n");
UErrorCode status = U_ZERO_ERROR;
/*
From: https://www.w3.org/TR/NOTE-datetime
Complete date plus hours, minutes and seconds:
YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
Complete date plus hours, minutes, seconds and a decimal fraction of a second
YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
*/
// In order to handle both styles, we create two parsers and fallback to the second one if the first one fails.
// Note: Alternatively we could also check the string length of the input date/time as well.
UChar patternWithMilliseconds[] = u"yyyy-MM-dd'T'HH:mm:ss.SSSZ";
UChar pattern[] = u"yyyy-MM-dd'T'HH:mm:ssZ";
deleted_unique_ptr<UDateFormat> parser1(
udat_open(UDAT_PATTERN, UDAT_PATTERN, "", u"UTC", -1, patternWithMilliseconds, -1, &status),
[](auto* p) { udat_close(p); }
);
if (U_FAILURE(status)) {
wprintf(L"Failed to create parser1.\n");
return -1;
}
deleted_unique_ptr<UDateFormat> parser2(
udat_open(UDAT_PATTERN, UDAT_PATTERN, "", u"UTC", -1, pattern, -1, &status),
[](auto* p) { udat_close(p); }
);
if (U_FAILURE(status)) {
wprintf(L"Failed to create parser2.\n");
return -1;
}
// array of ICU parsers
auto parsers = {
parser1.get(),
parser2.get()
};
// dates to parse
auto dates = {
u"1994-11-05T13:15:30Z",
u"2016-03-31T15:04:32.049Z",
u"2017-10-30T15:05:33Z",
u"1985-04-12T23:20:30.1234567-03",
u"2012-01-01T12:34:56+09:00"
};
// for testing we'll output the date/time again.
deleted_unique_ptr<UDateFormat> formatter(
udat_open(UDAT_PATTERN, UDAT_PATTERN, "", u"UTC", -1, patternWithMilliseconds, -1, &status),
[](auto* p) { udat_close(p); }
);
if (U_FAILURE(status)) {
wprintf(L"Failed to create formatter.\n");
return -1;
}
for (const auto& date : dates) {
// Note: ICU's UChar is char16_t which is the same as wchar_t on Windows.
wprintf(L"\nparsing date: %s\n", reinterpret_cast<const wchar_t*>(date));
UDate parsedDate = {};
for (const auto& parser : parsers) {
status = U_ZERO_ERROR;
parsedDate = udat_parse(parser, date, -1, 0, &status);
if (U_FAILURE(status)) {
wprintf(L" Note: falling back to second parser.\n");
} else {
break;
}
}
if (U_FAILURE(status)) {
wprintf(L"ERROR: Could not parse the date! -- Error: %hs \n", u_errorName(status));
continue;
}
UChar buffer[200] = {};
int32_t len = udat_format(formatter.get(), parsedDate, buffer, ARRAYSIZE(buffer), 0, &status);
if (U_FAILURE(status)) {
wprintf(L"Failed to format the date! -- Error: %hs \n", u_errorName(status));
}
if (len >= ARRAYSIZE(buffer)) {
wprintf(L"The formatted date is too big to fit!\n");
}
wprintf(L"Formatted: %s \n", reinterpret_cast<const wchar_t*>(buffer));
}
return 0;
}
/*
Output:
Hello World!
parsing date: 1994-11-05T13:15:30Z
Note: falling back to second parser.
Formatted: 1994-11-05T13:15:30.000+0000
parsing date: 2016-03-31T15:04:32.049Z
Formatted: 2016-03-31T15:04:32.049+0000
parsing date: 2017-10-30T15:05:33Z
Note: falling back to second parser.
Formatted: 2017-10-30T15:05:33.000+0000
parsing date: 1985-04-12T23:20:30.1234567-03
Formatted: 1985-04-13T02:20:30.123+0000
parsing date: 2012-01-01T12:34:56+09:00
Note: falling back to second parser.
Formatted: 2012-01-01T03:34:56.000+0000
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment