Skip to content

Instantly share code, notes, and snippets.

@apankrat
Last active October 22, 2022 23:50
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save apankrat/20776d68d1d97bca12576a6e204b2f74 to your computer and use it in GitHub Desktop.
Save apankrat/20776d68d1d97bca12576a6e204b2f74 to your computer and use it in GitHub Desktop.
/*
* Given a log line that starts like this:
*
* 2014.01.27 19:35:51.639 (UTC-1) 3 1 ...
*
* the following takes about 60 ms per call in a release build
* produced by VC++ 2017 using its stock CRT (v141):
*/
/*
* One-shot parse of the line prefix: seven unsigned short fields
* (year.month.day hour:minute:second.millis), the signed UTC offset
* (into foo), a single type character and an unsigned level.
* The trailing %n stores the number of characters consumed so far
* into `read`.
*
* NOTE(review): the return value of sscanf() is discarded in this
* statement, so a line that fails to match is not detected here.
*/
sscanf(line,
"%hu.%hu.%hu %hu:%hu:%hu.%hu (UTC%d) %c %u %n",
&t.wYear, &t.wMonth, &t.wDay,
&t.wHour, &t.wMinute, &t.wSecond, &t.wMilliseconds,
&foo, &type_sym, &level, &read);
/*
* if the same is done by simply advancing a ptr through the line
* and doing the cs101-style number parsing, it runs in... 0.2 ms
*
* that's 300 times faster
*/
/*
* Point src at the whole NUL-terminated line.
*/
src.ptr = line;
src.end = line + strlen(line);
/*
* Reject lines that are too short up front. 36 matches the length of
* the sample prefix "2014.01.27 19:35:51.639 (UTC-1) 3 1 " -
* NOTE(review): presumably the shortest valid prefix; confirm.
*/
if (src.size() < 36)
return false;
/*
* Walk the prefix field by field. The || chain short-circuits, so
* parsing stops at the first helper that fails. Unlike the sscanf()
* variant, the UTC offset is only skipped over (consume_num), not
* stored.
*/
if (! parse_number(src, t.wYear) ||
! consume_char(src, '.') ||
! parse_number(src, t.wMonth) ||
! consume_char(src, '.') ||
! parse_number(src, t.wDay) ||
! consume_char(src, ' ') ||
! parse_number(src, t.wHour) ||
! consume_char(src, ':') ||
! parse_number(src, t.wMinute) ||
! consume_char(src, ':') ||
! parse_number(src, t.wSecond) ||
! consume_char(src, '.') ||
! parse_number(src, t.wMilliseconds) ||
! consume_str (src, " (UTC") ||
! consume_num (src) ||
! consume_str (src, ") ") ||
! parse_char (src, type_sym) ||
! consume_char(src, ' ') ||
! parse_number(src, level) ||
! consume_char(src, ' '))
{
return false;
}
/*
* tl;dr - don't use stock MSVC sscanf() in fast paths
*/
@apankrat
Copy link
Author

apankrat commented Oct 1, 2019

/*
 * Takes one character off the front of the range.
 *
 * On success the character is stored in `val`, the range pointer is
 * moved one position forward and true is returned. If the range has
 * nothing left, false is returned and neither `val` nor the range
 * is modified.
 */
inline bool parse_char(char_range & str, char & val)
{
	if (str.size())
	{
		val = *str.ptr;
		++str.ptr;
		return true;
	}

	return false;
}

/*
 * Parses a run of decimal digits at the front of the range.
 *
 * `val` is reset to 0 and then accumulated digit by digit; the range
 * pointer is left on the first non-digit character (or at the end of
 * the range).
 *
 * Returns true if at least one digit was consumed. On false the range
 * is not advanced, but `val` has still been set to 0.
 *
 * No overflow checking is done: a digit run too long for T yields a
 * wrapped/truncated value (and is undefined for signed T).
 */
template <typename T>
inline bool parse_number(char_range & str, T & val)
{
	const char * was = str.ptr;

	/*
	 * isdigit() is only defined for EOF and values representable as
	 * unsigned char. Passing a plain char that happens to be negative
	 * (any byte >= 0x80 where char is signed) is undefined behaviour
	 * and trips an assertion in the MSVC debug CRT, hence the cast.
	 */
	for (val = 0; str.size() && isdigit(static_cast<unsigned char>(*str.ptr)); str.ptr++)
		val = 10 * val + (*str.ptr - '0');

	return (was < str.ptr);
}

/*
 * Skips a single expected character.
 *
 * If the range is non-empty and its first character equals `ch`, the
 * range pointer is moved past it and true is returned. Otherwise the
 * range is left as it was and false is returned.
 */
inline bool consume_char(char_range & str, char ch)
{
	if (str.size() && *str.ptr == ch)
	{
		++str.ptr;
		return true;
	}

	return false;
}

/*
 * Skips an expected NUL-terminated literal at the front of the range.
 *
 * Characters are compared one at a time, advancing the range pointer
 * after every match. Returns true once the whole literal has been
 * matched; an empty literal matches trivially.
 *
 * Returns false on the first mismatch or when the range runs out.
 * Note that in this case the range pointer stays where the comparison
 * stopped, i.e. the already matched part is NOT rolled back.
 */
inline bool consume_str(char_range & src, const char * str)
{
	while (*str)
	{
		if (! src.size())
			return false;

		if (*src.ptr != *str)
			return false;

		++str;
		++src.ptr;
	}

	return true;
}

/*
 * Skips an optionally signed decimal number without storing its value.
 *
 * Accepts one leading '+' or '-' followed by a run of digits and
 * leaves the range pointer on the first character after the digits.
 *
 * Returns true if at least one digit was consumed. Returns false if
 * the range is empty or no digit follows the optional sign; in the
 * latter case the sign itself remains consumed.
 */
inline bool consume_num(char_range & src)
{
	if (src.empty())
		return false;

	if (*src.ptr == '+' || *src.ptr == '-')
		src.ptr++;

	/* start of the digit run, used to tell whether any digit was seen */
	const char * was = src.ptr;

	/*
	 * isdigit() must not be handed a negative value other than EOF,
	 * so a plain (possibly signed) char goes through unsigned char
	 * first; otherwise bytes >= 0x80 are undefined behaviour.
	 */
	while (src.size() && isdigit(static_cast<unsigned char>(*src.ptr)))
		src.ptr++;

	return (was < src.ptr);
}

@apankrat
Copy link
Author

apankrat commented Oct 3, 2019

parse_number() obviously doesn't check for overflow, but that's trivial to add and comes with very little penalty.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment