Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
/*
* Given a log line that starts like this:
*
* 2014.01.27 19:35:51.639 (UTC-1) 3 1 ...
*
* the following takes about 60 ms per call in a release build
* produced by VC++ 2017 using its stock CRT (v141):
*/
// Parse the date, time, UTC offset, type symbol and level from the start of
// `line` in one sscanf() call. The trailing %n stores the number of characters
// consumed so far into `read`; it does not count towards sscanf()'s return
// value, which is discarded here.
// NOTE(review): %hu requires the t.w* fields to be unsigned short, %d an int
// for `foo` and %u an unsigned int for `level` - declarations are not visible
// here, confirm.
sscanf(line,
"%hu.%hu.%hu %hu:%hu:%hu.%hu (UTC%d) %c %u %n",
&t.wYear, &t.wMonth, &t.wDay,
&t.wHour, &t.wMinute, &t.wSecond, &t.wMilliseconds,
&foo, &type_sym, &level, &read);
/*
* if the same is done by simply advancing a ptr through the line
* and doing the cs101-style number parsing, it runs in... 0.2 ms
*
* that's 300 times faster
*/
/*
* Hand-rolled variant: point `src` at the whole line and match the prefix
* one field at a time, bailing out at the first piece that does not match.
*/
src.ptr = line;
src.end = line + strlen(line);

/*
* Reject lines that are too short up front.
* NOTE(review): 36 is presumably the length of the shortest zero-padded
* prefix (one-digit UTC offset and level) - confirm.
*/
if (src.size() < 36)
{
	return false;
}

/* The && chain short-circuits, so nothing past the first failure runs. */
if (! (parse_number(src, t.wYear)         && consume_char(src, '.') &&
       parse_number(src, t.wMonth)        && consume_char(src, '.') &&
       parse_number(src, t.wDay)          && consume_char(src, ' ') &&
       parse_number(src, t.wHour)         && consume_char(src, ':') &&
       parse_number(src, t.wMinute)       && consume_char(src, ':') &&
       parse_number(src, t.wSecond)       && consume_char(src, '.') &&
       parse_number(src, t.wMilliseconds) && consume_str (src, " (UTC") &&
       consume_num (src)                  && consume_str (src, ") ") &&
       parse_char  (src, type_sym)        && consume_char(src, ' ') &&
       parse_number(src, level)           && consume_char(src, ' ')))
{
	return false;
}
/*
* tl;dr - don't use stock MSVC sscanf() in fast paths
*/
@apankrat

This comment has been minimized.

Copy link
Owner Author

@apankrat apankrat commented Oct 1, 2019

/*
 *	Takes the next character of 'str', whatever it is, and stores
 *	it in 'val', stepping str.ptr past it.
 *
 *	Returns false, leaving 'str' and 'val' alone, if the range
 *	has no characters left.
 */
inline bool parse_char(char_range & str, char & val)
{
	if (str.size())
	{
		val = *str.ptr;
		++str.ptr;
		return true;
	}

	return false;
}

/*
 *	Parses a run of decimal digits at the front of 'str' into 'val'
 *	and advances str.ptr past the digits.
 *
 *	Returns true if at least one digit was consumed. 'val' is reset
 *	to 0 even when nothing is parsed. There is no overflow check, so
 *	the result is not meaningful if the number does not fit in T.
 */
template <typename T>
inline bool parse_number(char_range & str, T & val)
{
	const char * was = str.ptr;

	// isdigit() is undefined for negative arguments (other than EOF),
	// so pass the character as unsigned char in case plain char is signed
	for (val = 0; str.size() && isdigit(static_cast<unsigned char>(*str.ptr)); str.ptr++)
		val = static_cast<T>(10 * val + (*str.ptr - '0'));

	return (was < str.ptr);
}

/*
 *	Steps over the next character of 'str' if it is exactly 'ch'.
 *
 *	Returns false, without moving str.ptr, when the range is
 *	exhausted or the next character is something else.
 */
inline bool consume_char(char_range & str, char ch)
{
	if (str.size() && *str.ptr == ch)
	{
		++str.ptr;
		return true;
	}

	return false;
}

/*
 *	Steps over the NUL-terminated literal 'str' at the front of 'src'.
 *
 *	Returns true once every character of 'str' has been matched.
 *	On a mismatch, or if 'src' runs out first, returns false with
 *	src.ptr left just past the characters that did match.
 */
inline bool consume_str(char_range & src, const char * str)
{
	while (*str)
	{
		if (! src.size() || *src.ptr != *str)
			return false;

		++str;
		++src.ptr;
	}

	return true;
}

/*
 *	Skips an optionally signed decimal number at the front of 'src'
 *	without storing its value.
 *
 *	Returns true if at least one digit followed the optional '+' or
 *	'-'. A sign that is not followed by a digit is still consumed,
 *	i.e. src.ptr is left just past it when false is returned.
 */
inline bool consume_num(char_range & src)
{
	// same emptiness test as the sibling helpers
	if (! src.size())
		return false;

	if (*src.ptr == '+' || *src.ptr == '-')
		src.ptr++;

	const char * was = src.ptr;

	// isdigit() is undefined for negative arguments (other than EOF),
	// so pass the character as unsigned char in case plain char is signed
	while (src.size() && isdigit(static_cast<unsigned char>(*src.ptr)))
		src.ptr++;

	return (was < src.ptr);
}
@apankrat

This comment has been minimized.

Copy link
Owner Author

@apankrat apankrat commented Oct 3, 2019

parse_number() obviously doesn't check for overflow, but that's trivial to add, with very little penalty.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment