Skip to content

Instantly share code, notes, and snippets.

@atcarter714
Created February 24, 2024 05:24
Show Gist options
  • Save atcarter714/00dd7115ac0bc55ddd47dfa7dd1bca3e to your computer and use it in GitHub Desktop.
Save atcarter714/00dd7115ac0bc55ddd47dfa7dd1bca3e to your computer and use it in GitHub Desktop.
Extreme high-performance digit/number parsing of UTF-8 text data in C# ...
using System;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
namespace Arkaen.LowLevel {
/// <summary>High-performance conversion of digits in (UTF-8) text form into numeric primitive values.</summary>
/// <remarks><b>WARNING:</b>
/// <para>Does not validate input or throw exceptions. Make sure inputs are sanitized/valid.
/// Integer overflow wraps silently. The parsers never read outside the
/// <c>[str, str + length)</c> range supplied by the caller.</para>
/// <para>Designed to operate on <b>UTF-8</b> bytes ...</para>
/// <para>The idea behind the implementation is to provide:</para>
/// <list type="bullet">
/// <item>Self-contained logic without call overhead</item>
/// <item>Skip .NET bounds/overflow/safety checks</item>
/// <item>Allow compiler to inline all the code</item>
/// <item>Extreme high-performance parsing</item>
/// <item>Work in Unity with <i>Burst</i></item>
/// </list>
/// </remarks>
public static class FastParser {
    // -------------------------------------------------------------------
    //! UTF8 const values of characters:
    const byte ZERO  = 0x30, NINE = 0x39,
               CR    = 0x0D, DOT  = 0x2E,
               MINUS = 0x2D, PLUS = 0x2B,
               NL    = 0x0A ;

    //! Raw MethodImplOptions bits: 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization.
    const short InliningFlags = 0x100 | 0x200 ;
    // -------------------------------------------------------------------

    /// <summary>Finds total number of decimal digits in a <see cref="uint"/> value</summary>
    /// <param name="value">An unsigned (32-bit) integer value</param>
    /// <returns>The numeric value's digit count (1 for zero).</returns>
    [MethodImpl( InliningFlags )]
    public static uint NumberOfDigits( uint value ) {
        // Pure integer arithmetic: a float log10 rounds values such as
        // 999,999,999 up to 1e9 (reporting one digit too many) and is
        // undefined for zero.
        uint digits = 1U ;
        while ( value >= 10U ) {
            value /= 10U ;
            ++digits ;
        }
        return digits ;
    }

    /// <summary>Finds total number of decimal digits in a <see cref="ulong"/> value</summary>
    /// <param name="value">An unsigned (64-bit) integer value</param>
    /// <returns>The numeric value's digit count (1 for zero).</returns>
    [MethodImpl( InliningFlags )]
    public static ulong NumberOfDigits( ulong value ) {
        // Integer arithmetic for the same reason as the 32-bit overload:
        // a double cannot represent every 64-bit value exactly.
        ulong digits = 1UL ;
        while ( value >= 10UL ) {
            value /= 10UL ;
            ++digits ;
        }
        return digits ;
    }
    // -------------------------------------------------------------------

    /// <summary>Parses a <see cref="long"/> value from UTF-8 character bytes.</summary>
    /// <param name="str">Pointer to the UTF-8 string data</param>
    /// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
    /// <param name="pCharCount">A pointer to an integer (optional) to receive the number of bytes
    /// consumed: an optional leading sign plus every digit (leading zeros included).</param>
    /// <returns>A <see cref="long"/> value parsed from UTF-8 text (<see cref="string"/>) data.
    /// Parsing stops at the first non-digit byte or at <paramref name="length"/>.</returns>
    [MethodImpl( InliningFlags )]
    public static unsafe long FastParse64( byte* str, long length,
                                           long* pCharCount = null ) {
        byte* cursor = str ;
        byte* end    = str + length ;
        bool negative = false ;

        // Only look at the first byte when there is one.
        if ( cursor < end ) {
            switch ( *cursor ) {
                case MINUS:
                    negative = true ;
                    ++cursor ;
                    break ;
                case PLUS:
                    ++cursor ;
                    break ;
            }
        }

        // Leading zeros need no special pass: they leave the accumulator at 0.
        long result = 0 ;
        while ( cursor < end ) {
            byte digitByte = *cursor ;
            if ( digitByte is < ZERO or > NINE ) break ;
            result = unchecked( result * 10 + ( digitByte - ZERO ) ) ;
            ++cursor ;
        }

        if ( pCharCount is not null ) *pCharCount = cursor - str ;
        return negative ? unchecked( -result ) : result ;
    }

    /// <summary>Parses a <see cref="int"/> value from UTF-8 character bytes.</summary>
    /// <param name="str">Pointer to the UTF-8 string data</param>
    /// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
    /// <param name="pCharCount">A pointer to an integer (optional) to receive the number of bytes
    /// consumed: an optional leading sign plus every digit (leading zeros included).</param>
    /// <returns>A <see cref="int"/> value parsed from UTF-8 text (<see cref="string"/>) data.
    /// Parsing stops at the first non-digit byte or at <paramref name="length"/>.</returns>
    [MethodImpl( InliningFlags )]
    public static unsafe int FastParse( byte* str, int length,
                                        int* pCharCount = null ) {
        byte* cursor = str ;
        byte* end    = str + length ;
        bool negative = false ;

        // Only look at the first byte when there is one.
        if ( cursor < end ) {
            switch ( *cursor ) {
                case MINUS:
                    negative = true ;
                    ++cursor ;
                    break ;
                case PLUS:
                    ++cursor ;
                    break ;
            }
        }

        // Leading zeros need no special pass: they leave the accumulator at 0.
        int result = 0 ;
        while ( cursor < end ) {
            byte digitByte = *cursor ;
            if ( digitByte is < ZERO or > NINE ) break ;
            result = unchecked( result * 10 + ( digitByte - ZERO ) ) ;
            ++cursor ;
        }

        if ( pCharCount is not null ) *pCharCount = (int)( cursor - str ) ;
        return negative ? unchecked( -result ) : result ;
    }

    /// <summary>Parses a <see cref="uint"/> value from UTF-8 character bytes.</summary>
    /// <param name="str">Pointer to the UTF-8 string data</param>
    /// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
    /// <param name="pCharCount">A pointer to an integer (optional) to receive the number of digit
    /// bytes consumed (leading zeros included).</param>
    /// <returns>A <see cref="uint"/> value parsed from UTF-8 text (<see cref="string"/>) data.
    /// No sign character is accepted; parsing stops at the first non-digit byte
    /// or at <paramref name="length"/>.</returns>
    [MethodImpl( InliningFlags )]
    public static unsafe uint FastParseUnsigned( byte* str, uint length,
                                                 uint* pCharCount = null ) {
        byte* cursor = str ;
        byte* end    = str + length ;

        uint result = 0U ;
        while ( cursor < end ) {
            byte digitByte = *cursor ;
            if ( digitByte is < ZERO or > NINE ) break ;
            result = unchecked( result * 10U + (uint)( digitByte - ZERO ) ) ;
            ++cursor ;
        }

        if ( pCharCount is not null ) *pCharCount = (uint)( cursor - str ) ;
        return result ;
    }

    /// <summary>Parses a <see cref="float"/> value from UTF-8 character bytes.</summary>
    /// <param name="str">Pointer to the UTF-8 string data</param>
    /// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
    /// <returns>A <see cref="float"/> value parsed from UTF-8 text (<see cref="string"/>) data.
    /// Accepts <c>[+|-]digits[.digits]</c>; exponents are not supported.</returns>
    [MethodImpl( InliningFlags )]
    public static unsafe float FastParseF( byte* str, int length ) =>
        (float)ParseDecimal( str, length ) ;

    /// <summary>Parses a <see cref="double"/> value from UTF-8 character bytes.</summary>
    /// <param name="str">Pointer to the UTF-8 string data</param>
    /// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
    /// <returns>A <see cref="double"/> value parsed from UTF-8 text (<see cref="string"/>) data.
    /// Accepts <c>[+|-]digits[.digits]</c>; exponents are not supported.</returns>
    [MethodImpl( InliningFlags )]
    public static unsafe double FastParseD( byte* str, int length ) =>
        ParseDecimal( str, length ) ;

    /// <summary>Shared core of <see cref="FastParseF"/> and <see cref="FastParseD"/>:
    /// reads an optional sign, integer digits and an optional <c>.</c> followed by
    /// fraction digits, never reading past <c>str + length</c>.</summary>
    [MethodImpl( InliningFlags )]
    static unsafe double ParseDecimal( byte* str, int length ) {
        byte* cursor = str ;
        byte* end    = str + length ;
        bool negative = false ;

        if ( cursor < end ) {
            switch ( *cursor ) {
                case MINUS:
                    negative = true ;
                    ++cursor ;
                    break ;
                case PLUS:
                    ++cursor ;
                    break ;
            }
        }

        // Integer digits.
        double value = 0.0 ;
        while ( cursor < end ) {
            byte intDigit = *cursor ;
            if ( intDigit is < ZERO or > NINE ) break ;
            value = value * 10.0 + ( intDigit - ZERO ) ;
            ++cursor ;
        }

        // Fraction digits: keep accumulating the digits as one whole number and
        // divide once by the matching power of ten. This keeps leading fraction
        // zeros ("1.05") and the sign ("-0.5") correct without extra bookkeeping.
        if ( cursor < end && *cursor == DOT ) {
            ++cursor ;
            double scale = 1.0 ;
            while ( cursor < end ) {
                byte fracDigit = *cursor ;
                if ( fracDigit is < ZERO or > NINE ) break ;
                value = value * 10.0 + ( fracDigit - ZERO ) ;
                scale *= 10.0 ;
                ++cursor ;
            }
            value /= scale ;
        }

        return negative ? -value : value ;
    }
    // ===================================================================
}
}
@atcarter714
Copy link
Author

Quick first iteration upload 2/23/2024 ... requires /unsafe compiler switch. Be sure to add [BurstCompile] to class and methods for Unity + Burst support. Beware of "dirty" inputs: not designed to handle errors or support exceptions (which is literally by design) ...

@atcarter714
Copy link
Author

NOTE: Designed for use when your data is already in UTF-8 ... don't convert char data just to use this, unless you want to convert it in advance and save it to a file, or you are retrieving web/cloud data that is already in UTF-8 form. I will update this ASAP, however, with a char version and support for other encodings.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment