Created
February 24, 2024 05:24
-
-
Save atcarter714/00dd7115ac0bc55ddd47dfa7dd1bca3e to your computer and use it in GitHub Desktop.
Extreme high-performance digit/number parsing of UTF-8 text data in C# ...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
using System.Collections.Generic; | |
using System.Runtime.CompilerServices; | |
namespace Arkaen.LowLevel { | |
/// <summary>High-performance conversion of digits in (UTF-8) text form into numeric primitive values.</summary> | |
/// <remarks><b>WARNING:</b> | |
/// <para>Does not perform error-checking or throw exceptions. Make sure inputs are sanitized/valid.</para>
/// <para>Designed to operate on <b>UTF-8</b> bytes ...</para> | |
/// <para>The idea behind the implementation is to provide:</para> | |
/// <list type="bullet"> | |
/// <item>Self-contained logic without call overhead</item> | |
/// <item>Skip .NET bounds/overflow/safety checks</item> | |
/// <item>Allow compiler to inline all the code</item> | |
/// <item>Extreme high-performance parsing</item> | |
/// <item>Work in Unity with <i>Burst</i></item> | |
/// </list> | |
/// </remarks> | |
public static class FastParser { | |
// ------------------------------------------------------------------- | |
//! UTF-8 byte values of the characters used by the parsers.
//! One declaration per constant so each value can carry its character.
const byte ZERO  = 0x30 ;  // '0'
const byte NINE  = 0x39 ;  // '9'
const byte CR    = 0x0D ;  // '\r' (carriage return)
const byte DOT   = 0x2E ;  // '.'
const byte MINUS = 0x2D ;  // '-'
const byte PLUS  = 0x2B ;  // '+'
const byte NL    = 0x0A ;  // '\n' (line feed)
// ------------------------------------------------------------------- | |
/// <summary>Finds the total number of decimal digits in a <see cref="uint"/> value.</summary>
/// <remarks>
/// Counts with integer arithmetic only. A floating-point <c>Log10</c> is not usable here:
/// a <see cref="float"/> cannot hold every 32-bit integer exactly, so values just below a
/// power of ten (e.g. 99,999,999) round up to that power and report one digit too many,
/// and the logarithm of zero is negative infinity.
/// </remarks>
/// <param name="value">An unsigned (32-bit) integer value.</param>
/// <returns>
/// The number of decimal digits needed to write <paramref name="value"/> (1 to 10).
/// Zero is written as "0" and therefore counts as one digit.
/// </returns>
[MethodImpl(0x100 | 0x200)] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static uint NumberOfDigits( uint value ) {
    uint digits = 1U ; // every value, including zero, has at least one digit
    while ( value >= 10U ) {
        value /= 10U ; // drop the least-significant digit
        ++digits ;
    }
    return digits ;
}
/// <summary>Finds the total number of decimal digits in a <see cref="ulong"/> value.</summary>
/// <remarks>
/// Counts with integer arithmetic only. A floating-point <c>Log10</c> is not usable here:
/// a <see cref="double"/> cannot hold every 64-bit integer exactly, so values just below a
/// power of ten (e.g. 999,999,999,999,999,999) round up to that power and report one digit
/// too many, and the logarithm of zero is negative infinity.
/// </remarks>
/// <param name="value">An unsigned (64-bit) integer value.</param>
/// <returns>
/// The number of decimal digits needed to write <paramref name="value"/> (1 to 20).
/// Zero is written as "0" and therefore counts as one digit.
/// </returns>
[MethodImpl(0x100 | 0x200)] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static ulong NumberOfDigits( ulong value ) {
    ulong digits = 1UL ; // every value, including zero, has at least one digit
    while ( value >= 10UL ) {
        value /= 10UL ; // drop the least-significant digit
        ++digits ;
    }
    return digits ;
}
// ------------------------------------------------------------------- | |
/// <summary>Parses a <see cref="long"/> value from UTF-8 character bytes.</summary>
/// <remarks>
/// Accepts an optional leading '+' or '-', then consumes decimal digits until the first
/// non-digit byte or until <paramref name="length"/> bytes have been read, whichever comes
/// first. No byte at or beyond <c>str + length</c> is read, so empty and all-zero inputs are
/// safe. No overflow detection is performed and no exceptions are thrown.
/// </remarks>
/// <param name="str">Pointer to the UTF-8 string data.</param>
/// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
/// <param name="pCharCount">
/// Optional pointer that receives the number of bytes consumed: the sign character (if any),
/// leading zeros and digits.
/// </param>
/// <returns>
/// The parsed <see cref="long"/> value; zero when no digits were found.
/// </returns>
[MethodImpl(0x100 | 0x200)] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static unsafe long FastParse64( byte* str, long length,
                                       long* pCharCount = null ) {
    long result = 0, nChars = 0, sign = 1 ;
    byte* end = str + length ;

    // Optional sign: only inspect the first byte when the buffer is not empty.
    if ( str < end ) {
        switch ( *str ) {
            case MINUS:
                sign = -1 ;
                ++nChars ;
                ++str ; // negative
                break ;
            case PLUS:
                ++nChars ;
                ++str ; // positive
                break ;
        }
    }

    // Leading zeros need no separate pass: they add nothing to the value
    // and are counted as consumed bytes like any other digit.
    while ( str < end ) {
        byte digitByte = *str ;
        if ( digitByte is < ZERO or > NINE ) break ; // first non-digit ends the number
        result = result * 10 + ( digitByte - ZERO ) ;
        ++str ;
        ++nChars ;
    }

    if ( pCharCount is not null ) *pCharCount = nChars ;
    return result * sign ;
}
/// <summary>Parses an <see cref="int"/> value from UTF-8 character bytes.</summary>
/// <remarks>
/// Accepts an optional leading '+' or '-', then consumes decimal digits until the first
/// non-digit byte or until <paramref name="length"/> bytes have been read, whichever comes
/// first. No byte at or beyond <c>str + length</c> is read, so empty and all-zero inputs are
/// safe. No overflow detection is performed and no exceptions are thrown.
/// </remarks>
/// <param name="str">Pointer to the UTF-8 string data.</param>
/// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
/// <param name="pCharCount">
/// Optional pointer that receives the number of bytes consumed: the sign character (if any),
/// leading zeros and digits.
/// </param>
/// <returns>
/// The parsed <see cref="int"/> value; zero when no digits were found.
/// </returns>
[MethodImpl(0x100 | 0x200)] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static unsafe int FastParse( byte* str, int length,
                                    int* pCharCount = null ) {
    int nChars = 0, sign = 1, result = 0 ;
    byte* end = str + length ;

    // Optional sign: only inspect the first byte when the buffer is not empty.
    if ( str < end ) {
        switch ( *str ) {
            case MINUS:
                sign = -1 ;
                ++nChars ;
                ++str ; // negative
                break ;
            case PLUS:
                ++nChars ;
                ++str ; // positive
                break ;
        }
    }

    // Leading zeros need no separate pass: they add nothing to the value
    // and are counted as consumed bytes like any other digit.
    while ( str < end ) {
        byte digitByte = *str ;
        if ( digitByte is < ZERO or > NINE ) break ; // first non-digit ends the number
        result = result * 10 + ( digitByte - ZERO ) ;
        ++str ;
        ++nChars ; // next byte
    }

    if ( pCharCount is not null ) *pCharCount = nChars ;
    return result * sign ;
}
/// <summary>Parses a <see cref="uint"/> value from UTF-8 character bytes.</summary>
/// <remarks>
/// Consumes decimal digits until the first non-digit byte or until
/// <paramref name="length"/> bytes have been read, whichever comes first. Sign characters
/// are not accepted. No byte at or beyond <c>str + length</c> is read, so empty and all-zero
/// inputs are safe. No overflow detection is performed and no exceptions are thrown.
/// </remarks>
/// <param name="str">Pointer to the UTF-8 string data.</param>
/// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
/// <param name="pCharCount">
/// Optional pointer that receives the number of bytes consumed (leading zeros and digits).
/// </param>
/// <returns>
/// The parsed <see cref="uint"/> value; zero when no digits were found.
/// </returns>
[MethodImpl(0x100 | 0x200)] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static unsafe uint FastParseUnsigned( byte* str, uint length,
                                             uint* pCharCount = null ) {
    uint nChars = 0, result = 0 ;
    byte* end = str + length ;

    // Leading zeros need no separate pass: they add nothing to the value
    // and are counted as consumed bytes like any other digit.
    while ( str < end ) {
        byte digitByte = *str ;
        if ( digitByte is < ZERO or > NINE ) break ; // first non-digit ends the number
        result = result * 10U + (uint)( digitByte - ZERO ) ;
        ++str ;
        ++nChars ;
    }

    if ( pCharCount is not null ) *pCharCount = nChars ;
    return result ;
}
/// <summary>Parses a <see cref="float"/> value from UTF-8 character bytes.</summary>
/// <remarks>
/// Expected form: optional sign, integer digits, then optionally a '.' followed by
/// fraction digits. Exponent notation is not handled. The integer part is parsed with
/// <c>FastParse</c>, so it is limited to the <see cref="int"/> range. Fraction digits are
/// accumulated in <see cref="double"/> and the sum is narrowed to <see cref="float"/> at
/// the end. No byte at or beyond <c>str + length</c> is read by this method itself.
/// </remarks>
/// <param name="str">Pointer to the UTF-8 string data.</param>
/// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
/// <returns>
/// The parsed <see cref="float"/> value; zero for empty input.
/// </returns>
[MethodImpl( 0x100 | 0x200 )] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static unsafe float FastParseF( byte* str, int length ) {
    // 10^22 is the largest power of ten a double holds exactly; stopping there keeps
    // the divisor exact and prevents it from ever overflowing on absurdly long input.
    const int MaxFractionDigits = 22 ;

    if ( length <= 0 ) return 0f ;
    byte* end = str + length ;

    // The integer part alone cannot carry the sign when it is zero ("-0.5" parses to 0),
    // so remember the sign here and apply it to the fraction explicitly.
    bool negative = *str is MINUS ;

    // consumed = bytes used by sign + integer digits, i.e. the offset of the separator.
    int consumed = 0 ;
    int leftHand = FastParse( str, length, &consumed ) ;

    // No fraction unless the very next byte inside the buffer is a decimal point.
    if ( consumed >= length ) return (float)leftHand ;
    byte* p = str + consumed ;
    if ( *p is not DOT ) return (float)leftHand ;
    ++p ; // step over '.'

    // Accumulate the fraction as digits / 10^count; leading fraction zeros
    // ("1.05") are handled naturally because they still grow the divisor.
    double fraction = 0.0, scale = 1.0 ;
    for ( int i = 0 ; p < end && i < MaxFractionDigits ; ++i, ++p ) {
        byte digitByte = *p ;
        if ( digitByte is < ZERO or > NINE ) break ;
        fraction = fraction * 10.0 + ( digitByte - ZERO ) ;
        scale *= 10.0 ;
    }

    double magnitude = fraction / scale ;
    return (float)( negative ? leftHand - magnitude : leftHand + magnitude ) ;
}
/// <summary>Parses a <see cref="double"/> value from UTF-8 character bytes.</summary>
/// <remarks>
/// Expected form: optional sign, integer digits, then optionally a '.' followed by
/// fraction digits. Exponent notation is not handled. The integer part is parsed with
/// <c>FastParse</c>, so it is limited to the <see cref="int"/> range. No byte at or beyond
/// <c>str + length</c> is read by this method itself.
/// </remarks>
/// <param name="str">Pointer to the UTF-8 string data.</param>
/// <param name="length">The length (in bytes) of the UTF-8 string data.</param>
/// <returns>
/// The parsed <see cref="double"/> value; zero for empty input.
/// </returns>
[MethodImpl( 0x100 | 0x200 )] // 0x100 = AggressiveInlining, 0x200 = AggressiveOptimization
public static unsafe double FastParseD( byte* str, int length ) {
    // 10^22 is the largest power of ten a double holds exactly; stopping there keeps
    // the divisor exact and prevents it from ever overflowing on absurdly long input.
    const int MaxFractionDigits = 22 ;

    if ( length <= 0 ) return 0.0 ;
    byte* end = str + length ;

    // The integer part alone cannot carry the sign when it is zero ("-0.5" parses to 0),
    // so remember the sign here and apply it to the fraction explicitly.
    bool negative = *str is MINUS ;

    // consumed = bytes used by sign + integer digits, i.e. the offset of the separator.
    int consumed = 0 ;
    int leftHand = FastParse( str, length, &consumed ) ;

    // No fraction unless the very next byte inside the buffer is a decimal point.
    if ( consumed >= length ) return (double)leftHand ;
    byte* p = str + consumed ;
    if ( *p is not DOT ) return (double)leftHand ;
    ++p ; // step over '.'

    // Accumulate the fraction as digits / 10^count; leading fraction zeros
    // ("1.05") are handled naturally because they still grow the divisor.
    double fraction = 0.0, scale = 1.0 ;
    for ( int i = 0 ; p < end && i < MaxFractionDigits ; ++i, ++p ) {
        byte digitByte = *p ;
        if ( digitByte is < ZERO or > NINE ) break ;
        fraction = fraction * 10.0 + ( digitByte - ZERO ) ;
        scale *= 10.0 ;
    }

    double magnitude = fraction / scale ;
    return negative ? leftHand - magnitude : leftHand + magnitude ;
}
// =================================================================== | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
NOTE: Designed for use when your data is already in UTF-8 ... don't convert `char` data just to use this, unless you want to convert it in advance and save it to a file, or you are retrieving web/cloud data in UTF-8 form. I will update this ASAP, however, with a `char` version and handle other encodings.