Skip to content

Instantly share code, notes, and snippets.

@gfoidl
Created April 14, 2019 14:06
Show Gist options
  • Save gfoidl/5f6453e9b038ee7cf82940a0182043ef to your computer and use it in GitHub Desktop.
Save gfoidl/5f6453e9b038ee7cf82940a0182043ef to your computer and use it in GitHub Desktop.
Byte2Hex conversions
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
// reference for branchless: http://0x80.pl/articles/convert-to-hex.html
namespace ConsoleApp2
{
class Program
{
static void Main(string[] args)
{
byte i = 46;
string s1 = i.ToString("x");
string s2 = string.Create(2, i, (span, state) => Bench.ToHex(span, state));
string s3 = string.Create(2, i, (span, state) => Bench.ToHexUnsafe(span, state));
string s4 = string.Create(2, i, (span, state) => Bench.ToHexWOMap(span, state));
string s5 = string.Create(2, i, (span, state) => Bench.ToHexWOMap1(span, state));
string s6 = string.Create(2, i, (span, state) => Bench.ToHexBranchless(span, state));
string s7 = string.Create(2, i, (span, state) => Bench.ToHexBranchless1(span, state));
string s8 = string.Create(2, i, (span, state) => Bench.ToHexBranchless2(span, state));
string s9 = string.Create(2, i, (span, state) => Bench.ToHexBranchless3(span, state));
string sA = string.Create(2, i, (span, state) => Bench.ToHexBranchless4(span, state));
#if DEV
Console.WriteLine($"s1: {s1}");
Console.WriteLine($"s2: {s2}");
Console.WriteLine($"s3: {s3}");
Console.WriteLine($"s4: {s4}");
Console.WriteLine($"s5: {s5}");
Console.WriteLine($"s6: {s6}");
Console.WriteLine($"s7: {s7}");
Console.WriteLine($"s8: {s8}");
Console.WriteLine($"s9: {s9}");
Console.WriteLine($"sA: {sA}");
#if !DEBUG
BenchmarkRunner.Run<Bench>();
#endif
#endif
}
}
public class Bench
{
public byte _value;
public char[] _buffer = new char[2];
//[Benchmark(Baseline = true)]
public void ToHex() => ToHex(_buffer, _value);
[Benchmark(Baseline = true)]
public void ToHexUnsafe() => ToHexUnsafe(_buffer, _value);
[Benchmark]
public void ToHexWOMap() => ToHexWOMap(_buffer, _value);
[Benchmark]
public void ToHexWOMap1() => ToHexWOMap1(_buffer, _value);
[Benchmark]
public void ToHexBranchless() => ToHexBranchless(_buffer, _value);
[Benchmark]
public void ToHexBranchless1() => ToHexBranchless1(_buffer, _value);
[Benchmark]
public void ToHexBranchless2() => ToHexBranchless2(_buffer, _value);
[Benchmark]
public void ToHexBranchless3() => ToHexBranchless3(_buffer, _value);
[Benchmark]
public void ToHexBranchless4() => ToHexBranchless4(_buffer, _value);
private static ReadOnlySpan<byte> s_map => new byte[]
{
(byte)'0',
(byte)'1',
(byte)'2',
(byte)'3',
(byte)'4',
(byte)'5',
(byte)'6',
(byte)'7',
(byte)'8',
(byte)'9',
(byte)'A',
(byte)'B',
(byte)'C',
(byte)'D',
(byte)'E',
(byte)'F'
};
// void* because nunit is not available
// uint is good for x86
// ulong is good for x64
// void* is a less readable workaround
internal static unsafe void ToHex(Span<char> buffer, byte value)
{
uint val = value;
void* hi = (void*)(val >> 4);
void* lo = (void*)(val & 0xF);
ref byte m = ref MemoryMarshal.GetReference(s_map);
byte hiChar = Unsafe.Add(ref m, (IntPtr)hi);
byte loChar = Unsafe.Add(ref m, (IntPtr)lo);
buffer[1] = (char)loChar;
buffer[0] = (char)hiChar;
}
internal static unsafe void ToHexUnsafe(Span<char> buffer, byte value)
{
uint val = value;
void* hi = (void*)(val >> 4);
void* lo = (void*)(val & 0xF);
ref byte m = ref MemoryMarshal.GetReference(s_map);
byte hiChar = Unsafe.Add(ref m, (IntPtr)hi);
byte loChar = Unsafe.Add(ref m, (IntPtr)lo);
// Clearly faster than the above, as it safes the bounds check (fetch length, cmp, jump)
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)loChar;
Unsafe.Add(ref b, 0) = (char)hiChar;
}
internal static unsafe void ToHexWOMap(Span<char> buffer, byte value)
{
uint val = value;
uint hi = val >> 4;
uint lo = val & 0xF;
uint hiChar = hi < 10 ? hi + '0' : hi - 10 + 'A';
uint loChar = lo < 10 ? lo + '0' : lo - 10 + 'A';
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)loChar;
Unsafe.Add(ref b, 0) = (char)hiChar;
}
internal static unsafe void ToHexWOMap1(Span<char> buffer, byte value)
{
#if PTR // is worse, because add cmp is with 64 bits instead of 32 bits
uint val = value;
byte* hi = (byte*)(val >> 4);
byte* lo = (byte*)(val & 0xF);
byte* hiChar = hi < (byte*)10 ? hi + '0' : hi - 10 + 'A';
byte* loChar = lo < (byte*)10 ? lo + '0' : lo - 10 + 'A';
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)(uint)loChar;
Unsafe.Add(ref b, 0) = (char)(uint)hiChar;
#else
uint val = value;
uint hi = val >> 4;
uint lo = val & 0xF;
// is worse than above, as here only the summand is taken, then in an extra step add is performed
// above add is done in the branch directly, thus safing instructions and a dependency
uint hiChar = hi + (hi < 10 ? '0' : (uint)('A' - 10));
uint loChar = lo + (lo < 10 ? '0' : (uint)('A' - 10));
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)loChar;
Unsafe.Add(ref b, 0) = (char)hiChar;
#endif
}
internal static unsafe void ToHexBranchless(Span<char> buffer, byte value)
{
uint val = value;
uint hi = val >> 4;
uint lo = val & 0xF;
// Introduces high dependency chain, thus it's slower
uint hiChar = NibbleToHex(hi);
uint loChar = NibbleToHex(lo);
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)loChar;
Unsafe.Add(ref b, 0) = (char)hiChar;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint NibbleToHex(uint nibble)
{
const uint corr = 'A' - '0' - 10;
uint c = nibble + '0';
uint tmp = 128 - 10 + nibble;
uint msb = tmp & 0x80; // highest bit set?
uint mask = msb - (msb >> 7); // 0x7F or 0x00
return c + (mask & corr);
}
internal static unsafe void ToHexBranchless1(Span<char> buffer, byte value)
{
const uint corr = 'A' - '0' - 10;
uint val = value;
uint hi = val >> 4;
uint lo = val & 0xF;
// less dependencies as above, still not good
uint cHi = hi + '0';
uint cLo = lo + '0';
uint tmpHi = 128 - 10 + hi;
uint tmpLo = 128 - 10 + lo;
uint msbHi = tmpHi & 0x80;
uint msbLo = tmpLo & 0x80;
uint maskHi = msbHi - (msbHi >> 7);
uint maskLo = msbLo - (msbLo >> 7);
uint hiChar = cHi + (maskHi & corr);
uint loChar = cLo + (maskLo & corr);
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)loChar;
Unsafe.Add(ref b, 0) = (char)hiChar;
}
internal static unsafe void ToHexBranchless2(Span<char> buffer, byte value)
{
uint val = value;
//uint nibbles = (val & 0xF) | ((val >> 4) << 8);
uint nibbles = (val & 0xF) | ((val & 0xF0) << 4);
const uint corr = ('A' - '0' - 10) * 0x0101U;
uint c = nibbles + '0' * 0x0101U;
uint tmp = (128 - 10) * 0x0101U + nibbles;
uint msb = tmp & (0x80 * 0x0101U); // 0x80 * 0x0101U = 0x8080
uint mask = msb - (msb >> 7);
uint hex = c + (mask & corr);
ref char b = ref MemoryMarshal.GetReference(buffer);
Unsafe.Add(ref b, 1) = (char)(hex & 0xFF);
Unsafe.Add(ref b, 0) = (char)(hex >> 8);
}
internal static unsafe void ToHexBranchless3(Span<char> buffer, byte value)
{
uint val = value;
uint nibbles = ((val & 0xF) << 8) | (val >> 4);
const uint corr = ('A' - '0' - 10) * 0x0101U;
uint c = nibbles + '0' * 0x0101U;
uint tmp = (128 - 10) * 0x0101U + nibbles;
uint msb = tmp & (0x80 * 0x0101U); // 0x80 * 0x0101U = 0x8080
uint mask = msb - (msb >> 7);
uint hex = c + (mask & corr);
ref char b = ref MemoryMarshal.GetReference(buffer);
// Similar to ToHexBranchless2, expect written with one mem-access
if (BitConverter.IsLittleEndian)
{
hex = (hex & 0xFF) | ((hex & 0xFF00) << 8);
}
else
{
hex = ((hex & 0xFF00) >> 8) | ((hex & 0xFF) << 16);
}
Unsafe.As<char, uint>(ref b) = hex;
}
internal static unsafe void ToHexBranchless4(Span<char> buffer, byte value)
{
uint val = value;
uint nibbles = ((val & 0xF) << 8) | (val >> 4);
const uint corr = ('A' - '0' - 10) * 0x0101U;
uint c = nibbles + '0' * 0x0101U;
uint tmp = (128 - 10) * 0x0101U + nibbles;
uint msb = tmp & (0x80 * 0x0101U); // 0x80 * 0x0101U = 0x8080
uint mask = msb - (msb >> 7);
uint hex = c + (mask & corr);
ref char b = ref MemoryMarshal.GetReference(buffer);
const uint shift = (0x1 << 8) | 0x1;
hex *= shift;
Unsafe.As<char, uint>(ref b) = hex & 0xFF_00_FF;
}
}
}
@gfoidl
Copy link
Author

gfoidl commented Apr 14, 2019

BenchmarkDotNet=v0.11.5, OS=ubuntu 16.04
Intel Xeon CPU 2.00GHz, 1 CPU, 2 logical cores and 1 physical core
.NET Core SDK=3.0.100-preview3-010431
  [Host]     : .NET Core 3.0.0-preview3-27503-5 (CoreCLR 4.6.27422.72, CoreFX 4.7.19.12807), 64bit RyuJIT
  DefaultJob : .NET Core 3.0.0-preview3-27503-5 (CoreCLR 4.6.27422.72, CoreFX 4.7.19.12807), 64bit RyuJIT

Method Mean Error StdDev Ratio
ToHexUnsafe 7.929 ns 0.0277 ns 0.0259 ns 1.00
ToHexWOMap 8.384 ns 0.0413 ns 0.0345 ns 1.06
ToHexWOMap1 8.597 ns 0.0386 ns 0.0361 ns 1.08
ToHexBranchless 8.946 ns 0.0196 ns 0.0174 ns 1.13
ToHexBranchless1 9.317 ns 0.1150 ns 0.1076 ns 1.18
ToHexBranchless2 8.691 ns 0.0193 ns 0.0180 ns 1.10
ToHexBranchless3 9.037 ns 0.0214 ns 0.0190 ns 1.14
ToHexBranchless4 8.614 ns 0.0186 ns 0.0155 ns 1.09

@gfoidl
Copy link
Author

gfoidl commented Nov 27, 2019

Another intersting bit-hack is

// http://0x80.pl/notesen/2010-06-09-brancheless-hex-print.html
[MethodImpl(MethodImplOptions.NoInlining)]
public static char Hex2Ascii(int hex)
{
    const int k = 32;
    const int n = 9;
    const int m = unchecked((1 << (k - 1)) - 1 - n);    // 0x7FFFFFF6

    hex    &= 0xF;
    int tmp = m + hex;
    tmp   >>= 31;
    tmp    &= 7;         // A-F
    //tmp  &= 39;        // a-f
    hex    += tmp + '0';

    return (char)hex;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment