Skip to content

Instantly share code, notes, and snippets.

@lemire
Created October 9, 2021 23:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lemire/13746f1ea34ee28bdea6306f73598c65 to your computer and use it in GitHub Desktop.
Save lemire/13746f1ea34ee28bdea6306f73598c65 to your computer and use it in GitHub Desktop.
using System;
using System.Globalization;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics;
//using System.Linq;
using System.Runtime.CompilerServices;
// Returns true when the 16 bytes starting at `chars` are all ASCII digits
// ('0' through '9'). The caller must guarantee that 16 bytes are readable
// at `chars`; no bounds check is performed here.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
unsafe static bool is_made_of_sixteen_digits(byte* chars) {
    // Unaligned 16-byte load; lanes are treated as signed bytes, so any
    // byte >= 0x80 is negative and fails the lower-bound comparison below.
    Vector128<sbyte> input = Sse41.LoadDquVector128((sbyte*)chars);

    // 47 is the code point just before '0', 58 the one just after '9'.
    Vector128<sbyte> aboveLowerBound = Sse2.CompareGreaterThan(input, Vector128.Create((sbyte)47));
    Vector128<sbyte> belowUpperBound = Sse2.CompareLessThan(input, Vector128.Create((sbyte)58));

    // No byte can fail both bounds at once, so the two masks are equal in a
    // lane only when both are set, i.e. when that byte is a digit. Their
    // difference is therefore all-zero exactly when every lane is a digit.
    Vector128<sbyte> mismatch = Sse2.Subtract(aboveLowerBound, belowUpperBound);
    return Sse41.TestZ(mismatch, mismatch);
}
// Counts how many leading 16-byte groups of ASCII digits the range
// [p, pend) starts with, up to a maximum of two:
//   returns 2 if the first 32 bytes are all digits,
//   returns 1 if only the first 16 bytes are all digits,
//   returns 0 otherwise (fewer than 16 bytes available, or a non-digit
//   among the first 16).
unsafe static int ParseNumberString(byte* p, byte* pend) {
    // Guard: a first full block must be available and made of digits.
    if (p + 16 > pend || !is_made_of_sixteen_digits(p)) {
        return 0;
    }
    // The second block is only inspected when 32 bytes are available.
    bool secondBlockIsDigits = p + 32 <= pend && is_made_of_sixteen_digits(p + 16);
    return secondBlockIsDigits ? 2 : 1;
}
// Manually inlined variant of the 16-digit block check. Counts how many
// leading 16-byte groups of ASCII digits the range [p, pend) starts with,
// up to a maximum of two:
//   returns 2 if the first 32 bytes are all digits,
//   returns 1 if only the first 16 bytes are all digits,
//   returns 0 otherwise (fewer than 16 bytes available, or a non-digit
//   among the first 16).
unsafe static int ParseNumberStringInline(byte* p, byte* pend) {
    // Not enough input for even one 16-byte block.
    if (p + 16 > pend) {
        return 0;
    }

    // 47 is the code point just before '0', 58 the one just after '9'.
    // Comparisons are on signed bytes, so bytes >= 0x80 fail the lower bound.
    Vector128<sbyte> ascii0 = Vector128.Create((sbyte)47);
    Vector128<sbyte> after_ascii9 = Vector128.Create((sbyte)58);

    Vector128<sbyte> raw = Sse41.LoadDquVector128((sbyte*)p);
    var a = Sse2.CompareGreaterThan(raw, ascii0);
    var b = Sse2.CompareLessThan(raw, after_ascii9);
    // The two masks match in a lane only when the byte is a digit, so the
    // difference is all-zero exactly when all 16 bytes are digits.
    var c = Sse2.Subtract(a, b);

    // Fix: a first block that is not all digits must yield 0. Previously
    // this case fell through to `return 1`.
    if (!Sse41.TestZ(c, c)) {
        return 0;
    }

    // First block is good; a second block is only read when it is in range.
    if (p + 32 > pend) {
        return 1;
    }

    raw = Sse41.LoadDquVector128((sbyte*)(p + 16));
    a = Sse2.CompareGreaterThan(raw, ascii0);
    b = Sse2.CompareLessThan(raw, after_ascii9);
    c = Sse2.Subtract(a, b);
    return Sse41.TestZ(c, c) ? 2 : 1;
}
; Core CLR 5.0.921.35908 on x86
<Program>$.<Main>$(System.String[])
L0000: ret
<Program>$.<<Main>$>g__is_made_of_sixteen_digits|0_0(Byte*)
L0000: vzeroupper
L0003: vmovupd xmm0, [<Program>$.<<Main>$>g__is_made_of_sixteen_digits|0_0(Byte*)]
L000b: vmovupd xmm1, [<Program>$.<<Main>$>g__is_made_of_sixteen_digits|0_0(Byte*)]
L0013: vlddqu xmm2, [ecx]
L0017: vpcmpgtb xmm0, xmm2, xmm0
L001b: vpcmpgtb xmm1, xmm1, xmm2
L001f: vpsubb xmm0, xmm0, xmm1
L0023: vptest xmm0, xmm0
L0028: sete al
L002b: movzx eax, al
L002e: ret
<Program>$.<<Main>$>g__ParseNumberString|0_1(Byte*, Byte*)
L0000: push ebp
L0001: mov ebp, esp
L0003: vzeroupper
L0006: lea eax, [ecx+0x10]
L0009: cmp eax, edx
L000b: ja short L0071
L000d: vmovupd xmm0, [<Program>$.<<Main>$>g__ParseNumberString|0_1(Byte*, Byte*)]
L0015: vmovupd xmm1, [<Program>$.<<Main>$>g__ParseNumberString|0_1(Byte*, Byte*)]
L001d: vlddqu xmm2, [ecx]
L0021: vpcmpgtb xmm0, xmm2, xmm0
L0025: vpcmpgtb xmm1, xmm1, xmm2
L0029: vpsubb xmm0, xmm0, xmm1
L002d: vptest xmm0, xmm0
L0032: jne short L0071
L0034: lea eax, [ecx+0x20]
L0037: cmp eax, edx
L0039: ja short L006a
L003b: vmovupd xmm0, [<Program>$.<<Main>$>g__ParseNumberString|0_1(Byte*, Byte*)]
L0043: vmovupd xmm1, [<Program>$.<<Main>$>g__ParseNumberString|0_1(Byte*, Byte*)]
L004b: vlddqu xmm2, [ecx+0x10]
L0050: vpcmpgtb xmm0, xmm2, xmm0
L0054: vpcmpgtb xmm1, xmm1, xmm2
L0058: vpsubb xmm0, xmm0, xmm1
L005c: vptest xmm0, xmm0
L0061: jne short L006a
L0063: mov eax, 2
L0068: pop ebp
L0069: ret
L006a: mov eax, 1
L006f: pop ebp
L0070: ret
L0071: xor eax, eax
L0073: pop ebp
L0074: ret
<Program>$.<<Main>$>g__ParseNumberStringInline|0_2(Byte*, Byte*)
L0000: push ebp
L0001: mov ebp, esp
L0003: vzeroupper
L0006: lea eax, [ecx+0x10]
L0009: cmp eax, edx
L000b: ja short L0061
L000d: vmovupd xmm0, [<Program>$.<<Main>$>g__ParseNumberStringInline|0_2(Byte*, Byte*)]
L0015: vmovupd xmm1, [<Program>$.<<Main>$>g__ParseNumberStringInline|0_2(Byte*, Byte*)]
L001d: vlddqu xmm2, [ecx]
L0021: vpcmpgtb xmm3, xmm2, xmm0
L0025: vpcmpgtb xmm2, xmm1, xmm2
L0029: vpsubb xmm4, xmm3, xmm2
L002d: lea eax, [ecx+0x20]
L0030: cmp eax, edx
L0032: ja short L005a
L0034: vptest xmm4, xmm4
L0039: jne short L005a
L003b: vlddqu xmm2, [ecx+0x10]
L0040: vpcmpgtb xmm3, xmm2, xmm0
L0044: vpcmpgtb xmm2, xmm1, xmm2
L0048: vpsubb xmm4, xmm3, xmm2
L004c: vptest xmm4, xmm4
L0051: jne short L005a
L0053: mov eax, 2
L0058: pop ebp
L0059: ret
L005a: mov eax, 1
L005f: pop ebp
L0060: ret
L0061: xor eax, eax
L0063: pop ebp
L0064: ret
{
"version": 1,
"target": "JIT ASM",
"mode": "Release"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment