Last active
March 31, 2018 10:02
-
-
Save gfoidl/9138af7412b0adcab134a7cbc850d589 to your computer and use it in GitHub Desktop.
Pointer alignment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Numerics; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
namespace ConsoleApp1 | |
{ | |
class Program
{
    /// <summary>
    /// Small demonstration of <c>PointerHelper.GetElementsToAlign</c>: pins a sliced
    /// array, asks how many elements must be skipped to reach the next vector
    /// boundary, skips them one at a time, and then asks again.
    /// In DEBUG builds both answers are written to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static unsafe void Main(string[] args)
    {
        // NOTE(review): the original author assumes the array data starts on a
        // vector boundary ("aligned from GC / runtime") -- confirm, nothing here enforces it.
        double[] values = new double[1_000];

        // Slicing off the first element moves the start one double forward.
        // Under the assumption above and with 4 doubles per vector, the next
        // boundary would be 3 elements further on.
        Span<double> sliced = values.AsSpan(1);

        fixed (double* pinned = &MemoryMarshal.GetReference(sliced))
        {
            double* cursor = pinned;
            int remaining = PointerHelper.GetElementsToAlign<double>(cursor);
#if DEBUG
            Console.WriteLine(remaining);
#endif
            // Walk forward element by element until the reported distance is used up.
            for (; remaining > 0; remaining--)
            {
                cursor++;
            }

            // Query again from the advanced position.
            remaining = PointerHelper.GetElementsToAlign<double>(cursor);
#if DEBUG
            Console.WriteLine(remaining);
#endif
        }
    }
}
//------------------------------------------------------------------------- | |
internal static unsafe class PointerHelper
{
    /// <summary>
    /// Computes how many elements of type <typeparamref name="T"/> have to be
    /// skipped, starting at <paramref name="ptr"/>, to reach the next address that
    /// is a multiple of the size of <see cref="Vector{T}"/>.
    /// </summary>
    /// <typeparam name="T">Element type the pointer refers to.</typeparam>
    /// <param name="ptr">Address to inspect. It is only examined, never dereferenced.</param>
    /// <returns>
    /// A value in the range <c>0 .. Vector&lt;T&gt;.Count - 1</c>; <c>0</c> means
    /// <paramref name="ptr"/> already lies on a vector boundary.
    /// </returns>
    /// <remarks>
    /// The misalignment in bytes is converted to whole elements with a truncating
    /// division, so the result is only meaningful when <paramref name="ptr"/> is
    /// itself a multiple of the element size.
    /// The masks rely on the vector size and element count being powers of two,
    /// using the identity <c>a % b == a &amp; (b - 1)</c>, see
    /// https://graphics.stanford.edu/~seander/bithacks.html#ModulusDivisionEasy
    ///
    /// See also:
    /// https://github.com/ahsonkhan/coreclr/blob/46b075fc1877e7087da53579c13c8c9069058b42/src/mscorlib/shared/System/SpanHelpers.Char.cs#L95-L97
    /// https://github.com/ahsonkhan/coreclr/blob/46b075fc1877e7087da53579c13c8c9069058b42/src/mscorlib/shared/System/SpanHelpers.Char.cs#L131
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int GetElementsToAlign<T>(void* ptr) where T : struct
    {
        int bytesPerElement = Unsafe.SizeOf<T>();
        int bytesPerVector = Unsafe.SizeOf<Vector<T>>();
        int elementsPerVector = Vector<T>.Count;

        // Only the low bits of the address matter. Widening to ulong (instead of
        // narrowing to int) keeps the conversion valid for 64-bit addresses even
        // when the assembly is compiled with overflow checking enabled.
        ulong misalignedBytes = (ulong)ptr & (ulong)(bytesPerVector - 1);

        // Unsigned division: both operands are non-negative, and this avoids the
        // sign-correction instructions a signed division would need.
        int misalignedElements = (int)(misalignedBytes / (ulong)bytesPerElement);

        // (elementsPerVector - misalignedElements) alone would be correct whenever
        // misalignedElements > 0; the mask maps the already-aligned case
        // (elementsPerVector - 0) back to 0.
        return (elementsPerVector - misalignedElements) & (elementsPerVector - 1);
    }
}
} |
For this code, the JIT produces the following x64 assembly:
; Assembly listing for method Program:Do(long):int
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rbp based frame
; fully interruptible
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 6, 12 ) long -> rdi
; V01 loc0 [V01,T00] ( 7, 16 ) int -> rax
;* V02 tmp0 [V02 ] ( 0, 0 ) int -> zero-ref
;* V03 tmp1 [V03 ] ( 0, 0 ) int -> zero-ref
;* V04 tmp2 [V04 ] ( 0, 0 ) int -> zero-ref
; V05 tmp3 [V05,T04] ( 2, 2 ) int -> rax
;* V06 tmp4 [V06 ] ( 0, 0 ) int -> zero-ref
;* V07 tmp5 [V07 ] ( 0, 0 ) int -> zero-ref
;* V08 tmp6 [V08 ] ( 0, 0 ) int -> zero-ref
; V09 tmp7 [V09,T05] ( 2, 2 ) int -> rax
;# V10 OutArgs [V10 ] ( 1, 1 ) lclBlk ( 0) [rsp+0x00]
; V11 rat0 [V11,T02] ( 3, 6 ) int -> rax
; V12 rat1 [V12,T03] ( 3, 6 ) int -> rax
;
; Lcl frame size = 0
G_M56189_IG01:
55 push rbp
488BEC mov rbp, rsp
G_M56189_IG02:
8BC7 mov eax, edi
83E01F and eax, 31
8BF0 mov esi, eax
C1FE1F sar esi, 31
83E607 and esi, 7
03F0 add esi, eax
8BC6 mov eax, esi
C1F803 sar eax, 3
F7D8 neg eax
83C004 add eax, 4
83E003 and eax, 3
85C0 test eax, eax
7E0A jle SHORT G_M56189_IG04
G_M56189_IG03:
FFC8 dec eax
4883C708 add rdi, 8
85C0 test eax, eax
7FF6 jg SHORT G_M56189_IG03
G_M56189_IG04:
8BC7 mov eax, edi
83E01F and eax, 31
8BF8 mov edi, eax
C1FF1F sar edi, 31
83E707 and edi, 7
03F8 add edi, eax
8BC7 mov eax, edi
C1F803 sar eax, 3
F7D8 neg eax
83C004 add eax, 4
83E003 and eax, 3
G_M56189_IG05:
5D pop rbp
C3 ret
; Total bytes of code 76, prolog size 4 for method Program:Do(long):int
; ============================================================
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The JIT will treat `Unsafe.SizeOf<Vector<T>>()` as a constant, but it can also be written as `Vector<byte>.Count`,
because that is always the size of the SIMD register in bytes.