-
-
Save mattwarren/a72cdb3ae427957af10635153d79555b to your computer and use it in GitHub Desktop.
public class Program | |
{ | |
static void Main(string[] args) | |
{ | |
// From https://github.com/dotnet/coreclr/blob/32f0f9721afb584b4a14d69135bea7ddc129f755/tests/src/JIT/jit64/opt/rngchk/SimpleArray_01.cs | |
//SimpleArray_01.Class1.RunTest(); | |
// Sanity checks, to make sure all the functions return the same value (i.e. it's a fair test!!) | |
var test = new Program(); | |
Console.WriteLine("// Sanity Checks on {0}:", arrayToTest.ToString()); // 'System.Int32[]' | |
foreach (var method in typeof(Program).GetMethods(BindingFlags.Instance | BindingFlags.Public | BindingFlags.DeclaredOnly)) | |
{ | |
if (method.GetParameters().Length != 0 || | |
method.Name.StartsWith("RangeCheck") == false || | |
method.ReturnType != typeof(int)) | |
continue; | |
try | |
{ | |
int result = (int)method.Invoke(test, null); | |
Console.WriteLine("// {0,40} = {1:N0} {2}", method.Name, result, result != 4950 ? "**" : ""); | |
} | |
catch (Exception ex) | |
{ | |
Console.WriteLine("// {0,40} - {1}", method.Name, ex.InnerException?.Message ?? ex.Message); | |
} | |
} | |
BenchmarkRunner.Run<Program>(); | |
} | |
//private static int[] arrayToTest = new int [100]; | |
private static int[] arrayToTest; | |
static Program() | |
{ | |
arrayToTest = new int[100]; | |
for (int i = 0; i < arrayToTest.Length; i++) | |
{ | |
arrayToTest[i] = i; | |
} | |
} | |
[Benchmark] | |
public int RangeCheckEliminatedOne() // standard way of looping thru an array | |
{ | |
var result = 0; | |
for (int i = 0; i < arrayToTest.Length; i++) | |
{ | |
result += arrayToTest[i]; | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckEliminatedTwo() // hoist 'array.Length' into a local variable, outside the loop | |
{ | |
var result = 0; | |
var max = arrayToTest.Length; | |
for (int i = 0; i < max; i++) | |
{ | |
result += arrayToTest[i]; | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckEliminatedThree() // walk the array backwards | |
{ | |
var result = 0; | |
for (int i = arrayToTest.Length - 1; i >= 0; i--) | |
{ | |
result += arrayToTest[i]; | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckEliminatedFour() // move through the array in steps of 2 (not 1) | |
{ | |
var result = 0; | |
for (int i = 0; i < arrayToTest.Length; i += 2) | |
{ | |
result += arrayToTest[i]; | |
result += arrayToTest[i + 1]; | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckEliminatedFive() | |
{ | |
var result = 0; | |
// Rather than i = 0, 1, ..., we do i = 2, 3, ... and then use 'i - 2' below!! | |
for (int i = 10; i < arrayToTest.Length + 10; i++) | |
{ | |
result += arrayToTest[i - 10]; | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckEliminatedSix() | |
{ | |
var result = 0; | |
for (int i = 0; i < arrayToTest.Length; i++) | |
{ | |
// Do a manual check, to ensure we don't access out-of-bounds | |
if (i < arrayToTest.Length) | |
result += arrayToTest[i]; | |
else | |
result += i; | |
} | |
return result; | |
} | |
/// #################################################### | |
/// Examples where the range check is NOT eliminated!!! | |
/// #################################################### | |
[Benchmark] | |
public int RangeCheckNotEliminatedOne() // defeat the JIT!! fetch 'array.Length' via a non-inlined method call!! | |
{ | |
var result = 0; | |
for (int i = 0; i < GetLengthDefeatJit(); i++) | |
{ | |
result += arrayToTest[i]; // we never actually cause an System.IndexOutOfRangeException | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckNotEliminatedOneTryCatch() // defeat the JIT!! fetch 'array.Length' via a non-inlined method call!! | |
{ | |
var result = 0; | |
for (int i = 0; i < GetLengthDefeatJit(); i++) | |
{ | |
// we never actually cause an System.IndexOutOfRangeException | |
// just want to measure the overhead of having a try/catch block in place | |
try | |
{ | |
result += arrayToTest[i]; | |
} catch (IndexOutOfRangeException ioEx) { | |
result += i; | |
} | |
} | |
return result; | |
} | |
[Benchmark] | |
public int RangeCheckNotEliminatedTwo() // Access beyond the allowed values | |
{ | |
var result = 0; | |
for (int i = 0; i < arrayToTest.Length; i++) | |
{ | |
//result += arrayToTest[i + 1]; // Use i + 1 here, to go out-of-bounds | |
try { | |
result += arrayToTest[i + 1]; // Use i + 1 here, to go out-of-bounds | |
} catch (IndexOutOfRangeException ioEx) { | |
result += 1; | |
} | |
} | |
return result; | |
} | |
[MethodImpl(MethodImplOptions.NoInlining)] | |
private int GetLengthDefeatJit() | |
{ | |
return arrayToTest.Length; | |
} | |
} |
Instructions
Try and find different/unique scenarios where the JIT does not eliminate Array 'bounds checks', but you can still successfully access all the array items in a loop, see RangeCheckNotEliminatedOne
for an example.
(I already have 6 scenarios where 'bounds checks' are eliminated, but they're less interesting as they're easier to find ;-)
MEGA Bonus points if you can get the JIT to eliminate 'bounds checks' and then access beyond the end of the array, if you do this file a bug on the CoreCLR repo!!
Rules
I've just been looking at the timings to verify if the range checks are eliminated or not, a better/proper check would be to look at the assembly emitted.
No usage of unsafe
, or fixed
is allowed, that's cheating
Are you 100% sure about the walk backwards? I see this in the assembly
00007FFE3BAC34B9 xor eax,eax
00007FFE3BAC34BB mov rdx,1B3900027D0h
00007FFE3BAC34C5 mov rdx,qword ptr [rdx]
00007FFE3BAC34C8 mov edx,dword ptr [rdx+8]
00007FFE3BAC34CB dec edx
00007FFE3BAC34CD test edx,edx
00007FFE3BAC34CF jl 00007FFE3BAC34F1
00007FFE3BAC34D1 mov rcx,1B3900027D0h
00007FFE3BAC34DB mov rcx,qword ptr [rcx]
00007FFE3BAC34DE cmp edx,dword ptr [rcx+8]
00007FFE3BAC34E1 jae 00007FFE3BAC34F6
00007FFE3BAC34E3 movsxd r8,edx
00007FFE3BAC34E6 add eax,dword ptr [rcx+r8*4+10h]
00007FFE3BAC34EB dec edx
00007FFE3BAC34ED test edx,edx
00007FFE3BAC34EF jge 00007FFE3BAC34D1
00007FFE3BAC34F1 add rsp,28h
Looks like a compare in there to me...??
I am no "low level expert" but the cmp + a test in one loop looks like 2 tests, one for the for and a bounds check?
@Drawaes thanks for taking a look, I guess I need to revisit this and do a proper test where I look at the ASM, not just base my conclusions on the benchmark timings
This will do, I think:
public int RangeCheckNotEliminatedThree() {
var result = 0;
var offset = 3;
for (int i = 0; i < arrayToTest.Length; i++) {
result += arrayToTest[i + 3 - offset];
}
return result;
}
@Drawaes @matt: from the looks of it the backward walk is not optimized.
@mattwarren any reason in particular you're testing the 32bit legacy JIT?
Given that the arrayToTest
field is not readonly
I'm not sure how the JIT can eliminate any of the bounds checks...
I'm not sure any of the examples actually eliminiate bounds checking, capturing the array field into a local variable makes it almost two times faster on my machine. Unfortunately the JIT isn't smart enough to perform the same optimisation when the field is readonly.
[Benchmark]
public long RangeCheckEliminatedCapture() // Capture the reference to the array
{
var result = 0L;
var array = arrayToTest;
for (int i = 0; i < array.Length; i++)
{
result += array[i];
}
return result;
}
Two, Three and Four seem to perform better because the call to the Length property is not performed on each iteration whereas in One it cannot be hoisted from the loop (as the array field could be modified by another thread).
Method | Mean | Error | StdDev |
---|---|---|---|
RangeCheckEliminatedOne | 119.38 ns | 2.3021 ns | 2.2610 ns |
RangeCheckEliminatedReadOnly | 120.73 ns | 2.4218 ns | 2.3786 ns |
RangeCheckEliminatedCapture | 72.80 ns | 1.4379 ns | 1.3451 ns |
RangeCheckEliminatedTwo | 90.18 ns | 1.0244 ns | 0.9582 ns |
RangeCheckEliminatedThree | 91.51 ns | 1.0185 ns | 0.9527 ns |
RangeCheckEliminatedFour | 98.25 ns | 0.7102 ns | 0.6643 ns |
RangeCheckEliminatedFive | 133.07 ns | 1.7149 ns | 1.5202 ns |
RangeCheckEliminatedSix | 149.82 ns | 2.4185 ns | 2.1440 ns |
RangeCheckNotEliminatedOne | 217.60 ns | 1.0716 ns | 0.8366 ns |
RangeCheckNotEliminatedOneTryCatch | 263.01 ns | 5.1719 ns | 4.8378 ns |
RangeCheckNotEliminatedTwo | 45,102.88 ns | 410.5755 ns | 384.0525 ns |
Results - Clr 4.0.30319.42000 - 32bit LegacyJIT- v4.6.1590.0