Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Benchmarks of Array 'Bounds Check' Elimination int the JIT
public class Program
{
static void Main(string[] args)
{
// From https://github.com/dotnet/coreclr/blob/32f0f9721afb584b4a14d69135bea7ddc129f755/tests/src/JIT/jit64/opt/rngchk/SimpleArray_01.cs
//SimpleArray_01.Class1.RunTest();
// Sanity checks, to make sure all the functions return the same value (i.e. it's a fair test!!)
var test = new Program();
Console.WriteLine("// Sanity Checks on {0}:", arrayToTest.ToString()); // 'System.Int32[]'
foreach (var method in typeof(Program).GetMethods(BindingFlags.Instance | BindingFlags.Public | BindingFlags.DeclaredOnly))
{
if (method.GetParameters().Length != 0 ||
method.Name.StartsWith("RangeCheck") == false ||
method.ReturnType != typeof(int))
continue;
try
{
int result = (int)method.Invoke(test, null);
Console.WriteLine("// {0,40} = {1:N0} {2}", method.Name, result, result != 4950 ? "**" : "");
}
catch (Exception ex)
{
Console.WriteLine("// {0,40} - {1}", method.Name, ex.InnerException?.Message ?? ex.Message);
}
}
BenchmarkRunner.Run<Program>();
}
//private static int[] arrayToTest = new int [100];
private static int[] arrayToTest;
static Program()
{
arrayToTest = new int[100];
for (int i = 0; i < arrayToTest.Length; i++)
{
arrayToTest[i] = i;
}
}
[Benchmark]
public int RangeCheckEliminatedOne() // standard way of looping thru an array
{
var result = 0;
for (int i = 0; i < arrayToTest.Length; i++)
{
result += arrayToTest[i];
}
return result;
}
[Benchmark]
public int RangeCheckEliminatedTwo() // hoist 'array.Length' into a local variable, outside the loop
{
var result = 0;
var max = arrayToTest.Length;
for (int i = 0; i < max; i++)
{
result += arrayToTest[i];
}
return result;
}
[Benchmark]
public int RangeCheckEliminatedThree() // walk the array backwards
{
var result = 0;
for (int i = arrayToTest.Length - 1; i >= 0; i--)
{
result += arrayToTest[i];
}
return result;
}
[Benchmark]
public int RangeCheckEliminatedFour() // move through the array in steps of 2 (not 1)
{
var result = 0;
for (int i = 0; i < arrayToTest.Length; i += 2)
{
result += arrayToTest[i];
result += arrayToTest[i + 1];
}
return result;
}
[Benchmark]
public int RangeCheckEliminatedFive()
{
var result = 0;
// Rather than i = 0, 1, ..., we do i = 2, 3, ... and then use 'i - 2' below!!
for (int i = 10; i < arrayToTest.Length + 10; i++)
{
result += arrayToTest[i - 10];
}
return result;
}
[Benchmark]
public int RangeCheckEliminatedSix()
{
var result = 0;
for (int i = 0; i < arrayToTest.Length; i++)
{
// Do a manual check, to ensure we don't access out-of-bounds
if (i < arrayToTest.Length)
result += arrayToTest[i];
else
result += i;
}
return result;
}
/// ####################################################
/// Examples where the range check is NOT eliminated!!!
/// ####################################################
[Benchmark]
public int RangeCheckNotEliminatedOne() // defeat the JIT!! fetch 'array.Length' via a non-inlined method call!!
{
var result = 0;
for (int i = 0; i < GetLengthDefeatJit(); i++)
{
result += arrayToTest[i]; // we never actually cause an System.IndexOutOfRangeException
}
return result;
}
[Benchmark]
public int RangeCheckNotEliminatedOneTryCatch() // defeat the JIT!! fetch 'array.Length' via a non-inlined method call!!
{
var result = 0;
for (int i = 0; i < GetLengthDefeatJit(); i++)
{
// we never actually cause an System.IndexOutOfRangeException
// just want to measure the overhead of having a try/catch block in place
try
{
result += arrayToTest[i];
} catch (IndexOutOfRangeException ioEx) {
result += i;
}
}
return result;
}
[Benchmark]
public int RangeCheckNotEliminatedTwo() // Access beyond the allowed values
{
var result = 0;
for (int i = 0; i < arrayToTest.Length; i++)
{
//result += arrayToTest[i + 1]; // Use i + 1 here, to go out-of-bounds
try {
result += arrayToTest[i + 1]; // Use i + 1 here, to go out-of-bounds
} catch (IndexOutOfRangeException ioEx) {
result += 1;
}
}
return result;
}
[MethodImpl(MethodImplOptions.NoInlining)]
private int GetLengthDefeatJit()
{
return arrayToTest.Length;
}
}
Owner

mattwarren commented Apr 26, 2017 edited

Results - Clr 4.0.30319.42000 - 32bit LegacyJIT- v4.6.1590.0

BenchmarkDotNet=v0.10.4, OS=Windows 6.1.7601
Processor=Intel Core i7-4800MQ CPU 2.70GHz (Haswell), ProcessorCount=8
Frequency=2630781 Hz, Resolution=380.1153 ns, Timer=TSC
  [Host]     : Clr 4.0.30319.42000, 32bit LegacyJIT-v4.6.1590.0
  DefaultJob : Clr 4.0.30319.42000, 32bit LegacyJIT-v4.6.1590.0

Method Mean Error StdDev
RangeCheckEliminatedOne 53.2372 ns 0.2175 ns 0.2034 ns
RangeCheckEliminatedTwo 53.2157 ns 0.1847 ns 0.1727 ns
RangeCheckEliminatedThree 49.8253 ns 0.1022 ns 0.0956 ns
RangeCheckEliminatedFour 51.0881 ns 0.1058 ns 0.0990 ns
RangeCheckEliminatedFive 56.8236 ns 0.2164 ns 0.2025 ns
RangeCheckEliminatedSix 72.7940 ns 0.2705 ns 0.2258 ns
RangeCheckNotEliminatedOne 187.2559 ns 0.7052 ns 0.6596 ns
RangeCheckNotEliminatedOneTryCatch 271.2448 ns 1.2190 ns 1.0806 ns
RangeCheckNotEliminatedTwo 17,589.8970 ns 40.0930 ns 37.5030 ns
Owner

mattwarren commented Apr 26, 2017

Instructions

Try and find different/unique scenarios where the JIT does not eliminate Array 'bounds checks', but you can still successfully access all the array items in a loop, see RangeCheckNotEliminatedOne for an example.

(I already have 6 scenarios where 'bounds checks' are eliminated, but they're less interesting as they're easier to find ;-)

MEGA Bonus points if you can get the JIT to eliminate 'bounds checks' and then access beyond the end of the array, if you do this file a bug on the CoreCLR repo!!


Rules

I've just been looking at the timings to verify if the range checks are eliminated or not, a better/proper check would be to look at the assembly emitted.

No usage of unsafe, or fixed is allowed, that's cheating

Drawaes commented Apr 26, 2017

Are you 100% sure about the walk backwards? I see this in the assembly

00007FFE3BAC34B9 xor eax,eax
00007FFE3BAC34BB mov rdx,1B3900027D0h
00007FFE3BAC34C5 mov rdx,qword ptr [rdx]
00007FFE3BAC34C8 mov edx,dword ptr [rdx+8]
00007FFE3BAC34CB dec edx
00007FFE3BAC34CD test edx,edx
00007FFE3BAC34CF jl 00007FFE3BAC34F1
00007FFE3BAC34D1 mov rcx,1B3900027D0h
00007FFE3BAC34DB mov rcx,qword ptr [rcx]
00007FFE3BAC34DE cmp edx,dword ptr [rcx+8]
00007FFE3BAC34E1 jae 00007FFE3BAC34F6
00007FFE3BAC34E3 movsxd r8,edx
00007FFE3BAC34E6 add eax,dword ptr [rcx+r8*4+10h]
00007FFE3BAC34EB dec edx
00007FFE3BAC34ED test edx,edx
00007FFE3BAC34EF jge 00007FFE3BAC34D1
00007FFE3BAC34F1 add rsp,28h

Looks like a compare in there to me...??

Drawaes commented Apr 26, 2017 edited

I am no "low level expert" but the cmp + a test in one loop looks like 2 tests, one for the for and a bounds check?

Owner

mattwarren commented Apr 29, 2017

@Drawaes thanks for taking a look, I guess I need to revisit this and do a proper test where I look at the ASM, not just base my conclusions on the benchmark timings

ionuttamas commented May 5, 2017 edited

This will do, I think:

public int RangeCheckNotEliminatedThree() {
var result = 0;
var offset = 3;
for (int i = 0; i < arrayToTest.Length; i++) {
result += arrayToTest[i + 3 - offset];
}
return result;
}

@Drawaes @matt: from the looks of it the backward walk is not optimized.

@mattwarren any reason in particular you're testing the 32bit legacy JIT?

Given that the arrayToTest field is not readonly I'm not sure how the JIT can eliminate any of the bounds checks...

I'm not sure any of the examples actually eliminiate bounds checking, capturing the array field into a local variable makes it almost two times faster on my machine. Unfortunately the JIT isn't smart enough to perform the same optimisation when the field is readonly.

        [Benchmark]
        public long RangeCheckEliminatedCapture() // Capture the reference to the array
        {
            var result = 0L;
            var array = arrayToTest;
            for (int i = 0; i < array.Length; i++)
            {
                result += array[i];
            }
            return result;
        }

Two, Three and Four seem to perform better because the call to the Length property is not performed on each iteration whereas in One it cannot be hoisted from the loop (as the array field could be modified by another thread).

Method Mean Error StdDev
RangeCheckEliminatedOne 119.38 ns 2.3021 ns 2.2610 ns
RangeCheckEliminatedReadOnly 120.73 ns 2.4218 ns 2.3786 ns
RangeCheckEliminatedCapture 72.80 ns 1.4379 ns 1.3451 ns
RangeCheckEliminatedTwo 90.18 ns 1.0244 ns 0.9582 ns
RangeCheckEliminatedThree 91.51 ns 1.0185 ns 0.9527 ns
RangeCheckEliminatedFour 98.25 ns 0.7102 ns 0.6643 ns
RangeCheckEliminatedFive 133.07 ns 1.7149 ns 1.5202 ns
RangeCheckEliminatedSix 149.82 ns 2.4185 ns 2.1440 ns
RangeCheckNotEliminatedOne 217.60 ns 1.0716 ns 0.8366 ns
RangeCheckNotEliminatedOneTryCatch 263.01 ns 5.1719 ns 4.8378 ns
RangeCheckNotEliminatedTwo 45,102.88 ns 410.5755 ns 384.0525 ns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment