Created
October 26, 2014 08:45
-
-
Save navyxliu/cc624857eb54f13db2fb to your computer and use it in GitHub Desktop.
bug011.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//adapted from bug011 | |
#include <amp.h> | |
using namespace Concurrency; | |
// Device-side check: builds an extent<4> from the values {1, 2, 3, 4} and
// reads every component back through operator[].
// Returns 0 when each component i equals i + 1, or 12 on the first mismatch.
int test() restrict(amp)
{
    int components[] = {1, 2, 3, 4};
    extent<4> e4(components);

    for (int dim = 0; dim < 4; ++dim)
    {
        // Component `dim` was initialized from components[dim], i.e. dim + 1.
        const bool matches = (e4[dim] == dim + 1);
        if (!matches)
            return 12;
    }

    return 0;
}
// Per-index kernel body: stores the status code returned by test() into
// result at position idx.
void kernel(index<1>& idx, array<int, 1>& result) restrict(amp)
{
    const int status = test();
    result[idx] = status;
}
const int size = 4; | |
int test_device() | |
{ | |
accelerator device;// = require_device(Device::ALL_DEVICES); | |
accelerator_view av = device.get_default_view(); | |
extent<1> e(size); | |
array<int, 1> result(e, av); | |
std::vector<int> presult(size, 0); | |
parallel_for_each(e, [&](index<1> idx) restrict(amp) { | |
kernel(idx, result); | |
}); | |
presult = result; | |
for (int i = 0; i < size; i++) | |
{ | |
if (presult[i] != 0) | |
{ | |
printf("Test failed. Return code: %d\n", presult[i]); | |
return 1; | |
} | |
} | |
return 0; | |
} | |
int main(int argc, char **argv) | |
{ | |
int result = test_device(); | |
printf("Test %s on device\n", ((result == 0) ? "passed" : "failed")); | |
return result; | |
} |
vadimg: Yes, that's wrong. It shouldn't be converted to an address space 1 (global) pointer. The emitter does that because we have no information about address spaces in WHIRL, so by default it assumes a global pointer.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is part of the LLVM IR for the function test. Please note that %9 uses addrspace(1), which is suspicious: it is inconsistent with the initializer before it, which stores into the alloca through pointers with no address-space qualifier. Should e4 be in local memory or global memory here?
define void @"_ZZ11test_devicevENK3$_0clEN11Concurrency5indexILi1EEE.amp"([8 x i8] addrspace(2)* byval %this_struct.s0, %struct.index addrspace(1)* %idx) #0 {
BB:
%_ZZ4testvE2e4.addr = alloca %struct.extent, align 8
%0 = getelementptr inbounds %struct.extent* %_ZZ4testvE2e4.addr, i32 0, i32 0, i32 0
store i32 1, i32* %0, align 8
%1 = getelementptr %struct.extent* %_ZZ4testvE2e4.addr, i32 0, i32 0, i32 1
store i32 2, i32* %1, align 4
%2 = getelementptr %struct.extent* %_ZZ4testvE2e4.addr, i32 0, i32 0, i32 2
store i32 3, i32* %2, align 8
%3 = getelementptr %struct.extent* %_ZZ4testvE2e4.addr, i32 0, i32 0, i32 3
store i32 4, i32* %3, align 4
br label %BB_label_3842
BB_label_3842: ; preds = %BB_label_4098, %BB
%preg.50.addr.0 = phi i32 [ 1, %BB ], [ %13, %BB_label_4098 ]
%preg.49.addr.0 = phi i32 [ 0, %BB ], [ %12, %BB_label_4098 ]
%4 = ptrtoint %struct.extent* %_ZZ4testvE2e4.addr to i32
%5 = zext i32 %4 to i64
%6 = zext i32 %preg.49.addr.0 to i64
%7 = shl nuw nsw i64 %6, 2
%8 = add i64 %5, %7
%9 = inttoptr i64 %8 to i32 addrspace(1)*
%10 = load i32 addrspace(1)* %9, align 4
%11 = icmp eq i32 %10, %preg.50.addr.0
br i1 %11, label %BB_label_4098, label %BB2