Last active
June 6, 2016 02:42
-
-
Save LWisteria/76915d7e7cf2a852b95f98f2e2be4d09 to your computer and use it in GitHub Desktop.
C++AMP vector_add
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// export PATH=${PATH}:/opt/rocm/bin
// hcc `hcc-config --cxxflags` -g -O3 -o obj/main.o -c main.cpp
// hcc -o vector_add obj/main.o `hcc-config --ldflags`
#include <algorithm>
#include <array>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>

#include <amp.h>
using Device = concurrency::accelerator; | |
void SetDefaultDevice() | |
{ | |
const auto devices = concurrency::accelerator::get_all(); | |
const auto device = std::find_if(devices.crbegin(), devices.crend(), [](const Device& d) | |
{ | |
return !(d.get_is_emulated()); | |
}); | |
if(device == devices.crend()) | |
{ | |
throw "Useful device not found"; | |
} | |
std::wcout << device->get_description() << std::endl; | |
concurrency::accelerator::set_default(device->get_device_path()); | |
} | |
int main() | |
{ | |
constexpr unsigned int n = 10000; | |
std::vector<int> a(n); | |
std::vector<int> b(n); | |
std::vector<int> c(n); | |
std::iota(a.begin(), a.end(), 0); | |
std::iota(b.rbegin(), b.rend(), n); | |
SetDefaultDevice(); | |
concurrency::array_view<int> aa(n, a); | |
concurrency::array_view<int> bb(n, b); | |
concurrency::array_view<int> cc(n, c); | |
concurrency::parallel_for_each( | |
aa.get_extent(), | |
[aa, bb, &cc] (const concurrency::index<1> idx) restrict(amp) | |
{ | |
const int i = idx[0]; | |
cc[i] = aa[i] + bb[i]; | |
}); | |
cc.synchronize(); | |
for(int i = 0; i < n; i++) | |
{ | |
if(c[i] != a[i] + b[i]) | |
{ | |
std::cout << i << ": actual=" << c[i] << ", expected="<< a[i] + b[i] << std::endl; | |
return -1; | |
} | |
} | |
std::cout << "OK" << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
At line #48, aa, bb, and cc should all be captured by value. cc is currently captured by reference, which would cause trouble on Linux with a dGPU.