Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jmeyers314/986ac7670b356eed32f2fecf2b55aa18 to your computer and use it in GitHub Desktop.
Save jmeyers314/986ac7670b356eed32f2fecf2b55aa18 to your computer and use it in GitHub Desktop.
OpenMP C++ GPU offload with object composition
#include <new>
#include <vector>
#include <iostream>
#pragma omp declare target
// Polymorphic abstract base class. Every concrete subclass can evaluate itself
// at a point and can hand out a pointer to a device-side "shadow" instance of
// itself.
class Base {
public:
    // Virtual destructor so that destroying a derived object through a Base*
    // is well defined.
    virtual ~Base() = default;

    // Evaluate this object at x and return the result.
    virtual double doOne(double x) = 0;

    // Return a device pointer to the device shadow instance of this object.
    virtual Base* getDevPtr() = 0;
};
// Concrete Base implementation: doOne(x) returns x + c for a constant c fixed
// at construction.
class Derived : public Base {
public:
    // c: the constant added to the argument of doOne().
    Derived(double c) : _c(c), _devPtr(nullptr) {}

    // Return x + c.
    virtual double doOne(double x) override {
        return x + _c;
    }

    // Return a pointer to the device shadow of this object. The shadow is
    // created on the first call and the same pointer is returned afterwards.
    virtual Base* getDevPtr() override {
        if (_devPtr)  // don't create more than one shadow
            return _devPtr;

        // Copy the member into a local scalar so the target region only
        // captures that scalar; referencing _c directly inside the region
        // would implicitly map *this, which is a polymorphic object.
        const double c = _c;

        Derived* ptr = nullptr;
        // Construct the shadow instance inside the target region; only the
        // resulting pointer value is copied back to the host.
        #pragma omp target map(from:ptr)
        {
            ptr = new Derived(c);
        }
        _devPtr = ptr;
        return ptr;
    }

private:
    double _c;      // constant added in doOne()
    Base* _devPtr;  // pointer to device shadow; this class never frees it
};
// Compositional object. Sum of arbitrary number of Base objects
class Sum : public Base {
public:
Sum(Base** summands, size_t nsummand) : _nsummand(nsummand), _devPtr(nullptr) {
// make sure to copy contents of summands, and not just the outer-layer pointer.
_summands = new Base*[_nsummand];
for (int i=0; i<_nsummand; i++) {
_summands[i] = summands[i];
}
}
virtual double doOne(double x) override {
double result = 0;
for (int i=0; i<_nsummand; i++) {
result += _summands[i]->doOne(x);
}
return result;
}
virtual Base* getDevPtr() override {
if (_devPtr)
return _devPtr;
Base** _devPtrs = new Base*[_nsummand];
for(int i=0; i<_nsummand; i++) {
_devPtrs[i] = _summands[i]->getDevPtr();
}
Sum* ptr;
#pragma omp target map(from:ptr) map(to:_devPtrs[:_nsummand])
{
ptr = new Sum(_devPtrs, _nsummand);
}
_devPtr = ptr;
return ptr;
}
private:
Base** _summands;
size_t _nsummand;
Base* _devPtr;
};
#pragma omp end declare target
// Demo: build sum(x) = (1 + x) + (2 + x) + (3 + x) = 6 + 3x on the host,
// obtain its device shadow, evaluate it for x = 0..9 in an offloaded loop and
// print one result per line.
int main() {
    const int n = 10;  // number of sample points

    Derived d1(1);  // 1 + x
    Derived d2(2);  // 2 + x
    Derived d3(3);  // 3 + x
    Base* summands[3] = {&d1, &d2, &d3};
    Sum sum(summands, 3);  // 6 + 3x

    Base* devPtr = sum.getDevPtr();

    std::vector<double> in(n, 0.0);
    for (int i = 0; i < n; i++) {
        in[i] = i;
    }  // in = [0 1 2 3 4 5 6 7 8 9]
    std::vector<double> out(n, 0.0);

    // Raw pointers are used so the buffers can be named in array-section
    // map clauses.
    double* inptr = in.data();
    double* outptr = out.data();

    // The input is only read and the output is fully overwritten inside the
    // loop, so map them one way each instead of the default tofrom.
    // devPtr already holds a device address, hence is_device_ptr.
    #pragma omp target teams distribute parallel for map(to:inptr[:n]) map(from:outptr[:n]) is_device_ptr(devPtr)
    for (int i = 0; i < n; i++) {
        outptr[i] = devPtr->doOne(inptr[i]);
    }

    for (int i = 0; i < n; i++) {
        std::cout << out[i] << '\n';
    }  // out = 6 + 3x = [6 9 12 15 18 21 24 27 30 33]
}
@jmeyers314
Copy link
Author

Compiles with LLVM 11.0.0 using:
clang++ -std=c++11 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda OpenMP_offload_object_composition.cpp

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment