Skip to content

Instantly share code, notes, and snippets.

@danielwinkler
Last active August 25, 2016 07:17
Show Gist options
  • Save danielwinkler/d8c7b5113adc61fddba22e6882c3a248 to your computer and use it in GitHub Desktop.
Save danielwinkler/d8c7b5113adc61fddba22e6882c3a248 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <stdio.h>
#include "string.h" // for memcpy
#ifdef _OPENACC
#include <openacc.h>
#endif
// Growable array of `type` elements that mirrors itself in OpenACC device
// memory.
//
// The host copy is the master: mutating members (push, resize, set, ...) only
// touch host memory, and update_device()/update_host() transfer the element
// count and the first size() elements explicitly.
//
// Elements are relocated with memcpy, so `type` must be trivially copyable
// (no pointers to owned storage, no non-trivial copy semantics).
template<class type>
class OpenACCArray
{
private:
    type* list;     // host storage; null until something has been reserved
    int _size;      // number of elements currently stored
    int _capacity;  // number of elements `list` has room for

private:
    // Returns the smallest power of two that is >= x, or 0 when x <= 0.
    // Values above 2^30 have no representable power of two above them and
    // are returned unchanged instead of overflowing.
    inline int
    pow2roundup (int x)
    {
        if (x <= 0) return 0;
        if (x > (1 << 30)) return x;
        --x;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        return x+1;
    }

    // Returns the largest power of two that is <= x, or 0 when x <= 0.
    inline int
    pow2rounddown (int x)
    {
        if (x <= 0) return 0;
        // smear the highest set bit into every lower position ...
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        // ... then drop everything below that highest bit
        return x - (x >> 1);
    }

    // Replaces the storage by a buffer of exactly `newcapacity` elements and
    // carries the first _size elements over, on the host and (when compiled
    // with OpenACC) on the device.
    // Precondition: newcapacity > 0 and newcapacity >= _size.
    void reallocate(int newcapacity)
    {
        type* oldList = list;
        const int oldCapacity = _capacity;

        // the next lines assume that type has no indirection
        list = new type[newcapacity];
        _capacity = newcapacity;
        // oldList is null on the very first allocation; _size is 0 then
        if (_size > 0)
            memcpy(list,oldList,sizeof(type)*_size);
#ifdef _OPENACC
#pragma acc enter data create(list[0:_capacity])
        if (_size > 0) {
#ifdef USE_ACC_MEMCPY
            // PGI Extension
            acc_memcpy(list,oldList,sizeof(type)*_size);
#elif USE_ACC_MEMCPY_DEVICE
            // OpenACC 2.5
            acc_memcpy_device(acc_deviceptr(list),acc_deviceptr(oldList),sizeof(type)*_size);
#else
#pragma acc kernels loop independent present(list[0:_size],oldList[0:_size])
            for(int i=0;i<_size;i++) list[i] = oldList[i];
#endif
        }
        // release the old device buffer even when it held no elements
        if (oldList) {
#pragma acc exit data delete (oldList[0:oldCapacity])
        }
#endif
        (void)oldCapacity; // only referenced by the OpenACC directive above
        delete [] oldList;
    }

public: // access on list
    // Number of stored elements.
    int size()const { return _size; }

    // Number of elements that fit without reallocation.
    int capacity()const { return _capacity; }

    // True when i is a valid element index, i.e. 0 <= i < size().
    inline bool check_bounds(int i) const
    {
        return i >= 0 && i < _size;
    }

    // Last element. Precondition: size() > 0 (not checked).
    type& back(){ return list[_size-1]; }

    // Drops the last element; does nothing on an empty array.
    void pop()
    {
        if(_size>0) _size--;
    }

    // Drops all elements; the capacity is kept.
    void clear()
    {
        _size = 0;
    }

    // Sets the element count to `newsize`, growing the storage if needed.
    // Elements gained by growing are not initialised by this function.
    // A negative `newsize` is treated as 0.
    void resize(int newsize)
    {
        if(newsize < 0) newsize = 0;
        if(newsize > _size) reserve(newsize);
        _size = newsize;
    }

    // Appends *element without any capacity check.
    // Precondition: size() < capacity().
    void fast_push(type* element)
    {
        list[_size] = *element;
        _size++;
    }

    // Appends element without any capacity check.
    // Precondition: size() < capacity().
    void fast_push(const type& element)
    {
        list[_size] = element;
        _size++;
    }

    // Appends element, growing the capacity to the next power of two when
    // the array is full.
    void push(const type& element)
    {
        if(_size>=_capacity)
        {
            int newcapacity = pow2roundup(_size+1);
            reserve(newcapacity);
        }
        list[_size] = element;
        _size++;
    }

    // Overwrites element i. Precondition: i is a valid index (not checked).
    void set(const int i,type val) {
        list[i] = val;
    }

    // Unchecked element access.
    inline const type& operator[](int i)const
    {
        return list[i];
    }
    inline type& operator[](int i)
    {
        return list[i];
    }

    // Removes element i by overwriting it with the last element; the order
    // of the elements is not preserved.
    // Precondition: i is a valid index (not checked).
    void delete_element(int i)
    {
        list[i] = list[--_size];
    }

public: // memory
    // Releases the device buffer, the device copy of the object created by
    // the constructor, and the host buffer.
    ~OpenACCArray()
    {
        if (list) {
#pragma acc exit data delete(list[0:_capacity])
        }
#pragma acc exit data delete(this)
        delete [] list;
    }

    // Creates an empty array and places a copy of the object on the device.
    OpenACCArray():
        list(0),
        _size(0),
        _capacity(0)
    {
#pragma acc enter data copyin(this)
    }

    // Creates an empty array with room for `requested_size` elements.
    OpenACCArray(int requested_size):
        list(0),
        _size(0),
        _capacity(0)
    {
#pragma acc enter data copyin(this)
        if(requested_size > 0) reserve(requested_size);
    }

    // Deep copy of the host contents of `other` (same capacity). Without
    // this, the implicitly generated copy would share `list` and delete it
    // twice. Only host memory is filled in; call update_device() to publish
    // the elements to the device.
    OpenACCArray(const OpenACCArray<type>& other):
        list(0),
        _size(0),
        _capacity(0)
    {
#pragma acc enter data copyin(this)
        if(other._capacity > 0) reserve(other._capacity);
        if(other._size > 0)
            memcpy(list,other.list,sizeof(type)*other._size);
        _size = other._size;
    }

    // Deep copy assignment of the host contents of `other`. Only host memory
    // is written; call update_device() to publish the elements to the device.
    OpenACCArray<type>& operator=(const OpenACCArray<type>& other)
    {
        if(this != &other)
        {
            reserve(other._size);
            if(other._size > 0)
                memcpy(list,other.list,sizeof(type)*other._size);
            _size = other._size;
        }
        return *this;
    }

    // Copies the first size() elements and the element count host -> device.
    void update_device() {
#pragma acc update device(list[0:_size])
#pragma acc update device(_size)
    }

    // Copies the element count and then that many elements device -> host.
    void update_host() {
#pragma acc update host(_size)
#pragma acc update host(list[0:_size])
    }

    // exchange content of this class with content of x
    void swap(OpenACCArray<type>& x)
    {
        type* tmp_list = list;
        int tmp_size = _size;
        int tmp_capacity = _capacity;
        list = x.list;
        _size = x._size;
        _capacity = x._capacity;
        x.list = tmp_list;
        x._size = tmp_size;
        x._capacity = tmp_capacity;
#if defined(_OPENACC) && _OPENACC >= 201711
        // Only the host pointers were exchanged above; the device copies of
        // both objects still refer to their previous device buffers, so a
        // kernel would keep working on the pre-swap data. Re-attach each
        // device-side `list` to the device buffer of its new host target.
        // NOTE(review): relies on the attach clause introduced with
        // OpenACC 2.6 -- confirm the behaviour with the target compiler.
        // The device-side element counts are still refreshed by
        // update_device().
        if (list) {
#pragma acc enter data attach(list)
        }
        if (x.list) {
#pragma acc enter data attach(x.list)
        }
#endif
    }

    // Ensures room for at least `newcapacity` elements; never shrinks.
    void reserve(int newcapacity)
    {
        // ignore request if requested size is too small
        if(_capacity >= newcapacity) return;
        reallocate(newcapacity);
    }

    // Grows the storage to `required_max_size` when fewer elements than that
    // are currently stored; never shrinks (see reserve()).
    void realloc_if_smaller_than(int required_max_size)
    {
        if(_size < required_max_size)
            reserve(required_max_size);
    }

    // Halves the capacity (rounded down to a power of two) when all stored
    // elements still fit; otherwise does nothing. The capacity never drops
    // below 1 through this function.
    void shrink()
    {
        const int proposed_size = pow2rounddown(_capacity/2);
        if(proposed_size > 0 && _size <= proposed_size && proposed_size < _capacity)
        {
            // reserve() ignores requests that do not grow the storage, so
            // the buffer has to be replaced directly.
            reallocate(proposed_size);
        }
    }
};
// Demonstration driver for OpenACCArray: two arrays are filled inside an
// accelerator loop, swapped on the host, modified in a second accelerator
// loop and finally printed from the host.
int main(int argc, char *argv[])
{
    const int N = 32;

    // `zeros` holds N elements, `ones` holds N/2 elements
    OpenACCArray<double> zeros(N);
    zeros.resize(N);
    OpenACCArray<double> ones(N/2);
    ones.resize(N/2);

    zeros.update_device();
    ones.update_device();

    // first pass: write 0.0 into `zeros` and 1.0 into `ones`
#pragma acc parallel loop present(zeros,ones)
    for (int idx = 0; idx < N; ++idx)
    {
        if (idx < zeros.size())
        {
            zeros[idx] = 0.0;
        }
        if (idx < ones.size())
        {
            ones[idx] = 1.0;
        }
    }

    zeros.update_host();
    ones.update_host();

    // exchange the contents: `zeros` now carries the 1.0 values and `ones`
    // carries the 0.0 values
    zeros.swap(ones);

    // The host-side contents are as expected at this point; enable the block
    // below to inspect them.
    // for (int i=0; i < N; ++i) {
    //     std::cout << i << ":";
    //     if (i < zeros.capacity())
    //         std::cout << " Z=" << zeros[i];
    //     if (i < ones.capacity())
    //         std::cout << " O=" << ones[i];
    //     std::cout << std::endl;
    // }

    zeros.update_device();
    ones.update_device();

    // second pass: the intended result is that `ones` (holding 0.0) ends up
    // at 1.0 and `zeros` (holding 1.0) ends up at 0.0
#pragma acc parallel loop present(zeros,ones)
    for (int idx = 0; idx < N; ++idx)
    {
        if (idx < ones.size())
        {
            ones[idx] += 1.0;
        }
        if (idx < zeros.size())
        {
            zeros[idx] -= 1.0;
        }
    }

    zeros.update_host();
    ones.update_host();

    // print one row per index, with a column for each array that has it
    for (int row = 0; row < N; ++row)
    {
        std::cout << row << ":";
        if (zeros.check_bounds(row))
        {
            std::cout << " Z=" << zeros[row];
        }
        if (ones.check_bounds(row))
        {
            std::cout << " O=" << ones[row];
        }
        std::cout << std::endl;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment