Skip to content

Instantly share code, notes, and snippets.

@ericcano
Created July 14, 2021 07:27
Show Gist options
  • Save ericcano/c1a7ead347e25413d7e9b0269a35fa14 to your computer and use it in GitHub Desktop.
Save ericcano/c1a7ead347e25413d7e9b0269a35fa14 to your computer and use it in GitHub Desktop.
struct SoA {
using self_type = SoA;
constexpr static size_t defaultAlignment = 128;
__attribute__((host)) static void dump(size_t nElements, size_t byteAlignment = defaultAlignment) {
std::cout << "SoA"
"("
<< nElements << ", " << byteAlignment << "): " << '\n';
std::cout << " sizeof("
"SoA"
"): "
<< sizeof(SoA) << '\n';
std::cout << " computeDataSize(...): " << computeDataSize(nElements, byteAlignment);
size_t offset = 0;
std::cout << " "
"x"
"_ at offset "
<< offset << " has size " << sizeof(double) * nElements << " and padding " << ((nElements * sizeof(double) / byteAlignment) + 1) * byteAlignment - (sizeof(double) * nElements)
<< std::endl;
std::cout << " "
"y"
"_ at offset "
<< offset << " has size " << sizeof(double) * nElements << " and padding " << ((nElements * sizeof(double) / byteAlignment) + 1) * byteAlignment - (sizeof(double) * nElements)
<< std::endl;
std::cout << " "
"z"
"_ at offset "
<< offset << " has size " << sizeof(double) * nElements << " and padding " << ((nElements * sizeof(double) / byteAlignment) + 1) * byteAlignment - (sizeof(double) * nElements)
<< std::endl;
std::cout << " "
"colour"
"_ at offset "
<< offset << " has size " << sizeof(uint16_t) * nElements << " and padding " << ((nElements * sizeof(uint16_t) / byteAlignment) + 1) * byteAlignment - (sizeof(uint16_t) * nElements)
<< std::endl;
std::cout << " "
"value"
"_ at offset "
<< offset << " has size " << sizeof(int32_t) * nElements << " and padding " << ((nElements * sizeof(int32_t) / byteAlignment) + 1) * byteAlignment - (sizeof(int32_t) * nElements)
<< std::endl;
std::cout << " "
"py"
"_ at offset "
<< offset << " has size " << sizeof(double *) * nElements << " and padding " << ((nElements * sizeof(double *) / byteAlignment) + 1) * byteAlignment - (sizeof(double *) * nElements)
<< std::endl;
std::cout << " "
"description"
"_ at offset "
<< offset << " has size " << sizeof(const char *) << " and padding " << (sizeof(const char *) / byteAlignment + 1) * byteAlignment - sizeof(const char *) << std::endl;
std::cout << std::endl;
}
static size_t computeDataSize(size_t nElements, size_t byteAlignment = defaultAlignment) {
size_t ret = 0;
ret += (((nElements * sizeof(double) - 1) / byteAlignment) + 1) * byteAlignment;
ret += (((nElements * sizeof(double) - 1) / byteAlignment) + 1) * byteAlignment;
ret += (((nElements * sizeof(double) - 1) / byteAlignment) + 1) * byteAlignment;
ret += (((nElements * sizeof(uint16_t) - 1) / byteAlignment) + 1) * byteAlignment;
ret += (((nElements * sizeof(int32_t) - 1) / byteAlignment) + 1) * byteAlignment;
ret += (((nElements * sizeof(double *) - 1) / byteAlignment) + 1) * byteAlignment;
ret += (((sizeof(const char *) - 1) / byteAlignment) + 1) * byteAlignment;
return ret;
}
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) size_t nElements() const { return nElements_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) size_t byteAlignment() const { return byteAlignment_; }
SoA(std::byte *mem, size_t nElements, size_t byteAlignment = defaultAlignment) : mem_(mem), nElements_(nElements), byteAlignment_(byteAlignment) {
auto curMem = mem_;
x_ = reinterpret_cast<double *>(curMem);
curMem += (((nElements_ * sizeof(double) - 1) / byteAlignment_) + 1) * byteAlignment_;
y_ = reinterpret_cast<double *>(curMem);
curMem += (((nElements_ * sizeof(double) - 1) / byteAlignment_) + 1) * byteAlignment_;
z_ = reinterpret_cast<double *>(curMem);
curMem += (((nElements_ * sizeof(double) - 1) / byteAlignment_) + 1) * byteAlignment_;
colour_ = reinterpret_cast<uint16_t *>(curMem);
curMem += (((nElements_ * sizeof(uint16_t) - 1) / byteAlignment_) + 1) * byteAlignment_;
value_ = reinterpret_cast<int32_t *>(curMem);
curMem += (((nElements_ * sizeof(int32_t) - 1) / byteAlignment_) + 1) * byteAlignment_;
py_ = reinterpret_cast<double **>(curMem);
curMem += (((nElements_ * sizeof(double *) - 1) / byteAlignment_) + 1) * byteAlignment_;
description_ = reinterpret_cast<const char **>(curMem);
curMem += (((sizeof(const char *) - 1) / byteAlignment_) + 1) * byteAlignment_;
if (mem_ + computeDataSize(nElements_, byteAlignment_) != curMem)
throw std::out_of_range("In "
"SoA"
"::"
"SoA"
": unexpected end pointer.");
}
struct const_element {
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const_element(SoA const &soa, int index) : soa_(soa), index_(index) {}
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const &x() { return *(soa_.x() + index_); }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const &y() { return *(soa_.y() + index_); }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const &z() { return *(soa_.z() + index_); }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) uint16_t const &colour() { return *(soa_.colour() + index_); }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) int32_t const &value() { return *(soa_.value() + index_); }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *const &py() { return *(soa_.py() + index_); }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const char *const &description() { return soa_.description(); }
private:
SoA const &soa_;
const int index_;
};
struct element {
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) element(size_t index, double *x, double *y, double *z, uint16_t *colour, int32_t *value, double **py)
: x(index, x), y(index, y), z(index, z), colour(index, colour), value(index, value), py(index, py) {}
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) element &operator=(const element &other) {
static_cast<double &>(x) = static_cast<std::add_const<double>::type &>(other.x);
static_cast<double &>(y) = static_cast<std::add_const<double>::type &>(other.y);
static_cast<double &>(z) = static_cast<std::add_const<double>::type &>(other.z);
static_cast<uint16_t &>(colour) = static_cast<std::add_const<uint16_t>::type &>(other.colour);
static_cast<int32_t &>(value) = static_cast<std::add_const<int32_t>::type &>(other.value);
static_cast<double *&>(py) = static_cast<std::add_const<double *>::type &>(other.py);
return *this;
}
SoAValue<double> x;
SoAValue<double> y;
SoAValue<double> z;
SoAValue<uint16_t> colour;
SoAValue<int32_t> value;
SoAValue<double *> py;
};
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) element operator[](size_t index) {
rangeCheck(index);
return element(index, x_, y_, z_, colour_, value_, py_);
}
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const element operator[](size_t index) const {
rangeCheck(index);
return element(index, x_, y_, z_, colour_, value_, py_);
}
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *x() { return x_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *y() { return y_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *z() { return z_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) uint16_t *colour() { return colour_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) int32_t *value() { return value_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double **py() { return py_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const char *&description() { return *description_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const *x() const { return x_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const *y() const { return y_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const *z() const { return z_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) uint16_t const *colour() const { return colour_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) int32_t const *value() const { return value_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *const *py() const { return py_; }
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const char *const &description() const { return *description_; }
template <typename T> __attribute__((host)) friend void dump();
private:
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) void rangeCheck(size_t index) const {
if constexpr (false) {
if (index >= nElements_) {
printf("In "
"SoA"
"::rangeCheck(): index out of range: %zu with nElements: %zu\n",
index, nElements_);
(static_cast<bool>(false) ? void(0) : __assert_fail("false", "soa_v7_cuda.h", 38, __extension__ __PRETTY_FUNCTION__));
}
}
}
std::byte *mem_;
size_t nElements_;
size_t byteAlignment_;
double *x_;
double *y_;
double *z_;
uint16_t *colour_;
int32_t *value_;
double **py_;
const char **description_;
};
struct AoSelement {
double x;
double y;
double z;
uint16_t colour;
int32_t value;
double *py;
};
private:
static constexpr int defaultDevice = 0;
static constexpr size_t elementsCount = 10000;
template <typename T> void checkValuesAlignment(SoA &soa, T SoA::element::*member, const std::string &memberName, size_t byteAlignment) {
for (size_t i = 0; i < soa.nElements(); i++) {
if (reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment != (i * T::valueSize) % byteAlignment) {
std::stringstream err;
err << "Misaligned value: " << memberName << " at index=" << i << " address=" << &(soa[i].*member) << " byteAlignment=" << byteAlignment
<< " address lower part: " << reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment << " expected address lower part: " << ((i * T::valueSize) % byteAlignment)
<< " size=" << soa.nElements() << " align=" << soa.byteAlignment();
(CppUnit::Asserter::fail(CppUnit::Message("forced failure", CppUnit::message_to_string(err.str())), CppUnit::SourceLine("soa_v7_cuda.h", 83)));
}
if ((reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment) &&
(reinterpret_cast<std::uintptr_t>(&(soa[i - 1].*member)) + T::valueSize != reinterpret_cast<std::uintptr_t>(&(soa[i].*member)))) {
std::stringstream err;
err << "Unexpected non-contiguity: " << memberName << " at index=" << i << " address=" << &(soa[i].*member) << " is not contiguous to " << memberName << " at index=" << i - 1
<< "address=" << &(soa[i - 1].*member) << " size=" << soa.nElements() << " align=" << soa.byteAlignment() << " valueSize=" << T::valueSize;
(CppUnit::Asserter::fail(CppUnit::Message("forced failure", CppUnit::message_to_string(err.str())), CppUnit::SourceLine("soa_v7_cuda.h", 96)));
}
}
}
void checkSoAAlignment(size_t nElements, size_t byteAlignment);
std::unique_ptr<std::byte, std::function<void(void *)>> make_aligned_unique(size_t size, size_t alignment) {
return std::unique_ptr<std::byte, std::function<void(void *)>>(static_cast<std::byte *>(std::aligned_alloc(size, alignment)), [](void *p) { std::free(p); });
}
class bad_alloc : public std::bad_alloc {
public:
bad_alloc(const std::string &w) noexcept : what_(w) {}
const char *what() const noexcept override { return what_.c_str(); }
private:
const std::string what_;
};
std::unique_ptr<std::byte, std::function<void(void *)>> make_device_unique(size_t size) {
void *p = nullptr;
cudaError_t e = cudaMalloc(&p, size);
if (e != cudaSuccess) {
std::string m("Failed to allocate device memory: ");
m += cudaGetErrorName(e);
[[unlikely]] throw bad_alloc(m);
}
return std::unique_ptr<std::byte, std::function<void(void *)>>(static_cast<std::byte *>(p), [](void *p) { cudaFree(p); });
}
std::unique_ptr<std::byte, std::function<void(void *)>> make_host_unique(size_t size) {
void *p = nullptr;
cudaError_t e = cudaMallocHost(&p, size);
if (e != cudaSuccess) {
std::string m("Failed to allocate page-locked host memory: ");
m += cudaGetErrorName(e);
[[unlikely]] throw bad_alloc(m);
}
return std::unique_ptr<std::byte, std::function<void(void *)>>(static_cast<std::byte *>(p), [](void *p) { cudaFreeHost(p); });
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment