Created
July 14, 2021 07:27
-
-
Save ericcano/c1a7ead347e25413d7e9b0269a35fa14 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct SoA { | |
using self_type = SoA; | |
constexpr static size_t defaultAlignment = 128; | |
__attribute__((host)) static void dump(size_t nElements, size_t byteAlignment = defaultAlignment) { | |
std::cout << "SoA" | |
"(" | |
<< nElements << ", " << byteAlignment << "): " << '\n'; | |
std::cout << " sizeof(" | |
"SoA" | |
"): " | |
<< sizeof(SoA) << '\n'; | |
std::cout << " computeDataSize(...): " << computeDataSize(nElements, byteAlignment); | |
size_t offset = 0; | |
std::cout << " " | |
"x" | |
"_ at offset " | |
<< offset << " has size " << sizeof(double) * nElements << " and padding " << ((nElements * sizeof(double) / byteAlignment) + 1) * byteAlignment - (sizeof(double) * nElements) | |
<< std::endl; | |
std::cout << " " | |
"y" | |
"_ at offset " | |
<< offset << " has size " << sizeof(double) * nElements << " and padding " << ((nElements * sizeof(double) / byteAlignment) + 1) * byteAlignment - (sizeof(double) * nElements) | |
<< std::endl; | |
std::cout << " " | |
"z" | |
"_ at offset " | |
<< offset << " has size " << sizeof(double) * nElements << " and padding " << ((nElements * sizeof(double) / byteAlignment) + 1) * byteAlignment - (sizeof(double) * nElements) | |
<< std::endl; | |
std::cout << " " | |
"colour" | |
"_ at offset " | |
<< offset << " has size " << sizeof(uint16_t) * nElements << " and padding " << ((nElements * sizeof(uint16_t) / byteAlignment) + 1) * byteAlignment - (sizeof(uint16_t) * nElements) | |
<< std::endl; | |
std::cout << " " | |
"value" | |
"_ at offset " | |
<< offset << " has size " << sizeof(int32_t) * nElements << " and padding " << ((nElements * sizeof(int32_t) / byteAlignment) + 1) * byteAlignment - (sizeof(int32_t) * nElements) | |
<< std::endl; | |
std::cout << " " | |
"py" | |
"_ at offset " | |
<< offset << " has size " << sizeof(double *) * nElements << " and padding " << ((nElements * sizeof(double *) / byteAlignment) + 1) * byteAlignment - (sizeof(double *) * nElements) | |
<< std::endl; | |
std::cout << " " | |
"description" | |
"_ at offset " | |
<< offset << " has size " << sizeof(const char *) << " and padding " << (sizeof(const char *) / byteAlignment + 1) * byteAlignment - sizeof(const char *) << std::endl; | |
std::cout << std::endl; | |
} | |
static size_t computeDataSize(size_t nElements, size_t byteAlignment = defaultAlignment) { | |
size_t ret = 0; | |
ret += (((nElements * sizeof(double) - 1) / byteAlignment) + 1) * byteAlignment; | |
ret += (((nElements * sizeof(double) - 1) / byteAlignment) + 1) * byteAlignment; | |
ret += (((nElements * sizeof(double) - 1) / byteAlignment) + 1) * byteAlignment; | |
ret += (((nElements * sizeof(uint16_t) - 1) / byteAlignment) + 1) * byteAlignment; | |
ret += (((nElements * sizeof(int32_t) - 1) / byteAlignment) + 1) * byteAlignment; | |
ret += (((nElements * sizeof(double *) - 1) / byteAlignment) + 1) * byteAlignment; | |
ret += (((sizeof(const char *) - 1) / byteAlignment) + 1) * byteAlignment; | |
return ret; | |
} | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) size_t nElements() const { return nElements_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) size_t byteAlignment() const { return byteAlignment_; } | |
SoA(std::byte *mem, size_t nElements, size_t byteAlignment = defaultAlignment) : mem_(mem), nElements_(nElements), byteAlignment_(byteAlignment) { | |
auto curMem = mem_; | |
x_ = reinterpret_cast<double *>(curMem); | |
curMem += (((nElements_ * sizeof(double) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
y_ = reinterpret_cast<double *>(curMem); | |
curMem += (((nElements_ * sizeof(double) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
z_ = reinterpret_cast<double *>(curMem); | |
curMem += (((nElements_ * sizeof(double) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
colour_ = reinterpret_cast<uint16_t *>(curMem); | |
curMem += (((nElements_ * sizeof(uint16_t) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
value_ = reinterpret_cast<int32_t *>(curMem); | |
curMem += (((nElements_ * sizeof(int32_t) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
py_ = reinterpret_cast<double **>(curMem); | |
curMem += (((nElements_ * sizeof(double *) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
description_ = reinterpret_cast<const char **>(curMem); | |
curMem += (((sizeof(const char *) - 1) / byteAlignment_) + 1) * byteAlignment_; | |
if (mem_ + computeDataSize(nElements_, byteAlignment_) != curMem) | |
throw std::out_of_range("In " | |
"SoA" | |
"::" | |
"SoA" | |
": unexpected end pointer."); | |
} | |
struct const_element { | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const_element(SoA const &soa, int index) : soa_(soa), index_(index) {} | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const &x() { return *(soa_.x() + index_); } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const &y() { return *(soa_.y() + index_); } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const &z() { return *(soa_.z() + index_); } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) uint16_t const &colour() { return *(soa_.colour() + index_); } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) int32_t const &value() { return *(soa_.value() + index_); } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *const &py() { return *(soa_.py() + index_); } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const char *const &description() { return soa_.description(); } | |
private: | |
SoA const &soa_; | |
const int index_; | |
}; | |
struct element { | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) element(size_t index, double *x, double *y, double *z, uint16_t *colour, int32_t *value, double **py) | |
: x(index, x), y(index, y), z(index, z), colour(index, colour), value(index, value), py(index, py) {} | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) element &operator=(const element &other) { | |
static_cast<double &>(x) = static_cast<std::add_const<double>::type &>(other.x); | |
static_cast<double &>(y) = static_cast<std::add_const<double>::type &>(other.y); | |
static_cast<double &>(z) = static_cast<std::add_const<double>::type &>(other.z); | |
static_cast<uint16_t &>(colour) = static_cast<std::add_const<uint16_t>::type &>(other.colour); | |
static_cast<int32_t &>(value) = static_cast<std::add_const<int32_t>::type &>(other.value); | |
static_cast<double *&>(py) = static_cast<std::add_const<double *>::type &>(other.py); | |
return *this; | |
} | |
SoAValue<double> x; | |
SoAValue<double> y; | |
SoAValue<double> z; | |
SoAValue<uint16_t> colour; | |
SoAValue<int32_t> value; | |
SoAValue<double *> py; | |
}; | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) element operator[](size_t index) { | |
rangeCheck(index); | |
return element(index, x_, y_, z_, colour_, value_, py_); | |
} | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const element operator[](size_t index) const { | |
rangeCheck(index); | |
return element(index, x_, y_, z_, colour_, value_, py_); | |
} | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *x() { return x_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *y() { return y_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *z() { return z_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) uint16_t *colour() { return colour_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) int32_t *value() { return value_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double **py() { return py_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const char *&description() { return *description_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const *x() const { return x_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const *y() const { return y_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double const *z() const { return z_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) uint16_t const *colour() const { return colour_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) int32_t const *value() const { return value_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) double *const *py() const { return py_; } | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) const char *const &description() const { return *description_; } | |
template <typename T> __attribute__((host)) friend void dump(); | |
private: | |
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) void rangeCheck(size_t index) const { | |
if constexpr (false) { | |
if (index >= nElements_) { | |
printf("In " | |
"SoA" | |
"::rangeCheck(): index out of range: %zu with nElements: %zu\n", | |
index, nElements_); | |
(static_cast<bool>(false) ? void(0) : __assert_fail("false", "soa_v7_cuda.h", 38, __extension__ __PRETTY_FUNCTION__)); | |
} | |
} | |
} | |
std::byte *mem_; | |
size_t nElements_; | |
size_t byteAlignment_; | |
double *x_; | |
double *y_; | |
double *z_; | |
uint16_t *colour_; | |
int32_t *value_; | |
double **py_; | |
const char **description_; | |
}; | |
struct AoSelement { | |
double x; | |
double y; | |
double z; | |
uint16_t colour; | |
int32_t value; | |
double *py; | |
}; | |
private: | |
static constexpr int defaultDevice = 0; | |
static constexpr size_t elementsCount = 10000; | |
template <typename T> void checkValuesAlignment(SoA &soa, T SoA::element::*member, const std::string &memberName, size_t byteAlignment) { | |
for (size_t i = 0; i < soa.nElements(); i++) { | |
if (reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment != (i * T::valueSize) % byteAlignment) { | |
std::stringstream err; | |
err << "Misaligned value: " << memberName << " at index=" << i << " address=" << &(soa[i].*member) << " byteAlignment=" << byteAlignment | |
<< " address lower part: " << reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment << " expected address lower part: " << ((i * T::valueSize) % byteAlignment) | |
<< " size=" << soa.nElements() << " align=" << soa.byteAlignment(); | |
(CppUnit::Asserter::fail(CppUnit::Message("forced failure", CppUnit::message_to_string(err.str())), CppUnit::SourceLine("soa_v7_cuda.h", 83))); | |
} | |
if ((reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment) && | |
(reinterpret_cast<std::uintptr_t>(&(soa[i - 1].*member)) + T::valueSize != reinterpret_cast<std::uintptr_t>(&(soa[i].*member)))) { | |
std::stringstream err; | |
err << "Unexpected non-contiguity: " << memberName << " at index=" << i << " address=" << &(soa[i].*member) << " is not contiguous to " << memberName << " at index=" << i - 1 | |
<< "address=" << &(soa[i - 1].*member) << " size=" << soa.nElements() << " align=" << soa.byteAlignment() << " valueSize=" << T::valueSize; | |
(CppUnit::Asserter::fail(CppUnit::Message("forced failure", CppUnit::message_to_string(err.str())), CppUnit::SourceLine("soa_v7_cuda.h", 96))); | |
} | |
} | |
} | |
void checkSoAAlignment(size_t nElements, size_t byteAlignment); | |
std::unique_ptr<std::byte, std::function<void(void *)>> make_aligned_unique(size_t size, size_t alignment) { | |
return std::unique_ptr<std::byte, std::function<void(void *)>>(static_cast<std::byte *>(std::aligned_alloc(size, alignment)), [](void *p) { std::free(p); }); | |
} | |
class bad_alloc : public std::bad_alloc { | |
public: | |
bad_alloc(const std::string &w) noexcept : what_(w) {} | |
const char *what() const noexcept override { return what_.c_str(); } | |
private: | |
const std::string what_; | |
}; | |
std::unique_ptr<std::byte, std::function<void(void *)>> make_device_unique(size_t size) { | |
void *p = nullptr; | |
cudaError_t e = cudaMalloc(&p, size); | |
if (e != cudaSuccess) { | |
std::string m("Failed to allocate device memory: "); | |
m += cudaGetErrorName(e); | |
[[unlikely]] throw bad_alloc(m); | |
} | |
return std::unique_ptr<std::byte, std::function<void(void *)>>(static_cast<std::byte *>(p), [](void *p) { cudaFree(p); }); | |
} | |
std::unique_ptr<std::byte, std::function<void(void *)>> make_host_unique(size_t size) { | |
void *p = nullptr; | |
cudaError_t e = cudaMallocHost(&p, size); | |
if (e != cudaSuccess) { | |
std::string m("Failed to allocate page-locked host memory: "); | |
m += cudaGetErrorName(e); | |
[[unlikely]] throw bad_alloc(m); | |
} | |
return std::unique_ptr<std::byte, std::function<void(void *)>>(static_cast<std::byte *>(p), [](void *p) { cudaFreeHost(p); }); | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment