Last active
December 14, 2015 19:09
-
-
Save xionluhnis/5134430 to your computer and use it in GitHub Desktop.
Cuda Memory wrapper class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Cuda memory wrapper | |
*/ | |
#ifndef CUDAMEMORY_H | |
#define CUDAMEMORY_H | |
#ifndef NDEBUG | |
#include <iostream> | |
#include <stdlib.h> | |
#endif | |
#include <vector> | |
#include <cuda_runtime.h> | |
namespace gpu { | |
    // assert macro
    // Debug-only assertion: prints the failed condition with file/line context
    // to stderr, then throws (std::exit is commented out so that stack
    // unwinding / destructors still run). Compiles to a no-op under NDEBUG.
#ifndef NDEBUG
#define gpu__assert(condition, message) \
    do { \
        if (! (condition)) { \
            std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \
                      << " line " << __LINE__ << ": " << message << std::endl; \
            /*std::exit(EXIT_FAILURE); */ throw "Assertion error!"; \
        } \
    } while (false)
#else
#define gpu__assert(condition, message) do { } while (false)
#endif
    // general types
    typedef unsigned int ref_index;  // index into the shared reference-count table
    typedef unsigned int memcount_t; // number of scalar elements in an allocation
    typedef unsigned int index_t;    // generic unsigned index
    typedef long count_t;            // signed count, so underflow below zero is detectable
namespace internal { | |
static std::vector<count_t> refCounts; | |
static std::vector<index_t> freeIndexes; | |
index_t newReference() { | |
if (!freeIndexes.empty()) { | |
index_t r = freeIndexes[freeIndexes.size() - 1]; | |
freeIndexes.pop_back(); | |
refCounts[r] = 1; // init the new reference | |
return r; | |
} | |
index_t ref_id = refCounts.size(); | |
refCounts.push_back(1); // init the new reference | |
return ref_id; | |
} | |
void freeReference(index_t ref_id, void* ptr) { | |
count_t newCount = --refCounts[ref_id]; | |
gpu__assert(newCount >= 0, "Count is negative!"); | |
if (newCount == 0) { | |
// we should free it now | |
cudaFree(ptr); | |
// and make the reference index available | |
freeIndexes.push_back(ref_id); | |
// std::cout << "Free indexes: " << freeIndexes.size() << std::endl; | |
} | |
} | |
// return the number of references which are still alive (entries, not their count) | |
count_t countReferences(){ | |
return count_t(refCounts.size()) - freeIndexes.size(); | |
} | |
} | |
template <class S> | |
class CudaMemory { | |
public: | |
// template types | |
typedef CudaMemory<S> this_type; | |
typedef S scalar_type; | |
// constructors | |
CudaMemory() : count(0), d_ptr(0), ref_id(0) { | |
// std::cout << "CudaMemory()" << std::endl; | |
} | |
CudaMemory(memcount_t c) : count(0), d_ptr(0), ref_id(0) { | |
// std::cout << "CudaMemory(" << c << ") - "; | |
// trying to allocate the data | |
if (cudaMalloc((void**) &d_ptr, sizeof (scalar_type) * c) == cudaSuccess) { | |
// now we can do something | |
count = c; | |
ref_id = internal::newReference(); | |
// std::cout << "worked!" << std::endl; | |
} | |
} | |
// copy | |
CudaMemory(const CudaMemory::this_type& orig) : count(orig.count), d_ptr(orig.d_ptr), ref_id(orig.ref_id) { | |
// std::cout << "Copying (size=" << count << ", ref=" << orig.ref_count() << ")" << std::endl; | |
// update reference count | |
if (count > 0) { | |
// let's increment the count | |
++internal::refCounts[ref_id]; | |
} | |
// std::cout << "... now ref=" << ref_count() << std::endl; | |
} | |
this_type& operator =(const this_type& other) { | |
// std::cout << "Assigning: size=" << other.count << std::endl; | |
// free current content | |
if (count > 0) { | |
internal::freeReference(ref_id, (void*) d_ptr); | |
count = 0; | |
} | |
// copy new content | |
count = other.count; | |
d_ptr = other.d_ptr; | |
ref_id = other.ref_id; | |
// update reference count | |
if (count > 0) { | |
++internal::refCounts[ref_id]; | |
} | |
// std::cout << "... now ref=" << ref_count() << std::endl; | |
} | |
// free | |
~CudaMemory() { | |
// std::cout << "Deleting for count=" << count << std::endl; | |
// free content | |
if (count > 0) { | |
internal::freeReference(ref_id, (void*) d_ptr); | |
} | |
} | |
// transfers | |
void copyFrom(S* host_ptr) { | |
gpu__assert(count > 0, "Nothing to copy!"); | |
cudaMemcpy(d_ptr, host_ptr, sizeof (scalar_type) * count, cudaMemcpyHostToDevice); | |
} | |
void copyTo(S* host_ptr) { | |
gpu__assert(count > 0, "Nothing to copy!"); | |
cudaMemcpy(host_ptr, d_ptr, sizeof (scalar_type) * count, cudaMemcpyDeviceToHost); | |
} | |
// overloading i/o operators | |
template<typename T> | |
friend this_type& operator <<(this_type& mem, const T&); | |
template<typename T> | |
friend this_type& operator >>(this_type& mem, T&); | |
// getters | |
bool empty() const { | |
return count == 0; | |
} | |
scalar_type* get() { | |
return d_ptr; | |
} | |
count_t ref_count() const { | |
if (count > 0) return internal::refCounts[ref_id]; | |
else return 0; | |
} | |
memcount_t size() const { | |
return count; | |
} | |
// implicit conversion | |
/* operator scalar_type*() { | |
return d_ptr; | |
} */ | |
private: | |
memcount_t count; | |
scalar_type* d_ptr; | |
ref_index ref_id; | |
}; | |
    // names: convenience aliases for the common scalar instantiations
    typedef CudaMemory<char> CudaCharMemory;
    typedef CudaMemory<int> CudaIntMemory;
    typedef CudaMemory<long> CudaLongMemory;
    typedef CudaMemory<float> CudaFloatMemory;
    typedef CudaMemory<double> CudaDoubleMemory;
} | |
#endif /* CUDAMEMORY_H */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment