xionluhnis/CudaMemory.h

## CudaMemory.h
/*
 * CUDA memory wrapper: reference-counted handles around cudaMalloc/cudaFree
 */

#ifndef CUDAMEMORY_H
#define	CUDAMEMORY_H

#ifndef NDEBUG
#include <iostream>
#include <stdlib.h>
#endif

#include <vector>
#include <cuda_runtime.h>

namespace gpu {
    // assert macro
    //
    // gpu__assert(condition, message)
    //   Debug builds (NDEBUG not defined): when `condition` is false, writes the
    //   stringified condition, __FILE__, __LINE__ and `message` to std::cerr and
    //   then throws the string literal "Assertion error!" (the std::exit call is
    //   deliberately left commented out).
    //   Release builds (NDEBUG defined): expands to an empty statement, so
    //   neither `condition` nor `message` is evaluated.
    //
    // Being a preprocessor macro, the name is not scoped by namespace gpu even
    // though it is defined inside it.
#ifndef NDEBUG
#define gpu__assert(condition, message) \
    do { \
        if (! (condition)) { \
            std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \
                      << " line " << __LINE__ << ": " << message << std::endl; \
            /*std::exit(EXIT_FAILURE); */ throw "Assertion error!"; \
        } \
    } while (false)
#else
#define gpu__assert(condition, message) do { } while (false)
#endif

    // general types
    // ref_index:  type of the reference slot stored in each CudaMemory handle
    typedef unsigned int ref_index;
    // memcount_t: number of elements held by a CudaMemory buffer
    typedef unsigned int memcount_t;
    // index_t:    position inside internal::refCounts / internal::freeIndexes
    typedef unsigned int index_t;
    // count_t:    a reference count; signed, and freeReference asserts that it
    //             never drops below zero
    typedef long count_t;

    namespace internal {
        // refCounts[i] is the number of live handles sharing reference slot i.
        // NOTE(review): this is a non-inline definition in a header while
        // freeIndexes below is `static`; including this header from more than
        // one translation unit would give duplicate symbols for refCounts and
        // a separate free list per unit -- confirm the header is only used
        // from a single translation unit.
        std::vector<count_t> refCounts;
        // Slots of refCounts whose allocation was released and may be reused.
        static std::vector<index_t> freeIndexes;

        /**
         * Reserves a reference slot for a fresh allocation.
         *
         * A previously released slot is recycled when one is available;
         * otherwise the table grows by one entry.
         *
         * @return index of a slot whose count is exactly 1
         */
        index_t newReference() {
            if (!freeIndexes.empty()) {
                index_t r = freeIndexes.back();
                freeIndexes.pop_back();
                // A recycled slot still holds the 0 left by its previous
                // owner; without this reset the next release would take the
                // count to -1 and the new allocation would never be freed.
                refCounts[r] = 1;
                return r;
            }
            index_t ref_id = static_cast<index_t>(refCounts.size());
            refCounts.push_back(1);
            return ref_id;
        }

        /**
         * Drops one reference from a slot and releases the device buffer when
         * the last reference goes away.
         *
         * @param ref_id slot previously returned by newReference()
         * @param ptr    pointer handed to cudaFree once the count reaches zero
         */
        void freeReference(index_t ref_id, void* ptr) {
            gpu__assert(ref_id < refCounts.size(), "Unknown reference index!");
            count_t newCount = --refCounts[ref_id];
            gpu__assert(newCount >= 0, "Count is negative!");
            if (newCount == 0) {
                // <iostream> is only included when NDEBUG is not defined, so
                // the trace has to be limited to debug builds as well.
#ifndef NDEBUG
                std::cout << "Freeing ref#" << ref_id << std::endl;
#endif
                // we should free it now
                const cudaError_t status = cudaFree(ptr);
#ifndef NDEBUG
                // Only report: this runs from destructors, so it must not throw.
                if (status != cudaSuccess) {
                    std::cerr << "cudaFree failed for ref#" << ref_id << ": "
                              << cudaGetErrorString(status) << std::endl;
                }
#else
                (void) status;
#endif
                // and make the reference index available
                freeIndexes.push_back(ref_id);
            }
        }
    }

    template <class S>
    class CudaMemory {
    public:
        // template types
        typedef CudaMemory<S> this_type;
        typedef S scalar_type;

        // constructors

        CudaMemory() : count(0), d_ptr(0), ref_id(0) {
        }

        CudaMemory(memcount_t c) : count(0), d_ptr(0), ref_id(0) {
            // trying to allocate the data
            if (cudaMalloc((void**) &d_ptr, sizeof (scalar_type) * c) == cudaSuccess) {
                // now we can do something
                count = c;
                ref_id = internal::newReference();
            }
        }
        // copy

        CudaMemory(const CudaMemory::this_type& orig) : count(orig.count), d_ptr(orig.d_ptr), ref_id(orig.ref_id) {
            // update reference count
            if (count > 0) {
                // let's increment the count
                ++internal::refCounts[ref_id];
            }
        }

        this_type& operator =(const this_type& other) {
            // free current content
            if (count > 0) {
                internal::freeReference(ref_id, (void*) d_ptr);
                count = 0;
            }
            // copy new content
            count = other.count;
            d_ptr = other.d_ptr;
            ref_id = other.ref_id;
            // update reference count
            if (count > 0) {
                ++internal::refCounts[ref_id];
            }
        }

        // free

        ~CudaMemory() {
            // free content
            if (count > 0) {
                internal::freeReference(ref_id, (void*) d_ptr);
            }
        }

        // transfers

        void copyFrom(S* host_ptr) {
            gpu__assert(count > 0, "Nothing to copy!");
            cudaMemcpy(d_ptr, host_ptr, sizeof (scalar_type) * count, cudaMemcpyHostToDevice);
        }

        void copyTo(S* host_ptr) {
            gpu__assert(count > 0, "Nothing to copy!");
            cudaMemcpy(host_ptr, d_ptr, sizeof (scalar_type) * count, cudaMemcpyDeviceToHost);
        }

        // overloading i/o operators
        template<typename T>
        friend this_type& operator <<(this_type& mem, const T&);
        template<typename T>
        friend this_type& operator >>(this_type& mem, T&);

        // getters

        bool empty() const {
            return count == 0;
        }

        scalar_type* get() {
            return d_ptr;
        }

        count_t ref_count() const {
            if (count > 0) return internal::refCounts[ref_id];
            else return 0;
        }

        memcount_t size() const {
            return count;
        }

        // implicit conversion

        /* operator scalar_type*() {
            return d_ptr;
        } */
    private:
        memcount_t count;
        scalar_type* d_ptr;
        ref_index ref_id;
    };

    // names
    // Convenience aliases of CudaMemory for common scalar element types.
    typedef CudaMemory<char> CudaCharMemory;
    typedef CudaMemory<int> CudaIntMemory;
    typedef CudaMemory<long> CudaLongMemory;
    typedef CudaMemory<float> CudaFloatMemory;
    typedef CudaMemory<double> CudaDoubleMemory;

}

#endif	/* CUDAMEMORY_H */
	/*
	* CUDA memory wrapper: reference-counted handles around cudaMalloc/cudaFree
	*/

	#ifndef CUDAMEMORY_H
	#define CUDAMEMORY_H

	#ifndef NDEBUG
	#include <iostream>
	#include <stdlib.h>
	#endif

	#include <vector>
	#include <cuda_runtime.h>

	namespace gpu {
	// assert macro
	//
	// gpu__assert(condition, message)
	//   Debug builds (NDEBUG not defined): when `condition` is false, writes the
	//   stringified condition, __FILE__, __LINE__ and `message` to std::cerr and
	//   then throws the string literal "Assertion error!".
	//   Release builds (NDEBUG defined): expands to an empty statement, so
	//   neither `condition` nor `message` is evaluated.
	#ifndef NDEBUG
	#define gpu__assert(condition, message) \
	    do { \
	        if (! (condition)) { \
	            std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \
	                      << " line " << __LINE__ << ": " << message << std::endl; \
	            /*std::exit(EXIT_FAILURE); */ throw "Assertion error!"; \
	        } \
	    } while (false)
	#else
	#define gpu__assert(condition, message) do { } while (false)
	#endif

	// general types
	// ref_index:  type of the reference slot stored in each CudaMemory handle
	typedef unsigned int ref_index;
	// memcount_t: number of elements held by a CudaMemory buffer
	typedef unsigned int memcount_t;
	// index_t:    position inside internal::refCounts / internal::freeIndexes
	typedef unsigned int index_t;
	// count_t:    a reference count; signed, and freeReference asserts that it
	//             never drops below zero
	typedef long count_t;

	namespace internal {
	    // refCounts[i] is the number of live handles sharing reference slot i.
	    // NOTE(review): this is a non-inline definition in a header while
	    // freeIndexes below is `static`; including this header from more than
	    // one translation unit would give duplicate symbols for refCounts and
	    // a separate free list per unit -- confirm the header is only used
	    // from a single translation unit.
	    std::vector<count_t> refCounts;
	    // Slots of refCounts whose allocation was released and may be reused.
	    static std::vector<index_t> freeIndexes;

	    /**
	     * Reserves a reference slot for a fresh allocation.
	     *
	     * A previously released slot is recycled when one is available;
	     * otherwise the table grows by one entry.
	     *
	     * @return index of a slot whose count is exactly 1
	     */
	    index_t newReference() {
	        if (!freeIndexes.empty()) {
	            index_t r = freeIndexes.back();
	            freeIndexes.pop_back();
	            // A recycled slot still holds the 0 left by its previous
	            // owner; without this reset the next release would take the
	            // count to -1 and the new allocation would never be freed.
	            refCounts[r] = 1;
	            return r;
	        }
	        index_t ref_id = static_cast<index_t>(refCounts.size());
	        refCounts.push_back(1);
	        return ref_id;
	    }

	    /**
	     * Drops one reference from a slot and releases the device buffer when
	     * the last reference goes away.
	     *
	     * @param ref_id slot previously returned by newReference()
	     * @param ptr    pointer handed to cudaFree once the count reaches zero
	     */
	    void freeReference(index_t ref_id, void* ptr) {
	        gpu__assert(ref_id < refCounts.size(), "Unknown reference index!");
	        count_t newCount = --refCounts[ref_id];
	        gpu__assert(newCount >= 0, "Count is negative!");
	        if (newCount == 0) {
	            // <iostream> is only included when NDEBUG is not defined, so
	            // the trace has to be limited to debug builds as well.
	#ifndef NDEBUG
	            std::cout << "Freeing ref#" << ref_id << std::endl;
	#endif
	            // we should free it now
	            const cudaError_t status = cudaFree(ptr);
	#ifndef NDEBUG
	            // Only report: this runs from destructors, so it must not throw.
	            if (status != cudaSuccess) {
	                std::cerr << "cudaFree failed for ref#" << ref_id << ": "
	                          << cudaGetErrorString(status) << std::endl;
	            }
	#else
	            (void) status;
	#endif
	            // and make the reference index available
	            freeIndexes.push_back(ref_id);
	        }
	    }
	}

	template <class S>
	class CudaMemory {
	public:
	// template types
	typedef CudaMemory<S> this_type;
	typedef S scalar_type;

	// constructors

	CudaMemory() : count(0), d_ptr(0), ref_id(0) {
	}

	CudaMemory(memcount_t c) : count(0), d_ptr(0), ref_id(0) {
	// trying to allocate the data
	if (cudaMalloc((void*) &d_ptr, sizeof (scalar_type) c) == cudaSuccess) {
	// now we can do something
	count = c;
	ref_id = internal::newReference();
	}
	}
	// copy

	CudaMemory(const CudaMemory::this_type& orig) : count(orig.count), d_ptr(orig.d_ptr), ref_id(orig.ref_id) {
	// update reference count
	if (count > 0) {
	// let's increment the count
	++internal::refCounts[ref_id];
	}
	}

	this_type& operator =(const this_type& other) {
	// free current content
	if (count > 0) {
	internal::freeReference(ref_id, (void*) d_ptr);
	count = 0;
	}
	// copy new content
	count = other.count;
	d_ptr = other.d_ptr;
	ref_id = other.ref_id;
	// update reference count
	if (count > 0) {
	++internal::refCounts[ref_id];
	}
	}

	// free

	~CudaMemory() {
	// free content
	if (count > 0) {
	internal::freeReference(ref_id, (void*) d_ptr);
	}
	}

	// transfers

	void copyFrom(S* host_ptr) {
	gpu__assert(count > 0, "Nothing to copy!");
	cudaMemcpy(d_ptr, host_ptr, sizeof (scalar_type) * count, cudaMemcpyHostToDevice);
	}

	void copyTo(S* host_ptr) {
	gpu__assert(count > 0, "Nothing to copy!");
	cudaMemcpy(host_ptr, d_ptr, sizeof (scalar_type) * count, cudaMemcpyDeviceToHost);
	}

	// overloading i/o operators
	template<typename T>
	friend this_type& operator <<(this_type& mem, const T&);
	template<typename T>
	friend this_type& operator >>(this_type& mem, T&);

	// getters

	bool empty() const {
	return count == 0;
	}

	scalar_type* get() {
	return d_ptr;
	}

	count_t ref_count() const {
	if (count > 0) return internal::refCounts[ref_id];
	else return 0;
	}

	memcount_t size() const {
	return count;
	}

	// implicit conversion

	/* operator scalar_type*() {
	return d_ptr;
	} */
	private:
	memcount_t count;
	scalar_type* d_ptr;
	ref_index ref_id;
	};

	// names
	// Convenience aliases of CudaMemory for common scalar element types.
	typedef CudaMemory<char> CudaCharMemory;
	typedef CudaMemory<int> CudaIntMemory;
	typedef CudaMemory<long> CudaLongMemory;
	typedef CudaMemory<float> CudaFloatMemory;
	typedef CudaMemory<double> CudaDoubleMemory;

	}

	#endif /* CUDAMEMORY_H */