hertzsprung/doubletexture.cu

## doubletexture.cu
// inspired by https://devtalk.nvidia.com/default/topic/419190/texture-fetching-for-double-precision-floats/
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cstdio>

#define cudaCheckErrors(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "%s %s %d\n", cudaGetErrorString(code), file, line);
        exit(code);
    }
}

__global__ void split_into_channels(int* dest, double* src, int width, int height)
{
    for (int j=0; j<height; j++)
    {
        for (int i=0; i<width; i++)
        {
            double value = src[j*width + i];
            dest[j*width*2 + 2*i] = __double2hiint(value);
            dest[j*width*2 + (2*i)+1] = __double2loint(value);
        }
    }
}

__global__ void print(cudaTextureObject_t t)
{
    for (int j=0; j<8; j++)
    {
        for (int i=0; i<8; i++)
        {
            int2 v = tex1Dfetch<int2>(t, j*8 + i);
            double value = __hiloint2double(v.x, v.y);

            printf("GPU: %i %i %lf\n", i, j, value);
        }
    }
}

int main()
{
    const int width = 8;
    const int height = 8;

    double* source_data;
    cudaCheckErrors(cudaMallocManaged(&source_data, width*height*sizeof(double)));;
    source_data[0*width + 0] = 1.3;
    source_data[0*width + 1] = 2.3;
    source_data[1*width + 0] = 3.3;
    source_data[2*width + 0] = 4.3;

    int* twochannel_data;
    cudaCheckErrors(cudaMallocManaged(&twochannel_data, 2*width*height*sizeof(int)));
    split_into_channels<<<1, 1>>>(twochannel_data, source_data, width, height);
    cudaCheckErrors(cudaDeviceSynchronize());

    struct cudaResourceDesc resourceDesc;
    memset(&resourceDesc, 0, sizeof(resourceDesc));
    resourceDesc.resType = cudaResourceTypeLinear;
    resourceDesc.res.linear.devPtr = twochannel_data;
    resourceDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
    resourceDesc.res.linear.desc.x = 32;
    resourceDesc.res.linear.desc.y = 32;
    resourceDesc.res.linear.sizeInBytes = 2*width*height*sizeof(int);

    struct cudaTextureDesc textureDesc;
    memset(&textureDesc, 0, sizeof(textureDesc));
    textureDesc.addressMode[0] = cudaAddressModeClamp;
    textureDesc.addressMode[1] = cudaAddressModeClamp;
    textureDesc.filterMode = cudaFilterModePoint;
    textureDesc.readMode = cudaReadModeElementType;
    textureDesc.normalizedCoords = false;

    cudaTextureObject_t texture = 0;
    cudaCheckErrors(cudaCreateTextureObject(&texture, &resourceDesc, &textureDesc, nullptr));

    print<<<1, 1>>>(texture);

    cudaCheckErrors(cudaDeviceSynchronize());

    cudaCheckErrors(cudaDestroyTextureObject(texture));
    cudaCheckErrors(cudaFree(twochannel_data));
    cudaCheckErrors(cudaFree(source_data));

    return 0;
}
	// inspired by https://devtalk.nvidia.com/default/topic/419190/texture-fetching-for-double-precision-floats/
	#include <cuda.h>
	#include <cuda_runtime_api.h>
	#include <cstdio>

	#define cudaCheckErrors(ans) { gpuAssert((ans), __FILE__, __LINE__); }
	inline void gpuAssert(cudaError_t code, const char *file, int line)
	{
	if (code != cudaSuccess)
	{
	fprintf(stderr, "%s %s %d\n", cudaGetErrorString(code), file, line);
	exit(code);
	}
	}

	__global__ void split_into_channels(int* dest, double* src, int width, int height)
	{
	for (int j=0; j<height; j++)
	{
	for (int i=0; i<width; i++)
	{
	double value = src[j*width + i];
	dest[jwidth2 + 2*i] = __double2hiint(value);
	dest[jwidth2 + (2*i)+1] = __double2loint(value);
	}
	}
	}

	__global__ void print(cudaTextureObject_t t)
	{
	for (int j=0; j<8; j++)
	{
	for (int i=0; i<8; i++)
	{
	int2 v = tex1Dfetch<int2>(t, j*8 + i);
	double value = __hiloint2double(v.x, v.y);

	printf("GPU: %i %i %lf\n", i, j, value);
	}
	}
	}

	int main()
	{
	const int width = 8;
	const int height = 8;

	double* source_data;
	cudaCheckErrors(cudaMallocManaged(&source_data, widthheightsizeof(double)));;
	source_data[0*width + 0] = 1.3;
	source_data[0*width + 1] = 2.3;
	source_data[1*width + 0] = 3.3;
	source_data[2*width + 0] = 4.3;

	int* twochannel_data;
	cudaCheckErrors(cudaMallocManaged(&twochannel_data, 2widthheight*sizeof(int)));
	split_into_channels<<<1, 1>>>(twochannel_data, source_data, width, height);
	cudaCheckErrors(cudaDeviceSynchronize());

	struct cudaResourceDesc resourceDesc;
	memset(&resourceDesc, 0, sizeof(resourceDesc));
	resourceDesc.resType = cudaResourceTypeLinear;
	resourceDesc.res.linear.devPtr = twochannel_data;
	resourceDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
	resourceDesc.res.linear.desc.x = 32;
	resourceDesc.res.linear.desc.y = 32;
	resourceDesc.res.linear.sizeInBytes = 2widthheight*sizeof(int);

	struct cudaTextureDesc textureDesc;
	memset(&textureDesc, 0, sizeof(textureDesc));
	textureDesc.addressMode[0] = cudaAddressModeClamp;
	textureDesc.addressMode[1] = cudaAddressModeClamp;
	textureDesc.filterMode = cudaFilterModePoint;
	textureDesc.readMode = cudaReadModeElementType;
	textureDesc.normalizedCoords = false;

	cudaTextureObject_t texture = 0;
	cudaCheckErrors(cudaCreateTextureObject(&texture, &resourceDesc, &textureDesc, nullptr));

	print<<<1, 1>>>(texture);

	cudaCheckErrors(cudaDeviceSynchronize());

	cudaCheckErrors(cudaDestroyTextureObject(texture));
	cudaCheckErrors(cudaFree(twochannel_data));
	cudaCheckErrors(cudaFree(source_data));

	return 0;
	}