juniorprincewang/test_cudaLaunchKernel.cu

## test_cudaLaunchKernel.cu
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html
// From https://github.com/NVlabs/stylegan2/blob/master/test_nvcc.cu

#include <cstdio>

// Terminates the program if a CUDA runtime call did not succeed.
//
// err: status code returned by a CUDA runtime API call.
//
// On any status other than cudaSuccess, prints "<error name>: <error string>"
// to stdout and exits with status 1. Returns normally on cudaSuccess.
void checkCudaError(cudaError_t err)
{
    // Fast path: nothing to report.
    if (err == cudaSuccess)
        return;

    const char *errName = cudaGetErrorName(err);
    const char *errText = cudaGetErrorString(err);
    printf("%s: %s\n", errName, errText);
    exit(1);
}

// Minimal device kernel: each launched thread prints a greeting using
// device-side printf. Takes no arguments and writes no memory.
__global__ void cudaKernel(void)
{
    printf("GPU says hello.\n");
}

// Smoke test: prints a greeting on the host, launches cudaKernel with a
// single block of a single thread through cudaLaunchKernel (instead of the
// <<<...>>> syntax), then waits for the device to finish.
// Any CUDA error is reported by checkCudaError, which exits with status 1.
int main(void)
{
    const dim3 oneBlock(1, 1, 1);
    const dim3 oneThread(1, 1, 1);
    cudaError_t status;

    printf("CPU says hello.\n");

    // The kernel takes no parameters, so the argument array is NULL.
    status = cudaLaunchKernel((void*)cudaKernel, oneBlock, oneThread, NULL, 0, NULL);
    checkCudaError(status);

    // Wait for the kernel to complete before returning.
    status = cudaDeviceSynchronize();
    checkCudaError(status);

    return 0;
}

## test_cudalaunchkernel_params.cu
#include <cuda.h>
#include <stdio.h>

// Evaluates a CUDA runtime call exactly once; if it does not return
// cudaSuccess, prints the error string with file and line to stderr and
// exits with status 1.
//
// Wrapped in do { ... } while (0) so that `CHECK(x);` behaves as a single
// statement everywhere (e.g. inside an unbraced if/else). The local is named
// check_err_ so it cannot shadow a variable called `err` that appears inside
// the `call` expression.
#define CHECK(call) do { \
	cudaError_t check_err_ = (call); \
	if (check_err_ != cudaSuccess) { \
		fprintf(stderr, "Got error %s at %s:%d\n", cudaGetErrorString(check_err_), \
				__FILE__, __LINE__); \
		exit(1); \
	} \
} while (0)

// Adds `value` to one element of g_data per thread.
//
// g_data: device array updated in place.
// value:  amount added to each element.
//
// Each thread handles the element at its flat 1D global index
// (blockIdx.x * blockDim.x + threadIdx.x). There is no bounds check, so the
// launch must not create more threads than g_data has elements.
__global__ void kernel(float *g_data, float value)
{
    const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;
    g_data[globalIdx] += value;
}

// Verifies that the first n elements of data are all exactly equal to x.
//
// data: host array to inspect.
// n:    number of elements to check.
// x:    reference value every element is compared against (exact comparison).
//
// Returns 1 when every element matches (including when n <= 0); otherwise
// prints the first mismatching index and value to stdout and returns 0.
int checkResult(float *data, const int n, const float x)
{
    int pos = 0;
    while (pos < n)
    {
        const float actual = data[pos];
        if (actual != x)
        {
            printf("Error! data[%d] = %f, ref = %f\n", pos, actual, x);
            return 0;
        }
        ++pos;
    }

    return 1;
}

// Minimal device kernel: each launched thread prints a greeting using
// device-side printf. Takes no arguments and writes no memory.
__global__ void cudaKernel(void)
{
    printf("GPU says hello!\n");
}


// Host driver exercising cudaLaunchKernel with and without kernel arguments.
//
// Steps:
//   1. Select device 0 and print the device count and its properties.
//   2. Allocate and zero a host buffer and a device buffer of `num` floats.
//   3. Launch the argument-less cudaKernel with one block of one thread.
//   4. Launch `kernel` over the whole buffer, passing its parameters through
//      the void* argument array required by cudaLaunchKernel.
//   5. Copy the data back and verify every element equals `value`.
//
// Returns EXIT_SUCCESS only when verification passes; EXIT_FAILURE when the
// host allocation fails or the results do not match. CUDA API failures are
// handled by CHECK, which exits with status 1.
int main()
{
	const int devID = 0;
	int count = 0;
	struct cudaDeviceProp props;
	float *d_a = 0;
	float *h_a = 0;
	dim3 block, grid;
	const int num = 1 << 22;
	// Keep the byte count in size_t so it matches the allocation/copy APIs.
	const size_t nbytes = (size_t)num * sizeof(float);
	float value = 41;

	CHECK(cudaSetDevice(devID));
	CHECK(cudaGetDeviceCount(&count));
	printf("cuda count=%d\n", count);
	CHECK(cudaGetDeviceProperties(&props, devID));
	printf("Device %d: \"%s\" with Compute %d.%d capability\n",devID, props.name, props.major, props.minor);

	h_a = (float*)malloc(nbytes);
	if (h_a == NULL)
	{
		// Do not hand a null pointer to memset.
		fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", nbytes);
		return EXIT_FAILURE;
	}
	memset(h_a, 0, nbytes);
	CHECK(cudaMalloc((void**)&d_a, nbytes));
	CHECK(cudaMemset(d_a, 0, nbytes));

	// set kernel launch configuration: 32 threads per block, enough blocks
	// to cover `num` elements (num is a multiple of 32, so the grid covers
	// the buffer exactly, which the unguarded kernel requires).
	block = dim3(32,1,1);
	grid  = dim3((num + block.x - 1) / block.x);
	CHECK(cudaMemcpy(d_a, h_a, nbytes, cudaMemcpyHostToDevice));

	// Equivalent to: cudaKernel<<<1, 1>>>();
	CHECK(cudaLaunchKernel((void*)cudaKernel, 1, 1, NULL, 0, NULL));

	// Equivalent to: kernel<<<grid, block>>>(d_a, value);
	// Each entry points at the storage of one kernel parameter, in order.
	void *args[] = {&d_a, &value};
	CHECK(cudaLaunchKernel((void*)kernel, grid, block, args, 0, NULL));

	// Surface asynchronous kernel execution errors here rather than at the
	// following copy.
	CHECK(cudaDeviceSynchronize());
	CHECK(cudaMemcpy(h_a, d_a, nbytes, cudaMemcpyDeviceToHost));

	bool bFinalResults = (bool) checkResult(h_a, num, value);
	printf("result:%s\n", bFinalResults? "PASS" : "FAILED");
	CHECK(cudaFree(d_a));
	free(h_a);

	// Report verification failure through the process exit status.
	return bFinalResults ? EXIT_SUCCESS : EXIT_FAILURE;
}
	// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
	//
	// This work is made available under the Nvidia Source Code License-NC.
	// To view a copy of this license, visit
	// https://nvlabs.github.io/stylegan2/license.html
	// From https://github.com/NVlabs/stylegan2/blob/master/test_nvcc.cu

	#include <cstdio>

	// Terminates the program if a CUDA runtime call did not succeed.
	//
	// err: status code returned by a CUDA runtime API call.
	//
	// On any status other than cudaSuccess, prints "<error name>: <error string>"
	// to stdout and exits with status 1. Returns normally on cudaSuccess.
	void checkCudaError(cudaError_t err)
	{
		// Fast path: nothing to report.
		if (err == cudaSuccess)
			return;

		const char *errName = cudaGetErrorName(err);
		const char *errText = cudaGetErrorString(err);
		printf("%s: %s\n", errName, errText);
		exit(1);
	}

	// Minimal device kernel: each launched thread prints a greeting using
	// device-side printf. Takes no arguments and writes no memory.
	__global__ void cudaKernel(void)
	{
	printf("GPU says hello.\n");
	}

	// Smoke test: prints a greeting on the host, launches cudaKernel with a
	// single block of a single thread through cudaLaunchKernel (instead of the
	// <<<...>>> syntax), then waits for the device to finish.
	// Any CUDA error is reported by checkCudaError, which exits with status 1.
	int main(void)
	{
		const dim3 oneBlock(1, 1, 1);
		const dim3 oneThread(1, 1, 1);
		cudaError_t status;

		printf("CPU says hello.\n");

		// The kernel takes no parameters, so the argument array is NULL.
		status = cudaLaunchKernel((void*)cudaKernel, oneBlock, oneThread, NULL, 0, NULL);
		checkCudaError(status);

		// Wait for the kernel to complete before returning.
		status = cudaDeviceSynchronize();
		checkCudaError(status);

		return 0;
	}
	#include <cuda.h>
	#include <stdio.h>

	// Evaluates a CUDA runtime call exactly once; if it does not return
	// cudaSuccess, prints the error string with file and line to stderr and
	// exits with status 1.
	//
	// Wrapped in do { ... } while (0) so that `CHECK(x);` behaves as a single
	// statement everywhere (e.g. inside an unbraced if/else). The local is named
	// check_err_ so it cannot shadow a variable called `err` that appears inside
	// the `call` expression.
	#define CHECK(call) do { \
		cudaError_t check_err_ = (call); \
		if (check_err_ != cudaSuccess) { \
			fprintf(stderr, "Got error %s at %s:%d\n", cudaGetErrorString(check_err_), \
					__FILE__, __LINE__); \
			exit(1); \
		} \
	} while (0)

	// Adds `value` to one element of g_data per thread.
	//
	// g_data: device array updated in place.
	// value:  amount added to each element.
	//
	// Each thread handles the element at its flat 1D global index
	// (blockIdx.x * blockDim.x + threadIdx.x). There is no bounds check, so the
	// launch must not create more threads than g_data has elements.
	__global__ void kernel(float *g_data, float value)
	{
		const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;
		g_data[globalIdx] += value;
	}

	// Verifies that the first n elements of data are all exactly equal to x.
	//
	// data: host array to inspect.
	// n:    number of elements to check.
	// x:    reference value every element is compared against (exact comparison).
	//
	// Returns 1 when every element matches (including when n <= 0); otherwise
	// prints the first mismatching index and value to stdout and returns 0.
	int checkResult(float *data, const int n, const float x)
	{
		int pos = 0;
		while (pos < n)
		{
			const float actual = data[pos];
			if (actual != x)
			{
				printf("Error! data[%d] = %f, ref = %f\n", pos, actual, x);
				return 0;
			}
			++pos;
		}

		return 1;
	}

	// Minimal device kernel: each launched thread prints a greeting using
	// device-side printf. Takes no arguments and writes no memory.
	__global__ void cudaKernel(void)
	{
	printf("GPU says hello!\n");
	}


	// Host driver exercising cudaLaunchKernel with and without kernel arguments.
	//
	// Steps:
	//   1. Select device 0 and print the device count and its properties.
	//   2. Allocate and zero a host buffer and a device buffer of `num` floats.
	//   3. Launch the argument-less cudaKernel with one block of one thread.
	//   4. Launch `kernel` over the whole buffer, passing its parameters through
	//      the void* argument array required by cudaLaunchKernel.
	//   5. Copy the data back and verify every element equals `value`.
	//
	// Returns EXIT_SUCCESS only when verification passes; EXIT_FAILURE when the
	// host allocation fails or the results do not match. CUDA API failures are
	// handled by CHECK, which exits with status 1.
	int main()
	{
		const int devID = 0;
		int count = 0;
		struct cudaDeviceProp props;
		float *d_a = 0;
		float *h_a = 0;
		dim3 block, grid;
		const int num = 1 << 22;
		// Keep the byte count in size_t so it matches the allocation/copy APIs.
		const size_t nbytes = (size_t)num * sizeof(float);
		float value = 41;

		CHECK(cudaSetDevice(devID));
		CHECK(cudaGetDeviceCount(&count));
		printf("cuda count=%d\n", count);
		CHECK(cudaGetDeviceProperties(&props, devID));
		printf("Device %d: \"%s\" with Compute %d.%d capability\n",devID, props.name, props.major, props.minor);

		h_a = (float*)malloc(nbytes);
		if (h_a == NULL)
		{
			// Do not hand a null pointer to memset.
			fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", nbytes);
			return EXIT_FAILURE;
		}
		memset(h_a, 0, nbytes);
		CHECK(cudaMalloc((void**)&d_a, nbytes));
		CHECK(cudaMemset(d_a, 0, nbytes));

		// set kernel launch configuration: 32 threads per block, enough blocks
		// to cover `num` elements (num is a multiple of 32, so the grid covers
		// the buffer exactly, which the unguarded kernel requires).
		block = dim3(32,1,1);
		grid = dim3((num + block.x - 1) / block.x);
		CHECK(cudaMemcpy(d_a, h_a, nbytes, cudaMemcpyHostToDevice));

		// Equivalent to: cudaKernel<<<1, 1>>>();
		CHECK(cudaLaunchKernel((void*)cudaKernel, 1, 1, NULL, 0, NULL));

		// Equivalent to: kernel<<<grid, block>>>(d_a, value);
		// Each entry points at the storage of one kernel parameter, in order.
		void *args[] = {&d_a, &value};
		CHECK(cudaLaunchKernel((void*)kernel, grid, block, args, 0, NULL));

		// Surface asynchronous kernel execution errors here rather than at the
		// following copy.
		CHECK(cudaDeviceSynchronize());
		CHECK(cudaMemcpy(h_a, d_a, nbytes, cudaMemcpyDeviceToHost));

		bool bFinalResults = (bool) checkResult(h_a, num, value);
		printf("result:%s\n", bFinalResults? "PASS" : "FAILED");
		CHECK(cudaFree(d_a));
		free(h_a);

		// Report verification failure through the process exit status.
		return bFinalResults ? EXIT_SUCCESS : EXIT_FAILURE;
	}