msharmavikram/dragon_test.cu

## dragon_test.cu
/**
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

#include <helper_cuda.h>

#include <dragon.h>

/**
 * Evaluate the CUDA runtime expression `f` exactly once and check its result.
 *
 * If the result is not cudaSuccess, the source file, line number, CUDA error
 * name and CUDA error string are written to stderr and the process is
 * terminated with abort().
 *
 * The argument is parenthesized so that any expression can be passed safely.
 * Because abort() never returns, no `return` statement follows it; the macro
 * is therefore usable in functions of any return type.
 */
#define CUDA_CALL_SAFE(f) \
    do \
    { \
        cudaError_t _cuda_error = (f); \
        if (_cuda_error != cudaSuccess) \
        { \
            fprintf(stderr, \
                "%s, %d, CUDA ERROR: %s %s\n", \
                __FILE__, \
                __LINE__, \
                cudaGetErrorName(_cuda_error), \
                cudaGetErrorString(_cuda_error) \
            ); \
            abort(); \
        } \
    } while (0)

/**
 * Elapsed time between two timeval samples, in milliseconds.
 *
 * tv_start: earlier sample.
 * tv_stop:  later sample.
 *
 * Returns (tv_stop - tv_start) expressed in milliseconds; the result is
 * negative when tv_stop precedes tv_start.
 */
double time_diff(struct timeval tv_start, struct timeval tv_stop)
{
    /* Whole-second component of the interval, converted to milliseconds. */
    const double sec_part_ms = (double)(tv_stop.tv_sec - tv_start.tv_sec) * 1000.0;

    /* Microsecond component of the interval, converted to milliseconds. */
    const double usec_part_ms = (double)(tv_stop.tv_usec - tv_start.tv_usec) / 1000.0;

    return sec_part_ms + usec_part_ms;
}
/**
 * CUDA Kernel Device code
 *
 * Every thread computes its global linear index as
 * blockDim.x * blockIdx.x + threadIdx.x. A thread whose index is 0 stores
 * the value 1 into C[0] and prints "Write 1" with device-side printf; all
 * other threads return without touching memory.
 *
 * C: pointer the kernel writes a single unsigned int through. In this file
 *    it is the pointer filled in by dragon_map() in main().
 */
__global__ void
dragon_test(unsigned int *C)
{
    const unsigned long block_offset = (unsigned long)blockDim.x * (unsigned long)blockIdx.x;
    const unsigned long global_idx = block_offset + (unsigned long)threadIdx.x;

    // Threads other than index 0 have nothing to do.
    if (global_idx != 0)
    {
        return;
    }

    C[global_idx] = 1;
    printf("Write 1\n");
}

/**
 * Host main routine
 *
 * Maps a file with dragon_map(), launches the dragon_test kernel with one
 * block of one thread on the mapped pointer, waits for the kernel to finish,
 * and then releases the mapping with dragon_unmap().
 *
 * argc, argv: unused.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if dragon_map() or
 * dragon_unmap() does not return D_OK. A CUDA error reported by the launch
 * or by the synchronization is handled by CUDA_CALL_SAFE.
 */
int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    unsigned int *g_C = NULL;
    // Size passed to dragon_map(); presumably in bytes -- confirm against dragon.h.
    unsigned long size = 128;

    char filepath[] = "/home/vsm2/dragon/examples/dragon_test/data/new.mem";

    if (dragon_map(filepath, size, D_F_CREATE | D_F_WRITE, (void **) &g_C) != D_OK)
    {
        fprintf(stderr, "Cannot do dragon_map %s\n", filepath);
        exit(EXIT_FAILURE);
    }

    long blocksPerGrid = 1;
    long threads_per_block = 1;
    // Both counts are long, so the conversion specifier must be %ld.
    printf("CUDA kernel launch with %ld blocks of %ld threads\n", blocksPerGrid, threads_per_block);
    dragon_test<<<blocksPerGrid, threads_per_block>>>(g_C);
    // A kernel launch returns no status; launch errors are read back here.
    CUDA_CALL_SAFE(cudaGetLastError());
    // Errors raised while the kernel runs surface at this synchronization.
    CUDA_CALL_SAFE(cudaDeviceSynchronize());

    printf("Kenrel completed\n");
    fflush(stdout);

    if (dragon_unmap(g_C) != D_OK)
    {
        fprintf(stderr, "Cannot do dragon_unmap g_C\n");
        exit(EXIT_FAILURE);
    }

    printf("Run success\n");
    return 0;
}
	/**
	* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
	*
	* Please refer to the NVIDIA end user license agreement (EULA) associated
	* with this source code for terms and conditions that govern your use of
	* this software. Any use, reproduction, disclosure, or distribution of
	* this software and related documentation outside the terms of the EULA
	* is strictly prohibited.
	*
	*/

	#include <stdio.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <fcntl.h>
	#include <sys/time.h>

	// For the CUDA runtime routines (prefixed with "cuda_")
	#include <cuda_runtime.h>

	#include <helper_cuda.h>

	#include <dragon.h>

	/**
	 * Evaluate the CUDA runtime expression `f` exactly once and check its result.
	 *
	 * If the result is not cudaSuccess, the source file, line number, CUDA error
	 * name and CUDA error string are written to stderr and the process is
	 * terminated with abort().
	 *
	 * The argument is parenthesized so that any expression can be passed safely.
	 * Because abort() never returns, no `return` statement follows it; the macro
	 * is therefore usable in functions of any return type.
	 */
	#define CUDA_CALL_SAFE(f) \
	    do \
	    { \
	        cudaError_t _cuda_error = (f); \
	        if (_cuda_error != cudaSuccess) \
	        { \
	            fprintf(stderr, \
	                "%s, %d, CUDA ERROR: %s %s\n", \
	                __FILE__, \
	                __LINE__, \
	                cudaGetErrorName(_cuda_error), \
	                cudaGetErrorString(_cuda_error) \
	            ); \
	            abort(); \
	        } \
	    } while (0)

	/**
	 * Elapsed time between two timeval samples, in milliseconds.
	 *
	 * tv_start: earlier sample.
	 * tv_stop:  later sample.
	 *
	 * Returns (tv_stop - tv_start) expressed in milliseconds; the result is
	 * negative when tv_stop precedes tv_start.
	 */
	double time_diff(struct timeval tv_start, struct timeval tv_stop)
	{
	    /* Whole-second component of the interval, converted to milliseconds. */
	    const double sec_part_ms = (double)(tv_stop.tv_sec - tv_start.tv_sec) * 1000.0;

	    /* Microsecond component of the interval, converted to milliseconds. */
	    const double usec_part_ms = (double)(tv_stop.tv_usec - tv_start.tv_usec) / 1000.0;

	    return sec_part_ms + usec_part_ms;
	}
	/**
	 * CUDA Kernel Device code
	 *
	 * Every thread computes its global linear index as
	 * blockDim.x * blockIdx.x + threadIdx.x. A thread whose index is 0 stores
	 * the value 1 into C[0] and prints "Write 1" with device-side printf; all
	 * other threads return without touching memory.
	 *
	 * C: pointer the kernel writes a single unsigned int through. In this file
	 *    it is the pointer filled in by dragon_map() in main().
	 */
	__global__ void
	dragon_test(unsigned int *C)
	{
	    const unsigned long block_offset = (unsigned long)blockDim.x * (unsigned long)blockIdx.x;
	    const unsigned long global_idx = block_offset + (unsigned long)threadIdx.x;

	    // Threads other than index 0 have nothing to do.
	    if (global_idx != 0)
	    {
	        return;
	    }

	    C[global_idx] = 1;
	    printf("Write 1\n");
	}

	/**
	 * Host main routine
	 *
	 * Maps a file with dragon_map(), launches the dragon_test kernel with one
	 * block of one thread on the mapped pointer, waits for the kernel to finish,
	 * and then releases the mapping with dragon_unmap().
	 *
	 * argc, argv: unused.
	 *
	 * Returns 0 on success. Exits with EXIT_FAILURE if dragon_map() or
	 * dragon_unmap() does not return D_OK. A CUDA error reported by the launch
	 * or by the synchronization is handled by CUDA_CALL_SAFE.
	 */
	int main(int argc, char *argv[])
	{
	    (void) argc;
	    (void) argv;

	    unsigned int *g_C = NULL;
	    // Size passed to dragon_map(); presumably in bytes -- confirm against dragon.h.
	    unsigned long size = 128;

	    char filepath[] = "/home/vsm2/dragon/examples/dragon_test/data/new.mem";

	    // The flags are combined with a plain bitwise OR.
	    if (dragon_map(filepath, size, D_F_CREATE | D_F_WRITE, (void **) &g_C) != D_OK)
	    {
	        fprintf(stderr, "Cannot do dragon_map %s\n", filepath);
	        exit(EXIT_FAILURE);
	    }

	    long blocksPerGrid = 1;
	    long threads_per_block = 1;
	    // Both counts are long, so the conversion specifier must be %ld.
	    printf("CUDA kernel launch with %ld blocks of %ld threads\n", blocksPerGrid, threads_per_block);
	    dragon_test<<<blocksPerGrid, threads_per_block>>>(g_C);
	    // A kernel launch returns no status; launch errors are read back here.
	    CUDA_CALL_SAFE(cudaGetLastError());
	    // Errors raised while the kernel runs surface at this synchronization.
	    CUDA_CALL_SAFE(cudaDeviceSynchronize());

	    printf("Kenrel completed\n");
	    fflush(stdout);

	    if (dragon_unmap(g_C) != D_OK)
	    {
	        fprintf(stderr, "Cannot do dragon_unmap g_C\n");
	        exit(EXIT_FAILURE);
	    }

	    printf("Run success\n");
	    return 0;
	}