Skip to content

Instantly share code, notes, and snippets.

@msharmavikram
Last active September 9, 2020 03:02
Show Gist options
  • Save msharmavikram/24a59be830c51e6208aeaa51a075cd30 to your computer and use it in GitHub Desktop.
Save msharmavikram/24a59be830c51e6208aeaa51a075cd30 to your computer and use it in GitHub Desktop.
dragon_test - failed case
/**
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
// For the CUDA runtime routines (prefixed with "cuda")
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include <dragon.h>
/*
 * CUDA_CALL_SAFE(f): evaluate the CUDA runtime expression `f` exactly once
 * and, if the result is not cudaSuccess, print the source location together
 * with the error name and description to stderr and abort() the process.
 *
 * The argument is parenthesized so that any expression can be passed safely.
 * Nothing follows abort() because it never returns; this also keeps the macro
 * usable inside functions of any return type.
 */
#define CUDA_CALL_SAFE(f) \
    do \
    { \
        cudaError_t _cuda_error = (f); \
        if (_cuda_error != cudaSuccess) \
        { \
            fprintf(stderr, \
                    "%s, %d, CUDA ERROR: %s %s\n", \
                    __FILE__, \
                    __LINE__, \
                    cudaGetErrorName(_cuda_error), \
                    cudaGetErrorString(_cuda_error)); \
            abort(); \
        } \
    } while (0)
/* Elapsed time from tv_start to tv_stop, expressed in milliseconds. */
double time_diff(struct timeval tv_start, struct timeval tv_stop)
{
    /* Difference of the whole-second fields, scaled to milliseconds. */
    const double sec_part_ms = (double)(tv_stop.tv_sec - tv_start.tv_sec) * 1000.0;
    /* Difference of the microsecond fields (may be negative), scaled to milliseconds. */
    const double usec_part_ms = (double)(tv_stop.tv_usec - tv_start.tv_usec) / 1000.0;

    return sec_part_ms + usec_part_ms;
}
/**
 * CUDA Kernel Device code
 *
 * The thread whose flat global index (blockDim.x * blockIdx.x + threadIdx.x)
 * equals 0 stores 1 into C[0] and prints "Write 1" via device printf.
 * Every other thread returns without touching memory.
 *
 * C is dereferenced on the device, so it must be writable from device code
 * and hold at least one unsigned int.
 */
__global__ void
dragon_test(unsigned int *C)
{
    const unsigned long global_tid =
        (unsigned long)blockDim.x * (unsigned long)blockIdx.x + (unsigned long)threadIdx.x;

    // Only the very first thread of the grid performs the write.
    if (global_tid != 0)
        return;

    C[global_tid] = 1;
    printf("Write 1\n");
}
/**
 * Host main routine
 *
 * Calls dragon_map on a file path to obtain the pointer g_C, launches the
 * dragon_test kernel with one block of one thread on that pointer, waits for
 * the kernel to finish, and then calls dragon_unmap on g_C.
 *
 * argv[1] (optional): path handed to dragon_map. When it is omitted the
 * built-in default path is used.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE when dragon_map or
 * dragon_unmap does not return D_OK; a CUDA error is handled by
 * CUDA_CALL_SAFE.
 */
int main(int argc, char *argv[])
{
    unsigned int *g_C = NULL;
    // Size argument passed to dragon_map (presumably bytes -- confirm against dragon.h).
    unsigned long size = 128;
    char default_filepath[] = "/home/vsm2/dragon/examples/dragon_test/data/new.mem";
    // Allow the backing file to be chosen on the command line.
    char *filepath = (argc > 1) ? argv[1] : default_filepath;

    if (dragon_map(filepath, size, D_F_CREATE | D_F_WRITE, (void **) &g_C) != D_OK)
    {
        fprintf(stderr, "Cannot do dragon_map %s\n", filepath);
        exit(EXIT_FAILURE);
    }

    long blocksPerGrid = 1;
    long threads_per_block = 1;
    // %ld matches the long arguments.
    printf("CUDA kernel launch with %ld blocks of %ld threads\n", blocksPerGrid, threads_per_block);

    dragon_test<<<blocksPerGrid, threads_per_block>>>(g_C);
    // A kernel launch returns no status itself; launch errors are read here.
    CUDA_CALL_SAFE(cudaGetLastError());
    // Errors raised while the kernel executes surface at this synchronization.
    CUDA_CALL_SAFE(cudaDeviceSynchronize());
    printf("Kenrel completed\n");
    fflush(stdout);

    if (dragon_unmap(g_C) != D_OK)
    {
        fprintf(stderr, "Cannot do dragon_unmap g_C\n");
        exit(EXIT_FAILURE);
    }

    printf("Run success\n");
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment