-
-
Save Dounm/bafb9d42feb18fd697df7103537e0b2b to your computer and use it in GitHub Desktop.
Test whether cudaMemcpyAsync host-to-device copies issued on different streams are executed sequentially.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
/* Number of CUDA streams a single transfer is split across. */
#define NUM_STREAMS 4
/*
 * Copies iBytes bytes from fSrc to fDest, splitting the transfer into
 * NUM_STREAMS contiguous chunks and issuing each chunk with cudaMemcpyAsync
 * on its own stream.
 *
 * Parameters:
 *   fDest      - destination buffer (at least iBytes bytes).
 *   fSrc       - source buffer (at least iBytes bytes).
 *   iBytes     - total number of BYTES to copy (not a float count).
 *   eDirection - copy direction passed through to the CUDA copy calls.
 *
 * Returns:
 *   cudaSuccess when the whole transfer completed, cudaErrorInvalidValue for
 *   a negative iBytes, otherwise the first error reported by a CUDA call.
 *
 * Behaviour notes:
 *   - If any stream cannot be created, the function falls back to one
 *     blocking cudaMemcpy of the whole buffer.
 *   - All streams are synchronized before they are destroyed, so the data
 *     has arrived when the function returns.
 *   - The last chunk also carries the remainder when iBytes is not a
 *     multiple of NUM_STREAMS.
 */
cudaError_t memcpyUsingStreams (float *fDest,
                                float *fSrc,
                                int iBytes,
                                cudaMemcpyKind eDirection)
{
    cudaStream_t aCuStream[NUM_STREAMS] ;
    int          iCreated = 0 ;   /* number of streams successfully created */
    int          iIndex   = 0 ;
    cudaError_t  cuError  = cudaSuccess ;
    cudaError_t  cuStatus = cudaSuccess ;
    /* Offsets are computed in bytes, so do the arithmetic on byte pointers;
       adding a byte offset to a float* would scale it by sizeof(float). */
    char        *pDest    = (char *) fDest ;
    char        *pSrc     = (char *) fSrc ;
    size_t       uTotal   = 0 ;
    size_t       uChunk   = 0 ;

    if (iBytes < 0)
    {
        return cudaErrorInvalidValue ;
    }
    if (iBytes == 0)
    {
        return cudaSuccess ;
    }

    uTotal = (size_t) iBytes ;
    uChunk = uTotal / NUM_STREAMS ;

    /* Create the streams, stopping at the first failure so that the error is
       not masked by a later successful creation. */
    for (iCreated = 0 ; iCreated < NUM_STREAMS ; iCreated++)
    {
        cuError = cudaStreamCreate (&aCuStream[iCreated]) ;
        if (cuError != cudaSuccess)
        {
            break ;
        }
    }

    if (cuError != cudaSuccess)
    {
        /* Streams unavailable: fall back to a single blocking copy. */
        cuError = cudaMemcpy (fDest, fSrc, uTotal, eDirection) ;
    }
    else
    {
        for (iIndex = 0 ; iIndex < NUM_STREAMS ; iIndex++)
        {
            /* Each chunk's offset is derived from the loop index and the
               fixed chunk size; the chunk size itself is never modified. */
            size_t uOffset = (size_t) iIndex * uChunk ;
            size_t uSize   = (iIndex == NUM_STREAMS - 1) ? (uTotal - uOffset)
                                                         : uChunk ;
            if (uSize == 0)
            {
                continue ;
            }
            cuError = cudaMemcpyAsync (pDest + uOffset, pSrc + uOffset, uSize,
                                       eDirection, aCuStream[iIndex]) ;
            if (cuError != cudaSuccess)
            {
                break ;
            }
        }

        /* Wait for every stream so the copy is complete on return; keep the
           first error seen rather than the last. */
        for (iIndex = 0 ; iIndex < NUM_STREAMS ; iIndex++)
        {
            cuStatus = cudaStreamSynchronize (aCuStream[iIndex]) ;
            if (cuError == cudaSuccess)
            {
                cuError = cuStatus ;
            }
        }
    }

    /* Destroy only the streams that were actually created, without letting a
       successful destroy overwrite an earlier copy error. */
    for (iIndex = 0 ; iIndex < iCreated ; iIndex++)
    {
        cuStatus = cudaStreamDestroy (aCuStream[iIndex]) ;
        if (cuError == cudaSuccess)
        {
            cuError = cuStatus ;
        }
    }

    return cuError ;
}
/*
 * Test driver: allocates a host buffer with cudaMallocHost and a device
 * buffer of the same size with cudaMalloc, fills the first 256*256*256 host
 * elements with their own index, and copies the whole buffer host-to-device
 * through memcpyUsingStreams.
 *
 * Returns 0 on success and 1 if any CUDA call fails (the error string is
 * printed to stderr).
 */
int main()
{
    float       *hdata      = NULL ;
    float       *ddata      = NULL ;
    /* Number of elements that get initialized (was the i/j/k triple loop). */
    const int    initCount  = 256 * 256 * 256 ;
    /* Total number of float ELEMENTS in each buffer (not bytes). */
    const int    count      = initCount * 16 ;
    const size_t totalBytes = sizeof (float) * count ;
    cudaError_t  cuError    = cudaSuccess ;
    int          index      = 0 ;

    cuError = cudaMallocHost ((void **) &hdata, totalBytes) ;
    if (cuError != cudaSuccess)
    {
        fprintf (stderr, "cudaMallocHost failed: %s\n",
                 cudaGetErrorString (cuError)) ;
        return 1 ;
    }

    cuError = cudaMalloc ((void **) &ddata, totalBytes) ;
    if (cuError != cudaSuccess)
    {
        fprintf (stderr, "cudaMalloc failed: %s\n",
                 cudaGetErrorString (cuError)) ;
        cudaFreeHost (hdata) ;
        return 1 ;
    }

    /* Same values as the original nested loops, whose combined index simply
       walked 0 .. initCount-1 in order. The rest of the buffer is left
       uninitialized, as before. */
    for (index = 0 ; index < initCount ; index++)
    {
        hdata [index] = (float) index ;
    }

    /* memcpyUsingStreams takes a byte count as int; totalBytes is 2^30 here,
       which fits. */
    cuError = memcpyUsingStreams (ddata, hdata, (int) totalBytes,
                                  cudaMemcpyHostToDevice) ;
    if (cuError != cudaSuccess)
    {
        fprintf (stderr, "memcpyUsingStreams failed: %s\n",
                 cudaGetErrorString (cuError)) ;
    }

    cudaFree (ddata) ;
    cudaFreeHost (hdata) ;
    return (cuError == cudaSuccess) ? 0 : 1 ;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment