Created
July 23, 2023 06:42
-
-
Save cgmb/a5feea2fbd5cb7b7142fdc3f8cb0282f to your computer and use it in GitHub Desktop.
HIP Graph Host-to-Host Copy Bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From b566e2bdb3a6d088d6155393afe31c4b5b617dc8 Mon Sep 17 00:00:00 2001 | |
From: Jaydeep Patel <jaydeepkumar.patel@amd.com> | |
Date: Mon, 2 Jan 2023 13:08:13 +0000 | |
Subject: [PATCH] SWDEV-375366 - SWDEV-375351 - Handle HtoH case for graph mem | |
cpy impl. | |
Change-Id: I5a8c3c3c22db045f714b0443b8d70a8c6b4a8cea | |
--- | |
src/hip_graph_helper.hpp | 4 +++ | |
src/hip_graph_internal.hpp | 30 ++++++++++++++++++++--- | |
src/hip_memory.cpp | 50 +++++++++++++++++++++++++++++--------- | |
3 files changed, 70 insertions(+), 14 deletions(-) | |
diff --git a/src/hip_graph_helper.hpp b/src/hip_graph_helper.hpp | |
index 4af57c69..69780338 100644 | |
--- a/src/hip_graph_helper.hpp | |
+++ b/src/hip_graph_helper.hpp | |
@@ -7,6 +7,10 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, hip | |
hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes, | |
hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false); | |
+void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue); | |
+ | |
+bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind); | |
+ | |
hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX, | |
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, | |
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, | |
diff --git a/src/hip_graph_internal.hpp b/src/hip_graph_internal.hpp | |
index ec016d1f..65ccfdc3 100644 | |
--- a/src/hip_graph_internal.hpp | |
+++ b/src/hip_graph_internal.hpp | |
@@ -1037,6 +1037,9 @@ class hipGraphMemcpyNode : public hipGraphNode { | |
} | |
hipError_t CreateCommand(amd::HostQueue* queue) { | |
+ if (IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) { | |
+ return hipSuccess; | |
+ } | |
hipError_t status = hipGraphNode::CreateCommand(queue); | |
if (status != hipSuccess) { | |
return status; | |
@@ -1048,6 +1051,16 @@ class hipGraphMemcpyNode : public hipGraphNode { | |
return status; | |
} | |
+ void EnqueueCommands(hipStream_t stream) override { | |
+ if (isEnabled_ && IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) { | |
+ ihipHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, | |
+ pCopyParams_->extent.width * pCopyParams_->extent.height * | |
+ pCopyParams_->extent.depth, *hip::getQueue(stream)); | |
+ return; | |
+ } | |
+ hipGraphNode::EnqueueCommands(stream); | |
+ } | |
+ | |
void GetParams(hipMemcpy3DParms* params) { | |
std::memcpy(params, pCopyParams_, sizeof(hipMemcpy3DParms)); | |
} | |
@@ -1170,6 +1183,9 @@ class hipGraphMemcpyNode1D : public hipGraphNode { | |
} | |
virtual hipError_t CreateCommand(amd::HostQueue* queue) { | |
+ if (IsHtoHMemcpy(dst_, src_, kind_)) { | |
+ return hipSuccess; | |
+ } | |
hipError_t status = hipGraphNode::CreateCommand(queue); | |
if (status != hipSuccess) { | |
return status; | |
@@ -1182,10 +1198,18 @@ class hipGraphMemcpyNode1D : public hipGraphNode { | |
} | |
void EnqueueCommands(hipStream_t stream) { | |
- if (commands_.empty()) return; | |
- // commands_ should have just 1 item | |
- assert(commands_.size() == 1 && "Invalid command size in hipGraphMemcpyNode1D"); | |
+ bool isH2H = IsHtoHMemcpy(dst_, src_, kind_); | |
+ if (!isH2H) { | |
+ if (commands_.empty()) return; | |
+ // commands_ should have just 1 item | |
+ assert(commands_.size() == 1 && "Invalid command size in hipGraphMemcpyNode1D"); | |
+ } | |
if (isEnabled_) { | |
+ //HtoH | |
+ if (isH2H) { | |
+ ihipHtoHMemcpy(dst_, src_, count_, *hip::getQueue(stream)); | |
+ return; | |
+ } | |
amd::Command* command = commands_[0]; | |
amd::HostQueue* cmdQueue = command->queue(); | |
amd::HostQueue* queue = hip::getQueue(stream); | |
diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp | |
index e333dce8..37a53e26 100644 | |
--- a/src/hip_memory.cpp | |
+++ b/src/hip_memory.cpp | |
@@ -321,7 +321,18 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) | |
memObj->getUserData().deviceId = hip::getCurrentDevice()->deviceId(); | |
return hipSuccess; | |
} | |
- | |
+bool IsHtoHMemcpyValid(void* dst, const void* src, hipMemcpyKind kind) { | |
+ size_t sOffset = 0; | |
+ amd::Memory* srcMemory = getMemoryObject(src, sOffset); | |
+ size_t dOffset = 0; | |
+ amd::Memory* dstMemory = getMemoryObject(dst, dOffset); | |
+ if (src && dst && srcMemory == nullptr && dstMemory == nullptr) { | |
+ if (kind != hipMemcpyHostToHost && kind != hipMemcpyDefault) { | |
+ return false; | |
+ } | |
+ } | |
+ return true; | |
+} | |
hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, | |
hipMemcpyKind kind) { | |
if (dst == nullptr || src == nullptr) { | |
@@ -337,6 +348,10 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, | |
(srcMemory && sizeBytes > (srcMemory->getSize() - sOffset))) { | |
return hipErrorInvalidValue; | |
} | |
+ //If src and dst ptr are null then kind must be either h2h or def. | |
+ if (!IsHtoHMemcpyValid(dst, src, kind)) { | |
+ return hipErrorInvalidValue; | |
+ } | |
return hipSuccess; | |
} | |
@@ -431,7 +446,22 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, | |
} | |
return hipSuccess; | |
} | |
- | |
+bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind) { | |
+ size_t sOffset = 0; | |
+ amd::Memory* srcMemory = getMemoryObject(src, sOffset); | |
+ size_t dOffset = 0; | |
+ amd::Memory* dstMemory = getMemoryObject(dst, dOffset); | |
+ if (srcMemory == nullptr && dstMemory == nullptr) { | |
+ if (kind == hipMemcpyHostToHost || kind == hipMemcpyDefault) { | |
+ return true; | |
+ } | |
+ } | |
+ return false; | |
+} | |
+void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue) { | |
+ queue.finish(); | |
+ memcpy(dst, src, sizeBytes); | |
+} | |
// ================================================================================================ | |
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, | |
amd::HostQueue& queue, bool isAsync = false) { | |
@@ -451,14 +481,9 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin | |
amd::Memory* srcMemory = getMemoryObject(src, sOffset); | |
size_t dOffset = 0; | |
amd::Memory* dstMemory = getMemoryObject(dst, dOffset); | |
- if ((srcMemory == nullptr) && (dstMemory == nullptr)) { | |
- if ((kind == hipMemcpyHostToHost) || (kind == hipMemcpyDefault)) { | |
- queue.finish(); | |
- memcpy(dst, src, sizeBytes); | |
- return hipSuccess; | |
- } else { | |
- return hipErrorInvalidValue; | |
- } | |
+ if (srcMemory == nullptr && dstMemory == nullptr) { | |
+ ihipHtoHMemcpy(dst, src, sizeBytes, queue); | |
+ return hipSuccess; | |
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { | |
isAsync = false; | |
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { | |
@@ -2631,7 +2656,10 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) { | |
if (p->kind < hipMemcpyHostToHost || p->kind > hipMemcpyDefault) { | |
return hipErrorInvalidMemcpyDirection; | |
} | |
- | |
+ //If src and dst ptr are null then kind must be either h2h or def. | |
+ if (!IsHtoHMemcpyValid(p->dstPtr.ptr, p->srcPtr.ptr, p->kind)) { | |
+ return hipErrorInvalidValue; | |
+ } | |
return hipSuccess; | |
} | |
https://github.com/ROCm-Developer-Tools/hipamd/commit/b566e2bdb3a6d088d6155393afe31c4b5b617dc8#diff-29a39aabc36bb48630574d8630dec58fc464c753abf755043766c620bcc71e5d |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include "hip/hip_runtime.h" | |
#define HIP_CHECK(error) \ | |
{ \ | |
hipError_t localError = error; \ | |
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ | |
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), \ | |
localError, #error, __FUNCTION__, __LINE__); \ | |
exit(0);\ | |
} \ | |
} | |
int main() { | |
size_t size = 1024; | |
int *A_h{nullptr}, *B_h{nullptr}; | |
size_t numBytes{size * sizeof(int)}; | |
A_h = reinterpret_cast<int*>(malloc(sizeof(int)*size)); | |
B_h = reinterpret_cast<int*>(malloc(sizeof(int)*size)); | |
// Initilize the host | |
for(size_t i = 0; i < size; i++) { | |
A_h[i] = i; | |
B_h[i] = 0; | |
} | |
hipGraph_t graph; | |
hipStream_t streamForGraph; | |
hipGraphExec_t graphExec; | |
hipGraphNode_t memcpyH2H; | |
HIP_CHECK(hipGraphCreate(&graph, 0)); | |
HIP_CHECK(hipStreamCreate(&streamForGraph)); | |
// Host to Host | |
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2H, graph, nullptr, 0, | |
B_h, A_h, numBytes, hipMemcpyHostToHost)); | |
// Instantiate and launch the graph | |
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); | |
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); | |
HIP_CHECK(hipStreamSynchronize(streamForGraph)); | |
HIP_CHECK(hipGraphExecDestroy(graphExec)); | |
HIP_CHECK(hipGraphDestroy(graph)); | |
HIP_CHECK(hipStreamDestroy(streamForGraph)); | |
// Validation | |
for (size_t i = 0; i < size; i++) { | |
if( A_h[i] == B_h[i]) { | |
printf("Test Passed\n"); | |
} | |
else { | |
printf("Test Failed\n"); | |
} | |
} | |
free(A_h); | |
free(B_h); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment