Skip to content

Instantly share code, notes, and snippets.

@youkaichao
Created June 4, 2024 04:15
Show Gist options
  • Save youkaichao/d96433deea23fbbcfce479ba0c1f73eb to your computer and use it in GitHub Desktop.
Save youkaichao/d96433deea23fbbcfce479ba0c1f73eb to your computer and use it in GitHub Desktop.
Wait kernel: the GPU busy-waits until the CPU signals it to continue.
#include <cstdio>
#include <iostream>
#include <cuda_runtime.h>
// Busy-waits on the device until *flag becomes true, then prints a message.
//
// flag: location polled by the kernel. It is volatile-qualified so that each
//       poll is compiled to an actual memory read instead of reusing a value
//       the compiler already loaded.
//
// Every thread in the launch runs the same loop and prints once when released.
__global__ void waitKernel(volatile bool *flag) {
    for (;;) {
        // Re-read the flag on every pass; leave as soon as it is set.
        if (*flag) {
            break;
        }
        // System-scope memory fence between polls. Optional here: the
        // volatile read above is what forces the reload.
        __threadfence_system();
    }
    printf("will finish!\n");
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Launches waitKernel on a managed flag, waits for the user to press enter,
// then sets the flag so the kernel can leave its busy-wait loop.
// Returns 0 on success and 1 if any CUDA call fails or the device cannot
// share managed memory with the host while a kernel is running.
int main() {
    // The host stores to the managed flag while the kernel is still running.
    // That is only permitted when the device reports concurrent managed
    // access, so verify it up front instead of faulting on the store later.
    int device = 0;
    if (!cudaSucceeded(cudaGetDevice(&device), "cudaGetDevice")) {
        return 1;
    }
    int concurrentManagedAccess = 0;
    if (!cudaSucceeded(cudaDeviceGetAttribute(&concurrentManagedAccess,
                                              cudaDevAttrConcurrentManagedAccess,
                                              device),
                       "cudaDeviceGetAttribute")) {
        return 1;
    }
    if (!concurrentManagedAccess) {
        fprintf(stderr,
                "Device %d does not support concurrent managed access; "
                "the host cannot write the flag while the kernel runs.\n",
                device);
        return 1;
    }

    // Allocate the flag in managed memory so host and device share one pointer.
    bool *d_flag = nullptr;
    if (!cudaSucceeded(cudaMallocManaged(&d_flag, sizeof(bool)), "cudaMallocManaged")) {
        return 1;
    }

    // Start with the flag cleared so the kernel begins in its wait loop.
    *d_flag = false;

    // Launch the waiting kernel with a single thread.
    waitKernel<<<1, 1>>>((volatile bool *)d_flag);
    // A launch does not return an error itself; query it explicitly. If the
    // launch failed no kernel is spinning, so freeing here cannot block.
    if (!cudaSucceeded(cudaGetLastError(), "waitKernel launch")) {
        cudaFree(d_flag);
        return 1;
    }

    // Hold the kernel in its loop until the user presses enter.
    std::cout << "Kernel is waiting. Press enter to continue..." << std::endl;
    std::cin.get();

    // Release the kernel from its busy-wait loop.
    *d_flag = true;

    // Wait for the kernel to finish; this also surfaces execution errors.
    int exitCode = 0;
    if (!cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
        exitCode = 1;
    }

    // Free the managed memory on both the success and the failure path.
    if (!cudaSucceeded(cudaFree(d_flag), "cudaFree")) {
        exitCode = 1;
    }
    return exitCode;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment