Created
March 14, 2019 16:01
-
-
Save maleadt/2c488b3458fdfa0d599b0348217c6326 to your computer and use it in GitHub Desktop.
cuda-gdb WIPs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "cuda.h" | |
#include "cudadebugger.h" | |
#include <iostream> | |
#include <pthread.h> | |
#include <signal.h> | |
// helpers | |
CUDBGAPI cudbgAPI; | |
void exit_safely(int code) { | |
cudbgAPI->finalize(); | |
exit(code); | |
} | |
#define cudbgCheck(ans) \ | |
{ __cudbgCheck((ans), __FILE__, __LINE__); } | |
inline void __cudbgCheck(CUDBGResult res, const char *file, int line) { | |
if (res != CUDBG_SUCCESS) { | |
std::cerr << "ERROR: " << cudbgGetErrorString(res) << " at " << file << ":" | |
<< line << std::endl; | |
exit_safely(EXIT_FAILURE); | |
} | |
} | |
// event handling | |
pthread_cond_t event_cond = PTHREAD_COND_INITIALIZER; | |
pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER; | |
void event_callback(CUDBGEventCallbackData *data) { | |
std::cout << "Event notification" << std::endl; | |
pthread_cond_signal(&event_cond); | |
} | |
void handle_events() { | |
while (true) { | |
CUDBGEvent event; | |
CUDBGResult res = cudbgAPI->getNextEvent(CUDBG_EVENT_QUEUE_TYPE_SYNC, &event); | |
if (res == CUDBG_ERROR_NO_EVENT_AVAILABLE) { | |
break; | |
} else if (res != CUDBG_SUCCESS) { | |
std::cerr << "HANDLER ERROR: " << cudbgGetErrorString(res) << std::endl; | |
break; | |
} | |
std::cout << "Event: "; | |
switch (event.kind) { | |
case CUDBG_EVENT_INVALID: | |
std::cout << "CUDBG_EVENT_INVALID"; | |
break; | |
case CUDBG_EVENT_ELF_IMAGE_LOADED: | |
std::cout << "CUDBG_EVENT_ELF_IMAGE_LOADED"; | |
break; | |
case CUDBG_EVENT_KERNEL_READY: | |
std::cout << "CUDBG_EVENT_KERNEL_READY"; | |
break; | |
case CUDBG_EVENT_KERNEL_FINISHED: | |
std::cout << "CUDBG_EVENT_KERNEL_FINISHED"; | |
break; | |
case CUDBG_EVENT_INTERNAL_ERROR: | |
std::cout << "CUDBG_EVENT_INTERNAL_ERROR (" | |
<< cudbgGetErrorString(event.cases.internalError.errorType) | |
<< ")"; | |
break; | |
case CUDBG_EVENT_CTX_PUSH: | |
std::cout << "CUDBG_EVENT_CTX_PUSH"; | |
break; | |
case CUDBG_EVENT_CTX_POP: | |
std::cout << "CUDBG_EVENT_CTX_POP"; | |
break; | |
case CUDBG_EVENT_CTX_CREATE: | |
std::cout << "CUDBG_EVENT_CTX_CREATE"; | |
break; | |
case CUDBG_EVENT_CTX_DESTROY: | |
std::cout << "CUDBG_EVENT_CTX_DESTROY"; | |
break; | |
case CUDBG_EVENT_TIMEOUT: | |
std::cout << "CUDBG_EVENT_TIMEOUT"; | |
break; | |
case CUDBG_EVENT_ATTACH_COMPLETE: | |
std::cout << "CUDBG_EVENT_ATTACH_COMPLETE"; | |
break; | |
case CUDBG_EVENT_DETACH_COMPLETE: | |
std::cout << "CUDBG_EVENT_DETACH_COMPLETE"; | |
break; | |
case CUDBG_EVENT_ELF_IMAGE_UNLOADED: | |
std::cout << "CUDBG_EVENT_ELF_IMAGE_UNLOADED"; | |
break; | |
default: | |
std::cout << "unknown event"; | |
break; | |
} | |
std::cout << std::endl; | |
} | |
// TODO: we should probably acknowledge the sync events here; | |
// I think that's why I'm getting the timeout events. | |
} | |
void *event_handler(void *null) { | |
while (true) { | |
pthread_mutex_lock(&event_lock); | |
pthread_cond_wait(&event_cond, &event_lock); | |
handle_events(); | |
pthread_mutex_unlock(&event_lock); | |
} | |
} | |
// main | |
__global__ void kernel() { printf("Hello, World!\n"); } | |
int main(int argc, char const *argv[]) { | |
signal(SIGINT, exit_safely); | |
// gets the api | |
std::cout << "Initializing debug API" << std::endl; | |
uint32_t major, minor, rev; | |
cudbgCheck(cudbgGetAPIVersion(&major, &minor, &rev)); | |
cudbgCheck(cudbgGetAPI(major, minor, rev, &cudbgAPI)); | |
cudbgCheck(cudbgAPI->initialize()); | |
// starts thread to print out events | |
std::cout << "Starting event handler" << std::endl; | |
pthread_t mannage_event_thread; | |
pthread_create(&mannage_event_thread, NULL, event_handler, NULL); | |
cudbgCheck(cudbgAPI->setNotifyNewEventCallback(event_callback)); | |
// Causes the program to freeze | |
std::cout << "Launching kernel" << std::endl; | |
kernel<<<1, 1>>>(); | |
exit_safely(0); | |
return 0; | |
} | |
// this example does not work for unknown reasons, resulting in an "internal error (invalid | |
// context)" event that really does not make any sense. | |
// | |
// the subsequent timeouts are due to not acknowledging the sync events. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module CUDAdbg | |
using CUDAdrv, CUDAnative | |
using Printf | |
## base.jl | |
const CUDBGResult_t = Cint | |
struct CUDBGResult <: Exception | |
code::CUDBGResult_t | |
end | |
function api_version() | |
major_ref = Ref{UInt32}() | |
minor_ref = Ref{UInt32}() | |
patch_ref = Ref{UInt32}() | |
CUDAdrv.@apicall(:cudbgGetAPIVersion, (Ptr{UInt32}, Ptr{UInt32}, Ptr{UInt32}), major_ref, minor_ref, patch_ref) | |
return VersionNumber(major_ref[], minor_ref[], patch_ref[]) | |
end | |
struct CUDBGAPI | |
# Initialization | |
initialize::Ptr{Cvoid} # (void) | |
finalize::Ptr{Cvoid} # (void) | |
# Device Execution Control | |
suspendDevice::Ptr{Cvoid} # (uint32_t dev) | |
resumeDevice::Ptr{Cvoid} # (uint32_t dev) | |
singleStepWarp40::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp) | |
# Breakpoints | |
setBreakpoint31::Ptr{Cvoid} # (uint64_t addr) | |
unsetBreakpoint31::Ptr{Cvoid} # (uint64_t addr) | |
# Device State Inspection | |
readGridId50::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *gridId) | |
readBlockIdx32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *blockIdx) | |
readThreadIdx::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CuDim3 *threadIdx) | |
readBrokenWarps::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint64_t *brokenWarpsMask) | |
readValidWarps::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint64_t *validWarpsMask) | |
readValidLanes::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *validLanesMask) | |
readActiveLanes::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *activeLanesMask) | |
readCodeMemory::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, void *buf, uint32_t sz) | |
readConstMemory::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, void *buf, uint32_t sz) | |
readGlobalMemory31::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, void *buf, uint32_t sz) | |
readParamMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz) | |
readSharedMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz) | |
readLocalMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz) | |
readRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t *val) | |
readPC::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc) | |
readVirtualPC::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc) | |
readLaneStatus::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, bool *error) | |
# Device State Alteration | |
writeGlobalMemory31::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, const void *buf, uint32_t sz) | |
writeParamMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz) | |
writeSharedMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz) | |
writeLocalMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz) | |
writeRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t val) | |
# Grid Properties | |
getGridDim32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *gridDim) | |
getBlockDim::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockDim) | |
getTID::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *tid) | |
getElfImage32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint32_t *size) | |
# Device Properties | |
getDeviceType::Ptr{Cvoid} # (uint32_t dev, char *buf, uint32_t sz) | |
getSmType::Ptr{Cvoid} # (uint32_t dev, char *buf, uint32_t sz) | |
getNumDevices::Ptr{Cvoid} # (uint32_t *numDev) | |
getNumSMs::Ptr{Cvoid} # (uint32_t dev, uint32_t *numSMs) | |
getNumWarps::Ptr{Cvoid} # (uint32_t dev, uint32_t *numWarps) | |
getNumLanes::Ptr{Cvoid} # (uint32_t dev, uint32_t *numLanes) | |
getNumRegisters::Ptr{Cvoid} # (uint32_t dev, uint32_t *numRegs) | |
# DWARF-related routines | |
getPhysicalRegister30::Ptr{Cvoid} # (uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClass *regClass) | |
disassemble::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, uint32_t *instSize, char *buf, uint32_t sz) | |
isDeviceCodeAddress55::Ptr{Cvoid} # (uintptr_t addr, bool *isDeviceAddress) | |
lookupDeviceCodeSymbol::Ptr{Cvoid} # (char *symName, bool *symFound, uintptr_t *symAddr) | |
# Events | |
setNotifyNewEventCallback31::Ptr{Cvoid} # (CUDBGNotifyNewEventCallback31 callback, void *data) | |
getNextEvent30::Ptr{Cvoid} # (CUDBGEvent30 *event) | |
acknowledgeEvent30::Ptr{Cvoid} # (CUDBGEvent30 *event) | |
# 3.1 Extensions | |
getGridAttribute::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttribute attr, uint64_t *value) | |
getGridAttributes::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttributeValuePair *pairs, uint32_t numPairs) | |
getPhysicalRegister40::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClass *regClass) | |
readLaneException::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CUDBGException_t *exception) | |
getNextEvent32::Ptr{Cvoid} # (CUDBGEvent32 *event) | |
acknowledgeEvents42::Ptr{Cvoid} # (void) | |
# 3.1 - ABI | |
readCallDepth32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *depth) | |
readReturnAddress32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra) | |
readVirtualReturnAddress32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra) | |
# 3.2 Extensions | |
readGlobalMemory55::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz) | |
writeGlobalMemory55::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz) | |
readPinnedMemory::Ptr{Cvoid} # (uint64_t addr, void *buf, uint32_t sz) | |
writePinnedMemory::Ptr{Cvoid} # (uint64_t addr, const void *buf, uint32_t sz) | |
setBreakpoint::Ptr{Cvoid} # (uint32_t dev, uint64_t addr) | |
unsetBreakpoint::Ptr{Cvoid} # (uint32_t dev, uint64_t addr) | |
setNotifyNewEventCallback40::Ptr{Cvoid} # (CUDBGNotifyNewEventCallback40 callback) | |
# 4.0 Extensions | |
getNextEvent42::Ptr{Cvoid} # (CUDBGEvent42 *event) | |
readTextureMemory::Ptr{Cvoid} # (uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t id, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz) | |
readBlockIdx::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockIdx) | |
getGridDim::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *gridDim) | |
readCallDepth::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth) | |
readReturnAddress::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra) | |
readVirtualReturnAddress::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra) | |
getElfImage::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint64_t *size) | |
# 4.1 Extensions | |
getHostAddrFromDeviceAddr::Ptr{Cvoid} # (uint32_t dev, uint64_t device_addr, uint64_t *host_addr) | |
singleStepWarp41::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *warpMask) | |
setNotifyNewEventCallback::Ptr{Cvoid} # (CUDBGNotifyNewEventCallback callback) | |
readSyscallCallDepth::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth) | |
# 4.2 Extensions | |
readTextureMemoryBindless::Ptr{Cvoid} # (uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t texSymtabIndex, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz) | |
# 5.0 Extensions | |
clearAttachState::Ptr{Cvoid} # (void) | |
getNextSyncEvent50::Ptr{Cvoid} # (CUDBGEvent50 *event) | |
memcheckReadErrorAddress::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *address, ptxStorageKind *storage) | |
acknowledgeSyncEvents::Ptr{Cvoid} # (void) | |
getNextAsyncEvent50::Ptr{Cvoid} # (CUDBGEvent50 *event) | |
requestCleanupOnDetach55::Ptr{Cvoid} # (void) | |
initializeAttachStub::Ptr{Cvoid} # (void) | |
getGridStatus50::Ptr{Cvoid} # (uint32_t dev, uint32_t gridId, CUDBGGridStatus *status) | |
# 5.5 Extensions | |
getNextSyncEvent55::Ptr{Cvoid} # (CUDBGEvent55 *event) | |
getNextAsyncEvent55::Ptr{Cvoid} # (CUDBGEvent55 *event) | |
getGridInfo::Ptr{Cvoid} # (uint32_t dev, uint64_t gridId64, CUDBGGridInfo *gridInfo) | |
readGridId::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *gridId64) | |
getGridStatus::Ptr{Cvoid} # (uint32_t dev, uint64_t gridId64, CUDBGGridStatus *status) | |
setKernelLaunchNotificationMode::Ptr{Cvoid} # (CUDBGKernelLaunchNotifyMode mode) | |
getDevicePCIBusInfo::Ptr{Cvoid} # (uint32_t devId, uint32_t *pciBusId, uint32_t *pciDevId) | |
readDeviceExceptionState80::Ptr{Cvoid} # (uint32_t devId, uint64_t *exceptionSMMask) | |
# 6.0 Extensions | |
getAdjustedCodeAddress::Ptr{Cvoid} # (uint32_t devId, uint64_t address, uint64_t *adjustedAddress, CUDBGAdjAddrAction adjAction) | |
readErrorPC::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, uint64_t *errorPC, bool *errorPCValid) | |
getNextEvent::Ptr{Cvoid} # (CUDBGEventQueueType type, CUDBGEvent *event) | |
getElfImageByHandle::Ptr{Cvoid} # (uint32_t devId, uint64_t handle, CUDBGElfImageType type, void *elfImage, uint64_t size) | |
resumeWarpsUntilPC::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint64_t warpMask, uint64_t virtPC) | |
readWarpState::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, CUDBGWarpState *state) | |
readRegisterRange::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t index, uint32_t registers_size, uint32_t *registers) | |
readGenericMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz) | |
writeGenericMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz) | |
readGlobalMemory::Ptr{Cvoid} # (uint64_t addr, void *buf, uint32_t sz) | |
writeGlobalMemory::Ptr{Cvoid} # (uint64_t addr, const void *buf, uint32_t sz) | |
getManagedMemoryRegionInfo::Ptr{Cvoid} # (uint64_t startAddress, CUDBGMemoryInfo *memoryInfo, uint32_t memoryInfo_size, uint32_t *numEntries) | |
isDeviceCodeAddress::Ptr{Cvoid} # (uintptr_t addr, bool *isDeviceAddress) | |
requestCleanupOnDetach::Ptr{Cvoid} # (uint32_t appResumeFlag) | |
# 6.5 Extensions | |
readPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, uint32_t *predicates) | |
writePredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, const uint32_t *predicates) | |
getNumPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t *numPredicates) | |
readCCRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *val) | |
writeCCRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t val) | |
getDeviceName::Ptr{Cvoid} # (uint32_t dev, char *buf, uint32_t sz) | |
singleStepWarp::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t nsteps, uint64_t *warpMask) | |
# 9.0 Extensions | |
readDeviceExceptionState::Ptr{Cvoid} # (uint32_t devId, uint64_t *mask, uint32_t numWords) | |
# 10.0 Extensions | |
getNumUniformRegisters::Ptr{Cvoid} # (uint32_t dev, uint32_t *numRegs) | |
readUniformRegisterRange::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, uint32_t regno, uint32_t registers_size, uint32_t *registers) | |
writeUniformRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t regno, uint32_t val) | |
getNumUniformPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t *numPredicates) | |
readUniformPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t predicates_size, uint32_t *predicates) | |
writeUniformPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t predicates_size, const uint32_t *predicates) | |
end | |
const api = Ref{CUDBGAPI}() | |
macro apicall(funspec, argtypes, args...) | |
isa(funspec, QuoteNode) || error("first argument to @apicall should be a symbol") | |
fun = funspec.value | |
return quote | |
global api | |
status = ccall(api[].$fun, CUDBGResult_t, | |
$(esc(argtypes)), $(map(esc, args)...)) | |
if status != SUCCESS.code | |
err = CUDBGResult(status) | |
throw(err) | |
end | |
end | |
end | |
## errors.jl | |
function name(err::CUDBGResult) | |
return_codes[err.code] | |
end | |
Base.showerror(io::IO, err::CUDBGResult) = | |
@printf(io, "CUDA debugger error (code #%d, %s)", err.code, name(err)) | |
Base.show(io::IO, err::CUDBGResult) = @printf(io, "CUDBGResult(%d, %s)", err.code, name(err)) | |
# known error constants | |
const return_codes = Dict{CUDBGResult_t,Symbol}( | |
0x0000 => :SUCCESS, | |
0x0001 => :ERROR_UNKNOWN, | |
0x0002 => :ERROR_BUFFER_TOO_SMALL, | |
0x0003 => :ERROR_UNKNOWN_FUNCTION, | |
0x0004 => :ERROR_INVALID_ARGS, | |
0x0005 => :ERROR_UNINITIALIZED, | |
0x0006 => :ERROR_INVALID_COORDINATES, | |
0x0007 => :ERROR_INVALID_MEMORY_SEGMENT, | |
0x0008 => :ERROR_INVALID_MEMORY_ACCESS, | |
0x0009 => :ERROR_MEMORY_MAPPING_FAILED, | |
0x000a => :ERROR_INTERNAL, | |
0x000b => :ERROR_INVALID_DEVICE, | |
0x000c => :ERROR_INVALID_SM, | |
0x000d => :ERROR_INVALID_WARP, | |
0x000e => :ERROR_INVALID_LANE, | |
0x000f => :ERROR_SUSPENDED_DEVICE, | |
0x0010 => :ERROR_RUNNING_DEVICE, | |
0x0011 => :ERROR_RESERVED_0, | |
0x0012 => :ERROR_INVALID_ADDRESS, | |
0x0013 => :ERROR_INCOMPATIBLE_API, | |
0x0014 => :ERROR_INITIALIZATION_FAILURE, | |
0x0015 => :ERROR_INVALID_GRID, | |
0x0016 => :ERROR_NO_EVENT_AVAILABLE, | |
0x0017 => :ERROR_SOME_DEVICES_WATCHDOGGED, | |
0x0018 => :ERROR_ALL_DEVICES_WATCHDOGGED, | |
0x0019 => :ERROR_INVALID_ATTRIBUTE, | |
0x001a => :ERROR_ZERO_CALL_DEPTH, | |
0x001b => :ERROR_INVALID_CALL_LEVEL, | |
0x001c => :ERROR_COMMUNICATION_FAILURE, | |
0x001d => :ERROR_INVALID_CONTEXT, | |
0x001e => :ERROR_ADDRESS_NOT_IN_DEVICE_MEM, | |
0x001f => :ERROR_MEMORY_UNMAPPING_FAILED, | |
0x0020 => :ERROR_INCOMPATIBLE_DISPLAY_DRIVER, | |
0x0021 => :ERROR_INVALID_MODULE, | |
0x0022 => :ERROR_LANE_NOT_IN_SYSCALL, | |
0x0023 => :ERROR_MEMCHECK_NOT_ENABLED, | |
0x0024 => :ERROR_INVALID_ENVVAR_ARGS, | |
0x0025 => :ERROR_OS_RESOURCES, | |
0x0026 => :ERROR_FORK_FAILED, | |
0x0027 => :ERROR_NO_DEVICE_AVAILABLE, | |
0x0028 => :ERROR_ATTACH_NOT_POSSIBLE, | |
0x0029 => :ERROR_WARP_RESUME_NOT_POSSIBLE, | |
0x002a => :ERROR_INVALID_WARP_MASK, | |
0x002b => :ERROR_AMBIGUOUS_MEMORY_ADDRESS, | |
0x002c => :ERROR_RECURSIVE_API_CALL, | |
0x002d => :ERROR_MISSING_DATA, | |
) | |
for code in return_codes | |
@eval const $(code[2]) = CUDBGResult($(code[1])) | |
end | |
## events | |
struct CUDBGEventCallbackData | |
tid::UInt32 | |
timeout::UInt32 | |
end | |
const event_cond = Base.AsyncCondition() | |
function event_callback(data) | |
global event_cond | |
ccall(:uv_async_send, Cint, (Ptr{Cvoid},), event_cond.handle) | |
# NOTE: this callback happens in a thread that is not managed by Julia, | |
# so we can't do anything useful here. Wake the main event handler. | |
return | |
end | |
@enum CUDBGEventQueueType::Cint begin | |
EVENT_QUEUE_TYPE_SYNC = 0 | |
EVENT_QUEUE_TYPE_ASYNC = 1 | |
end | |
@enum CUDBGEventKind::Cint begin | |
EVENT_INVALID = 0x000 | |
EVENT_ELF_IMAGE_LOADED = 0x001 | |
EVENT_KERNEL_READY = 0x002 | |
EVENT_KERNEL_FINISHED = 0x003 | |
EVENT_INTERNAL_ERROR = 0x004 | |
EVENT_CTX_PUSH = 0x005 | |
EVENT_CTX_POP = 0x006 | |
EVENT_CTX_CREATE = 0x007 | |
EVENT_CTX_DESTROY = 0x008 | |
EVENT_TIMEOUT = 0x009 | |
EVENT_ATTACH_COMPLETE = 0x00a | |
EVENT_DETACH_COMPLETE = 0x00b | |
EVENT_ELF_IMAGE_UNLOADED = 0x00c | |
end | |
struct CUDBGEvent | |
kind::CUDBGEventKind | |
cases::NTuple{88, Int8} | |
end | |
function event_handler() | |
global event_cond | |
while true | |
println("Waiting for event...") | |
wait(event_cond) | |
println("Got one!") | |
end | |
end | |
## init.jl | |
initialize() = @apicall(:initialize, ()) | |
finalize() = @apicall(:finalize, ()) | |
function __init__() | |
# get an API handle | |
ver = api_version() | |
api_ptr = Ref{Ptr{CUDBGAPI}}() | |
CUDAdrv.@apicall(:cudbgGetAPI, | |
(UInt32, UInt32, UInt32, Ptr{Ptr{CUDBGAPI}},), | |
ver.major, ver.minor, ver.patch, api_ptr) | |
api[] = unsafe_load(api_ptr[]) | |
# initialize the API | |
println("Initializing debug API") | |
initialize() | |
atexit(finalize) | |
# FIXME: calling CUDAdbg's finalize causes subsequent cuContextDestroy calls to hang... | |
# register and launch the event handler | |
println("Starting event handler") | |
# TODO: use constructor of AsyncCondition that takes a callback directly | |
event_cb = @cfunction(event_callback, Nothing, (Ptr{CUDBGEventCallbackData},)) | |
@apicall(:setNotifyNewEventCallback, (Ptr{Cvoid},), event_cb) | |
schedule(@task event_handler()) | |
end | |
end | |
## main | |
using .CUDAdbg | |
using CUDAnative | |
function kernel() | |
@cuprintf("Hello, World!\n") | |
return | |
end | |
println("Launching kernel") | |
@cuda kernel() | |
# this currently does not work _at all_ (ie. doesn't even catch any event) because Julia is | |
# single-threaded, and the main event loop is stuck in the kernel that fails to launch | |
# (which the CUDA C version also suffers from). in the future, with PATR, the event callback | |
# should wake the event handler in a Julia thread. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment