Skip to content

Instantly share code, notes, and snippets.

@maleadt
Created March 14, 2019 16:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maleadt/2c488b3458fdfa0d599b0348217c6326 to your computer and use it in GitHub Desktop.
Save maleadt/2c488b3458fdfa0d599b0348217c6326 to your computer and use it in GitHub Desktop.
cuda-gdb WIPs
#include "cuda.h"
#include "cudadebugger.h"
#include <iostream>
#include <pthread.h>
#include <signal.h>
// helpers
CUDBGAPI cudbgAPI;
void exit_safely(int code) {
cudbgAPI->finalize();
exit(code);
}
#define cudbgCheck(ans) \
{ __cudbgCheck((ans), __FILE__, __LINE__); }
inline void __cudbgCheck(CUDBGResult res, const char *file, int line) {
if (res != CUDBG_SUCCESS) {
std::cerr << "ERROR: " << cudbgGetErrorString(res) << " at " << file << ":"
<< line << std::endl;
exit_safely(EXIT_FAILURE);
}
}
// event handling
pthread_cond_t event_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;
void event_callback(CUDBGEventCallbackData *data) {
std::cout << "Event notification" << std::endl;
pthread_cond_signal(&event_cond);
}
void handle_events() {
while (true) {
CUDBGEvent event;
CUDBGResult res = cudbgAPI->getNextEvent(CUDBG_EVENT_QUEUE_TYPE_SYNC, &event);
if (res == CUDBG_ERROR_NO_EVENT_AVAILABLE) {
break;
} else if (res != CUDBG_SUCCESS) {
std::cerr << "HANDLER ERROR: " << cudbgGetErrorString(res) << std::endl;
break;
}
std::cout << "Event: ";
switch (event.kind) {
case CUDBG_EVENT_INVALID:
std::cout << "CUDBG_EVENT_INVALID";
break;
case CUDBG_EVENT_ELF_IMAGE_LOADED:
std::cout << "CUDBG_EVENT_ELF_IMAGE_LOADED";
break;
case CUDBG_EVENT_KERNEL_READY:
std::cout << "CUDBG_EVENT_KERNEL_READY";
break;
case CUDBG_EVENT_KERNEL_FINISHED:
std::cout << "CUDBG_EVENT_KERNEL_FINISHED";
break;
case CUDBG_EVENT_INTERNAL_ERROR:
std::cout << "CUDBG_EVENT_INTERNAL_ERROR ("
<< cudbgGetErrorString(event.cases.internalError.errorType)
<< ")";
break;
case CUDBG_EVENT_CTX_PUSH:
std::cout << "CUDBG_EVENT_CTX_PUSH";
break;
case CUDBG_EVENT_CTX_POP:
std::cout << "CUDBG_EVENT_CTX_POP";
break;
case CUDBG_EVENT_CTX_CREATE:
std::cout << "CUDBG_EVENT_CTX_CREATE";
break;
case CUDBG_EVENT_CTX_DESTROY:
std::cout << "CUDBG_EVENT_CTX_DESTROY";
break;
case CUDBG_EVENT_TIMEOUT:
std::cout << "CUDBG_EVENT_TIMEOUT";
break;
case CUDBG_EVENT_ATTACH_COMPLETE:
std::cout << "CUDBG_EVENT_ATTACH_COMPLETE";
break;
case CUDBG_EVENT_DETACH_COMPLETE:
std::cout << "CUDBG_EVENT_DETACH_COMPLETE";
break;
case CUDBG_EVENT_ELF_IMAGE_UNLOADED:
std::cout << "CUDBG_EVENT_ELF_IMAGE_UNLOADED";
break;
default:
std::cout << "unknown event";
break;
}
std::cout << std::endl;
}
// TODO: we should probably acknowledge the sync events here;
// I think that's why I'm getting the timeout events.
}
void *event_handler(void *null) {
while (true) {
pthread_mutex_lock(&event_lock);
pthread_cond_wait(&event_cond, &event_lock);
handle_events();
pthread_mutex_unlock(&event_lock);
}
}
// main
__global__ void kernel() { printf("Hello, World!\n"); }
int main(int argc, char const *argv[]) {
signal(SIGINT, exit_safely);
// gets the api
std::cout << "Initializing debug API" << std::endl;
uint32_t major, minor, rev;
cudbgCheck(cudbgGetAPIVersion(&major, &minor, &rev));
cudbgCheck(cudbgGetAPI(major, minor, rev, &cudbgAPI));
cudbgCheck(cudbgAPI->initialize());
// starts thread to print out events
std::cout << "Starting event handler" << std::endl;
pthread_t mannage_event_thread;
pthread_create(&mannage_event_thread, NULL, event_handler, NULL);
cudbgCheck(cudbgAPI->setNotifyNewEventCallback(event_callback));
// Causes the program to freeze
std::cout << "Launching kernel" << std::endl;
kernel<<<1, 1>>>();
exit_safely(0);
return 0;
}
// this example does not work for unknown reasons, resulting in an "internal error (invalid
// context)" event that really does not make any sense.
//
// the subsequent timeouts are due to not acknowledging the sync events.
module CUDAdbg
using CUDAdrv, CUDAnative
using Printf
## base.jl
const CUDBGResult_t = Cint
struct CUDBGResult <: Exception
code::CUDBGResult_t
end
function api_version()
major_ref = Ref{UInt32}()
minor_ref = Ref{UInt32}()
patch_ref = Ref{UInt32}()
CUDAdrv.@apicall(:cudbgGetAPIVersion, (Ptr{UInt32}, Ptr{UInt32}, Ptr{UInt32}), major_ref, minor_ref, patch_ref)
return VersionNumber(major_ref[], minor_ref[], patch_ref[])
end
struct CUDBGAPI
# Initialization
initialize::Ptr{Cvoid} # (void)
finalize::Ptr{Cvoid} # (void)
# Device Execution Control
suspendDevice::Ptr{Cvoid} # (uint32_t dev)
resumeDevice::Ptr{Cvoid} # (uint32_t dev)
singleStepWarp40::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp)
# Breakpoints
setBreakpoint31::Ptr{Cvoid} # (uint64_t addr)
unsetBreakpoint31::Ptr{Cvoid} # (uint64_t addr)
# Device State Inspection
readGridId50::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *gridId)
readBlockIdx32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *blockIdx)
readThreadIdx::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CuDim3 *threadIdx)
readBrokenWarps::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint64_t *brokenWarpsMask)
readValidWarps::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint64_t *validWarpsMask)
readValidLanes::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *validLanesMask)
readActiveLanes::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *activeLanesMask)
readCodeMemory::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, void *buf, uint32_t sz)
readConstMemory::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, void *buf, uint32_t sz)
readGlobalMemory31::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, void *buf, uint32_t sz)
readParamMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz)
readSharedMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz)
readLocalMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz)
readRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t *val)
readPC::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc)
readVirtualPC::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc)
readLaneStatus::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, bool *error)
# Device State Alteration
writeGlobalMemory31::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, const void *buf, uint32_t sz)
writeParamMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz)
writeSharedMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz)
writeLocalMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz)
writeRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t val)
# Grid Properties
getGridDim32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *gridDim)
getBlockDim::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockDim)
getTID::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *tid)
getElfImage32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint32_t *size)
# Device Properties
getDeviceType::Ptr{Cvoid} # (uint32_t dev, char *buf, uint32_t sz)
getSmType::Ptr{Cvoid} # (uint32_t dev, char *buf, uint32_t sz)
getNumDevices::Ptr{Cvoid} # (uint32_t *numDev)
getNumSMs::Ptr{Cvoid} # (uint32_t dev, uint32_t *numSMs)
getNumWarps::Ptr{Cvoid} # (uint32_t dev, uint32_t *numWarps)
getNumLanes::Ptr{Cvoid} # (uint32_t dev, uint32_t *numLanes)
getNumRegisters::Ptr{Cvoid} # (uint32_t dev, uint32_t *numRegs)
# DWARF-related routines
getPhysicalRegister30::Ptr{Cvoid} # (uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClass *regClass)
disassemble::Ptr{Cvoid} # (uint32_t dev, uint64_t addr, uint32_t *instSize, char *buf, uint32_t sz)
isDeviceCodeAddress55::Ptr{Cvoid} # (uintptr_t addr, bool *isDeviceAddress)
lookupDeviceCodeSymbol::Ptr{Cvoid} # (char *symName, bool *symFound, uintptr_t *symAddr)
# Events
setNotifyNewEventCallback31::Ptr{Cvoid} # (CUDBGNotifyNewEventCallback31 callback, void *data)
getNextEvent30::Ptr{Cvoid} # (CUDBGEvent30 *event)
acknowledgeEvent30::Ptr{Cvoid} # (CUDBGEvent30 *event)
# 3.1 Extensions
getGridAttribute::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttribute attr, uint64_t *value)
getGridAttributes::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttributeValuePair *pairs, uint32_t numPairs)
getPhysicalRegister40::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClass *regClass)
readLaneException::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CUDBGException_t *exception)
getNextEvent32::Ptr{Cvoid} # (CUDBGEvent32 *event)
acknowledgeEvents42::Ptr{Cvoid} # (void)
# 3.1 - ABI
readCallDepth32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *depth)
readReturnAddress32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra)
readVirtualReturnAddress32::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra)
# 3.2 Extensions
readGlobalMemory55::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz)
writeGlobalMemory55::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz)
readPinnedMemory::Ptr{Cvoid} # (uint64_t addr, void *buf, uint32_t sz)
writePinnedMemory::Ptr{Cvoid} # (uint64_t addr, const void *buf, uint32_t sz)
setBreakpoint::Ptr{Cvoid} # (uint32_t dev, uint64_t addr)
unsetBreakpoint::Ptr{Cvoid} # (uint32_t dev, uint64_t addr)
setNotifyNewEventCallback40::Ptr{Cvoid} # (CUDBGNotifyNewEventCallback40 callback)
# 4.0 Extensions
getNextEvent42::Ptr{Cvoid} # (CUDBGEvent42 *event)
readTextureMemory::Ptr{Cvoid} # (uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t id, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz)
readBlockIdx::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockIdx)
getGridDim::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *gridDim)
readCallDepth::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth)
readReturnAddress::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra)
readVirtualReturnAddress::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra)
getElfImage::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint64_t *size)
# 4.1 Extensions
getHostAddrFromDeviceAddr::Ptr{Cvoid} # (uint32_t dev, uint64_t device_addr, uint64_t *host_addr)
singleStepWarp41::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *warpMask)
setNotifyNewEventCallback::Ptr{Cvoid} # (CUDBGNotifyNewEventCallback callback)
readSyscallCallDepth::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth)
# 4.2 Extensions
readTextureMemoryBindless::Ptr{Cvoid} # (uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t texSymtabIndex, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz)
# 5.0 Extensions
clearAttachState::Ptr{Cvoid} # (void)
getNextSyncEvent50::Ptr{Cvoid} # (CUDBGEvent50 *event)
memcheckReadErrorAddress::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *address, ptxStorageKind *storage)
acknowledgeSyncEvents::Ptr{Cvoid} # (void)
getNextAsyncEvent50::Ptr{Cvoid} # (CUDBGEvent50 *event)
requestCleanupOnDetach55::Ptr{Cvoid} # (void)
initializeAttachStub::Ptr{Cvoid} # (void)
getGridStatus50::Ptr{Cvoid} # (uint32_t dev, uint32_t gridId, CUDBGGridStatus *status)
# 5.5 Extensions
getNextSyncEvent55::Ptr{Cvoid} # (CUDBGEvent55 *event)
getNextAsyncEvent55::Ptr{Cvoid} # (CUDBGEvent55 *event)
getGridInfo::Ptr{Cvoid} # (uint32_t dev, uint64_t gridId64, CUDBGGridInfo *gridInfo)
readGridId::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *gridId64)
getGridStatus::Ptr{Cvoid} # (uint32_t dev, uint64_t gridId64, CUDBGGridStatus *status)
setKernelLaunchNotificationMode::Ptr{Cvoid} # (CUDBGKernelLaunchNotifyMode mode)
getDevicePCIBusInfo::Ptr{Cvoid} # (uint32_t devId, uint32_t *pciBusId, uint32_t *pciDevId)
readDeviceExceptionState80::Ptr{Cvoid} # (uint32_t devId, uint64_t *exceptionSMMask)
# 6.0 Extensions
getAdjustedCodeAddress::Ptr{Cvoid} # (uint32_t devId, uint64_t address, uint64_t *adjustedAddress, CUDBGAdjAddrAction adjAction)
readErrorPC::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, uint64_t *errorPC, bool *errorPCValid)
getNextEvent::Ptr{Cvoid} # (CUDBGEventQueueType type, CUDBGEvent *event)
getElfImageByHandle::Ptr{Cvoid} # (uint32_t devId, uint64_t handle, CUDBGElfImageType type, void *elfImage, uint64_t size)
resumeWarpsUntilPC::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint64_t warpMask, uint64_t virtPC)
readWarpState::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, CUDBGWarpState *state)
readRegisterRange::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t index, uint32_t registers_size, uint32_t *registers)
readGenericMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz)
writeGenericMemory::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz)
readGlobalMemory::Ptr{Cvoid} # (uint64_t addr, void *buf, uint32_t sz)
writeGlobalMemory::Ptr{Cvoid} # (uint64_t addr, const void *buf, uint32_t sz)
getManagedMemoryRegionInfo::Ptr{Cvoid} # (uint64_t startAddress, CUDBGMemoryInfo *memoryInfo, uint32_t memoryInfo_size, uint32_t *numEntries)
isDeviceCodeAddress::Ptr{Cvoid} # (uintptr_t addr, bool *isDeviceAddress)
requestCleanupOnDetach::Ptr{Cvoid} # (uint32_t appResumeFlag)
# 6.5 Extensions
readPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, uint32_t *predicates)
writePredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, const uint32_t *predicates)
getNumPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t *numPredicates)
readCCRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *val)
writeCCRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t val)
getDeviceName::Ptr{Cvoid} # (uint32_t dev, char *buf, uint32_t sz)
singleStepWarp::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t nsteps, uint64_t *warpMask)
# 9.0 Extensions
readDeviceExceptionState::Ptr{Cvoid} # (uint32_t devId, uint64_t *mask, uint32_t numWords)
# 10.0 Extensions
getNumUniformRegisters::Ptr{Cvoid} # (uint32_t dev, uint32_t *numRegs)
readUniformRegisterRange::Ptr{Cvoid} # (uint32_t devId, uint32_t sm, uint32_t wp, uint32_t regno, uint32_t registers_size, uint32_t *registers)
writeUniformRegister::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t regno, uint32_t val)
getNumUniformPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t *numPredicates)
readUniformPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t predicates_size, uint32_t *predicates)
writeUniformPredicates::Ptr{Cvoid} # (uint32_t dev, uint32_t sm, uint32_t wp, uint32_t predicates_size, const uint32_t *predicates)
end
const api = Ref{CUDBGAPI}()
macro apicall(funspec, argtypes, args...)
isa(funspec, QuoteNode) || error("first argument to @apicall should be a symbol")
fun = funspec.value
return quote
global api
status = ccall(api[].$fun, CUDBGResult_t,
$(esc(argtypes)), $(map(esc, args)...))
if status != SUCCESS.code
err = CUDBGResult(status)
throw(err)
end
end
end
## errors.jl
function name(err::CUDBGResult)
return_codes[err.code]
end
Base.showerror(io::IO, err::CUDBGResult) =
@printf(io, "CUDA debugger error (code #%d, %s)", err.code, name(err))
Base.show(io::IO, err::CUDBGResult) = @printf(io, "CUDBGResult(%d, %s)", err.code, name(err))
# known error constants
const return_codes = Dict{CUDBGResult_t,Symbol}(
0x0000 => :SUCCESS,
0x0001 => :ERROR_UNKNOWN,
0x0002 => :ERROR_BUFFER_TOO_SMALL,
0x0003 => :ERROR_UNKNOWN_FUNCTION,
0x0004 => :ERROR_INVALID_ARGS,
0x0005 => :ERROR_UNINITIALIZED,
0x0006 => :ERROR_INVALID_COORDINATES,
0x0007 => :ERROR_INVALID_MEMORY_SEGMENT,
0x0008 => :ERROR_INVALID_MEMORY_ACCESS,
0x0009 => :ERROR_MEMORY_MAPPING_FAILED,
0x000a => :ERROR_INTERNAL,
0x000b => :ERROR_INVALID_DEVICE,
0x000c => :ERROR_INVALID_SM,
0x000d => :ERROR_INVALID_WARP,
0x000e => :ERROR_INVALID_LANE,
0x000f => :ERROR_SUSPENDED_DEVICE,
0x0010 => :ERROR_RUNNING_DEVICE,
0x0011 => :ERROR_RESERVED_0,
0x0012 => :ERROR_INVALID_ADDRESS,
0x0013 => :ERROR_INCOMPATIBLE_API,
0x0014 => :ERROR_INITIALIZATION_FAILURE,
0x0015 => :ERROR_INVALID_GRID,
0x0016 => :ERROR_NO_EVENT_AVAILABLE,
0x0017 => :ERROR_SOME_DEVICES_WATCHDOGGED,
0x0018 => :ERROR_ALL_DEVICES_WATCHDOGGED,
0x0019 => :ERROR_INVALID_ATTRIBUTE,
0x001a => :ERROR_ZERO_CALL_DEPTH,
0x001b => :ERROR_INVALID_CALL_LEVEL,
0x001c => :ERROR_COMMUNICATION_FAILURE,
0x001d => :ERROR_INVALID_CONTEXT,
0x001e => :ERROR_ADDRESS_NOT_IN_DEVICE_MEM,
0x001f => :ERROR_MEMORY_UNMAPPING_FAILED,
0x0020 => :ERROR_INCOMPATIBLE_DISPLAY_DRIVER,
0x0021 => :ERROR_INVALID_MODULE,
0x0022 => :ERROR_LANE_NOT_IN_SYSCALL,
0x0023 => :ERROR_MEMCHECK_NOT_ENABLED,
0x0024 => :ERROR_INVALID_ENVVAR_ARGS,
0x0025 => :ERROR_OS_RESOURCES,
0x0026 => :ERROR_FORK_FAILED,
0x0027 => :ERROR_NO_DEVICE_AVAILABLE,
0x0028 => :ERROR_ATTACH_NOT_POSSIBLE,
0x0029 => :ERROR_WARP_RESUME_NOT_POSSIBLE,
0x002a => :ERROR_INVALID_WARP_MASK,
0x002b => :ERROR_AMBIGUOUS_MEMORY_ADDRESS,
0x002c => :ERROR_RECURSIVE_API_CALL,
0x002d => :ERROR_MISSING_DATA,
)
for code in return_codes
@eval const $(code[2]) = CUDBGResult($(code[1]))
end
## events
struct CUDBGEventCallbackData
tid::UInt32
timeout::UInt32
end
const event_cond = Base.AsyncCondition()
function event_callback(data)
global event_cond
ccall(:uv_async_send, Cint, (Ptr{Cvoid},), event_cond.handle)
# NOTE: this callback happens in a thread that is not managed by Julia,
# so we can't do anything useful here. Wake the main event handler.
return
end
@enum CUDBGEventQueueType::Cint begin
EVENT_QUEUE_TYPE_SYNC = 0
EVENT_QUEUE_TYPE_ASYNC = 1
end
@enum CUDBGEventKind::Cint begin
EVENT_INVALID = 0x000
EVENT_ELF_IMAGE_LOADED = 0x001
EVENT_KERNEL_READY = 0x002
EVENT_KERNEL_FINISHED = 0x003
EVENT_INTERNAL_ERROR = 0x004
EVENT_CTX_PUSH = 0x005
EVENT_CTX_POP = 0x006
EVENT_CTX_CREATE = 0x007
EVENT_CTX_DESTROY = 0x008
EVENT_TIMEOUT = 0x009
EVENT_ATTACH_COMPLETE = 0x00a
EVENT_DETACH_COMPLETE = 0x00b
EVENT_ELF_IMAGE_UNLOADED = 0x00c
end
struct CUDBGEvent
kind::CUDBGEventKind
cases::NTuple{88, Int8}
end
function event_handler()
global event_cond
while true
println("Waiting for event...")
wait(event_cond)
println("Got one!")
end
end
## init.jl
initialize() = @apicall(:initialize, ())
finalize() = @apicall(:finalize, ())
function __init__()
# get an API handle
ver = api_version()
api_ptr = Ref{Ptr{CUDBGAPI}}()
CUDAdrv.@apicall(:cudbgGetAPI,
(UInt32, UInt32, UInt32, Ptr{Ptr{CUDBGAPI}},),
ver.major, ver.minor, ver.patch, api_ptr)
api[] = unsafe_load(api_ptr[])
# initialize the API
println("Initializing debug API")
initialize()
atexit(finalize)
# FIXME: calling CUDAdbg's finalize causes subsequent cuContextDestroy calls to hang...
# register and launch the event handler
println("Starting event handler")
# TODO: use constructor of AsyncCondition that takes a callback directly
event_cb = @cfunction(event_callback, Nothing, (Ptr{CUDBGEventCallbackData},))
@apicall(:setNotifyNewEventCallback, (Ptr{Cvoid},), event_cb)
schedule(@task event_handler())
end
end
## main
using .CUDAdbg
using CUDAnative
function kernel()
@cuprintf("Hello, World!\n")
return
end
println("Launching kernel")
@cuda kernel()
# this currently does not work _at all_ (ie. doesn't even catch any event) because Julia is
# single-threaded, and the main event loop is stuck in the kernel that fails to launch
# (which the CUDA C version also suffers from). in the future, with PATR, the event callback
# should wake the event handler in a Julia thread.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment