Skip to content

Instantly share code, notes, and snippets.

View trishume's full-sized avatar

Tristan Hume trishume

View GitHub Profile
oxyflour /
Created May 2, 2019 05:24
cuda graph with numba
from numba.cuda.cudadrv import error
from numba.cuda.cudadrv.driver import driver, is_device_memory, device_ctypes_pointer
from ctypes import POINTER, Structure, c_void_p, c_int, c_uint, byref, addressof, pointer
def get_fn(fname, restype, *argtypes):
if not driver.is_initialized:
if driver.initialization_error is not None:
// Merge pass
static void merge_pass(S16 *out, const S16 *inA, const S16 *inB, size_t elemsPerRun)
// elemsPerRun must be a power of two and >= 16!
const S16 *endA = inA + elemsPerRun;
const S16 *endB = inB + elemsPerRun;
Vec vMin0 = load8_s16(inA + 0);
Vec vMin1 = load8_s16(inA + 8);
Vec vMax0 = load8_s16(inB + 0);
Vec vMax1 = load8_s16(inB + 8);
#include <assert.h>
#include <tuple>
#include <vector>
#include <string>
typedef uint32_t Str;
std::vector<const char*> strs;
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#if defined(__x86_64__)
#define BREAK asm("int3")
#error Implement macros for your CPU.
jld / bp_test.c
Created August 5, 2014 22:26
Example of using a perf_event breakpoint counter to crash on write to a specific location.
#include <fcntl.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
pervognsen / expr.c
Last active February 5, 2023 17:27
void parse_expr(Value *dest);
Sym *parse_ident(void) {
if (tok != TOK_IDENT) {
error("Expected identifier");
Sym *ident = tok_sym;
return ident;
niklas-ourmachinery /
Last active January 24, 2023 06:09
Identifying controls in an IMGUI

Identifying controls in an IMGUI

In an IMGUI we need a way to identify which control is active or hot. For example, when the user presses a key, we need to know which text box should receive the text.

Possible options:

  1. Sequential number (Unity)

Each control drawn gets a sequentially increasing number.

pervognsen / vex.c
Last active October 23, 2022 11:02
// The stack is 8-byte aligned: every value pushed occupies a whole number of 8-byte slots.
// All macro arguments are parenthesized so that arbitrary expressions (e.g. `c ? a : b`, `*p`)
// expand with the intended precedence.

// Rounds pointer p up to the next 8-byte boundary and yields it as a uint64_t *.
// The mask is built at uintptr_t width so no bits of the address are lost.
#define ALIGN(p) ((uint64_t *)(((uintptr_t)(p) + 7) & ~(uintptr_t)7))
// Declares two locals in the current scope: `_stack`, the cursor, starting at the aligned
// beginning of `stack`, and `_stack_end`, pointing `size` bytes past the (unaligned) `stack`.
#define STACK(stack, size) uint64_t *_stack = ALIGN(stack), *_stack_end = (uint64_t *)((char *)(stack) + (size))
// Copies the lvalue x to the cursor, then advances the cursor by sizeof(x) rounded up to
// whole 8-byte slots. No bounds check against `_stack_end` is performed here.
#define PUSH(x) do { memcpy(_stack, &(x), sizeof(x)); _stack += (sizeof(x) + 7) / 8; } while (0)
// Inverse of PUSH: moves the cursor back by the slots x occupies, then copies the bytes into
// the lvalue x. Values must be popped in reverse push order with matching types.
#define POP(x) do { _stack -= (sizeof(x) + 7) / 8; memcpy(&(x), _stack, sizeof(x)); } while (0)
#if __GNUC__
// Use computed gotos on GCC-compatible compilers (including Clang).
#define JOIN(x, y) x##y
#define LABEL(name) JOIN(label, name)
// Linear-scan mark-compact collector for causal data structures, i.e. the reference graph is acyclic and the objects
// are ordered by age in the heap (which happens if you use a linear allocator) so they are automatically topologically sorted.
// This algorithm has very high memory-level parallelism which can be exploited by modern out-of-order processors, and should
// be memory throughput limited.
void collect(void) {
// Initialize marks from roots.
memset(marks, 0, num_nodes * sizeof(uint32_t));
int newest = 0, oldest = num_nodes;
for (int i = 0; i < num_roots; i++) {
marks[roots[i]] = 1;
typedef enum {
CMD_RET = 128,
} Cmd;