Edit 2024-02-05:
The instructions below are outdated.
There is an official PR (#2027) for mouse functionality. You should use that rather than what is described below.
// example how to set up WebGPU rendering on Windows in C | |
// uses Dawn implementation of WebGPU: https://dawn.googlesource.com/dawn/ | |
// download pre-built Dawn webgpu.h/dll/lib files from https://github.com/mmozeiko/build-dawn/releases/latest | |
#include "webgpu.h" | |
#define WIN32_LEAN_AND_MEAN | |
#include <windows.h> | |
#define _USE_MATH_DEFINES |
# I happened to be looking at some of Cranelift's code, and I noticed that their constant-time dominates() | |
# check was using a somewhat more ad-hoc version of a hidden gem from the data structures literature called the | |
# parenthesis representation for trees. As far as I know, this was invented by Jacobson in his 1989 paper | |
# Space-Efficient Static Trees and Graphs. I first learned about it from the slightly later paper by Munro and Raman | |
# called Succinct Representations of Balanced Parentheses and Static Trees. I figured I'd give it an extremely | |
# quick intro and then show how it leads to a (slightly better) version of Cranelift's algorithm. | |
# | |
# This parenthesis representation of trees is surprisingly versatile, but its most striking feature is that | |
# it lets us query the ancestor relationship between two nodes in a tree in constant time, with a few instructions. | |
# And the idea is extremely simple and intuitive if you just draw the right kind of picture. |
partial ordering: time/events are related to each other as graphs of dependencies | |
total ordering: time/events are related to each other as a sequence of steps | |
atomic op: op which is observed to either happen fully or not at all (no in-between) | |
atomic variable: memory location which atomic ops happen to | |
data race: non-atomic ops from 2+ threads on same memory location where one op is a write | |
load: an atomic read to observe a value | |
store: an atomic write to publish a value | |
read-modify-write (rmw): a read, updating the value, then a write, all atomically |
#define WIN32_LEAN_AND_MEAN | |
#include <winsock2.h> | |
#include <windows.h> | |
#define SECURITY_WIN32 | |
#include <security.h> | |
#include <schannel.h> | |
#include <shlwapi.h> | |
#include <assert.h> | |
#include <stdio.h> |
static inline __m128i prefix_sum_u8(__m128i x) | |
{ | |
#if 1 | |
// alternative form that uses shifts, not the general shuffle network on port 5 (which is a bottleneck | |
// for us) | |
x = _mm_add_epi8(x, _mm_slli_epi64(x, 8)); | |
x = _mm_add_epi8(x, _mm_slli_epi64(x, 16)); | |
x = _mm_add_epi8(x, _mm_slli_epi64(x, 32)); | |
x = _mm_add_epi8(x, _mm_shuffle_epi8(x, _mm_setr_epi8(-1,-1,-1,-1,-1,-1,-1,-1, 7,7,7,7,7,7,7,7))); | |
#else |
#include <assert.h> | |
#include <tuple> | |
#include <vector> | |
#include <string> | |
typedef uint32_t Str; | |
std::vector<const char*> strs; |
// Lexer | |
#define TOK_CHAR(ch1, tok1) \ | |
case ch1: \ | |
next_ch(); \ | |
tok = TOK_##tok1; \ | |
tok_prec = 0; \ | |
break; | |
#define TOK_EXPR(ch1, tok1, prec1, lexpr1, rexpr1) \ |
// x64 encoding | |
enum Reg { | |
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, | |
R8, R9, R10, R11, R12, R13, R14, R15, | |
}; | |
enum XmmReg { | |
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, | |
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, |
#if _WIN32 | |
struct Timer { | |
LARGE_INTEGER win32_freq; | |
LARGE_INTEGER win32_start; | |
Timer() { | |
QueryPerformanceFrequency(&win32_freq); | |
win32_start.QuadPart = 0; | |
} |