Skip to content

Instantly share code, notes, and snippets.

View gist:e4991ed243a3c7ffa58ab0d74c266baa
#include <stdint.h>
// Short aliases for the basic integer types.
// NOTE(review): the U8/U16/U32/U64 names suggest exact bit widths, but the
// underlying C types only guarantee minimum sizes - confirm for the target ABI.
typedef unsigned char U8;
typedef unsigned short U16;
typedef unsigned int U32;
typedef unsigned long long U64;
// Signed integer type able to hold a pointer value (intptr_t from <stdint.h>).
typedef intptr_t SINTa;
struct KernelState
{
@rygorous
rygorous / hull.py
Last active May 26, 2020
Convex hull
View hull.py
import random
# Determinant predicate (line sidedness test)
def det3x3_pt(p, q, r):
    """Return the 2D cross product of the vectors (q - p) and (r - p).

    Only indices 0 and 1 of each point are read, so any indexable pair
    (tuple, list, ...) works.

    The sign of the result tells on which side of the line through p and q
    the point r lies; the result is 0 when the three points are collinear.
    With x pointing right and y pointing up, a positive value means
    p -> q -> r turns counter-clockwise.
    """
    # Edge vectors from p to the other two points.
    ax, ay = q[0] - p[0], q[1] - p[1]
    bx, by = r[0] - p[0], r[1] - p[1]
    return ax * by - ay * bx
def convex_hull(points):
# sorts points by x then y which works for us (we only need the sort by x part here, but it doesn't hurt)
View gist:e6076b706ad1f423f4fbc79227b72fb2
---- On Ryzen 3950X
SimpleProf :seconds calls count : clk/call clk/count
search_one : 0.3081 1 8388480 : 1078358365.0 128.55
search_one_pf : 0.3415 1 8388480 : 1195232920.0 142.49 <-- speculative prefetching (next options for L and R)
search_multi2 : 0.3062 1 8388480 : 1071663705.0 127.75
search_multi4 : 0.2454 1 8388480 : 859008465.0 102.40
search_multi8 : 0.2113 1 8388480 : 739533515.0 88.16
search_multi16 : 0.1996 1 8388480 : 698443550.0 83.26
search_multi32 : 0.1785 1 8388480 : 624785840.0 74.48
View example.html
<!DOCTYPE html>
<html>
<head>
<title>Some fun HTML5 stuff</title>
<body>
<p>All of this is valid syntax!
<ul>
<li>Don't need to close paragraphs (that one's old)
<li>or list items
<li>since it's clear from context!
@rygorous
rygorous / rast.c
Created Mar 2, 2020
Simple watertight triangle rasterizer
View rast.c
// ---- triangle rasterizer
#define SUBPIXEL_SHIFT 8
#define SUBPIXEL_SCALE (1 << SUBPIXEL_SHIFT)
// Computes the 2x2 determinant a*d - b*c, then shifts the result right by
// SUBPIXEL_SHIFT bits before returning it.
static RADINLINE S64 det2x2(S32 a, S32 b, S32 c, S32 d)
{
    // Cast to S64 before multiplying so each product is formed in S64.
    S64 ad = (S64) a * d;
    S64 bc = (S64) b * c;
    S64 det = ad - bc;

    return det >> SUBPIXEL_SHIFT;
}
@rygorous
rygorous / gist:a549832e23b913ac70237d23c1600f8a
Created Aug 16, 2019
pseudo-ucode expansion for LOOP <dest>
View gist:a549832e23b913ac70237d23c1600f8a
lea rcx, [rcx-1] ; decrement rcx w/o flag update
mov temp0, rax ; save rax that we're about to trash
lahf ; save original flags
test rcx, rcx ; check whether updated rcx is zero
setz temp1 ; temp1 = 1 if rcx=0, 0 otherwise
sahf ; restore flags
mov rax, temp0 ; restore rax
jecxz temp1, dest ; jump if temp1 is zero, not rcx (doesn't exist in regular ISA but rcx is renamed anyway so the internal uop can do any source)
NOTE the actual ucode expansion probably doesn't have the MOVs since I would expect the internal LAHF/SAHF uops
@rygorous
rygorous / b.bat
Created Aug 9, 2019
Histogram code with all the tricks :) Needs NASM + VC++
View b.bat
@echo off
rem Build script: assembles histo_asm.nas with NASM, then compiles histotest.cpp
rem with the VC++ compiler and links it against the assembled object file.

rem Keep environment changes made below local to this script.
setlocal
rem %~dp0 expands to the drive and directory of this batch file, so the
rem relative paths below are resolved from the script's own location.
cd %~dp0
rem NOTE(review): presumably a helper on PATH that sets up the 64-bit VC++
rem toolchain environment - confirm.
call vcvars amd64
rem Assemble to a win64 object file (-f win64) with debug info (-g);
rem stop with exit code 1 if NASM fails.
..\..\bin\win32\nasm -f win64 -g -o histo_asm.obj histo_asm.nas || exit /b 1
rem Compile and link: /Zi emits debug info, /O2 optimizes for speed, /nologo
rem suppresses the compiler banner; stop with exit code 1 if cl fails.
cl /Zi /O2 /nologo histotest.cpp histo_asm.obj || exit /b 1
@rygorous
rygorous / rcpss.cpp
Created Jul 8, 2019
Tabled rcpss (should match HW version for stated value range on Skylake, anyway)
View rcpss.cpp
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <emmintrin.h>
static uint32_t recip(uint32_t bits)
{
uint32_t u;
float f;
memcpy(&f, &bits, sizeof(bits));
@rygorous
rygorous / convergents.py
Created Jun 6, 2019
Approximate rational fractions using convergents of the continued fraction expansion
View convergents.py
# Returns (exact, p, q) where p/q is an approximation to numer/denom and exact is true if
# the approximation is exact.
def convergent(numer, denom, limit):
"""Find an approximation to numer/denom with neither numerator nor denominator above limit"""
prev_p, cur_p = 0, 1
prev_q, cur_q = 1, 0
rest_p = numer
rest_q = denom
while rest_q != 0:
@rygorous
rygorous / conformance_basic_table.xml
Created May 29, 2019
gstpeaq self-compiled conformance test results
View conformance_basic_table.xml
<table frame="none" id="conformance_basic_table">
<title>Conformance test results for the basic version.</title>
<tgroup cols='4' align='right' colsep='1' rowsep='1'>
<colspec align='left' />
<thead>
<row>
<entry>Item</entry>
<entry>Reference DI</entry>
<entry>Actual DI</entry>
<entry>Difference</entry>