Arseny Kapoulkine zeux

## stbench.py
import argparse
import json
import os
import safetensors
import safetensors.torch
import sys
import time
import torch

def fast_save_file(tensors, filename, metadata=None):

## matbench.c
// brew install libomp
// cc -o matbench matbench.c -O3 -ffast-math -Xclang -fopenmp -I/opt/homebrew/opt/libomp/include -L/opt/homebrew/opt/libomp/lib -lomp
// ./matbench

#include <assert.h>
#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <time.h>

## qt.js
// This code looks at precision impact of transforming a vector repeatedly by a slightly-non-unit quaternion
// Slightly-non-unit quaternions are important: they arise naturally in the course of quaternion computations
// Repeated transformations are important: they may occur during simulation or complex long chains of computation

// Note that because this code runs in JS in double precision, this doesn't model floating-point roundoff.

function applyQuaternion1( q, v ) {

	const x = v.x, y = v.y, z = v.z;
	const qx = q.x, qy = q.y, qz = q.z, qw = q.w;

## bounds-frag.glsl
#version 450

// 2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere. Michael Mara, Morgan McGuire. 2013
bool projectSphereView(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb)
{
    if (c.z < r + znear) return false;

    vec3 cr = c * r;
    float czr2 = c.z * c.z - r * r;

## alphasort.cpp
/*
 The Nature paper about sorting algorithms has an "improvement" for sort3 that saves a mov.
 Thread for context: https://mastodon.gamedev.place/@zeux/110510029570470184

 This code experimentally verifies that the proposed optimization is perf-neutral
 (i.e. it does not improve performance). You'll need to remove the mov from all 3 versions
 and retest; feel free to test one version at a time.

 Cycle count established by using 'perf stat' on Ryzen 7 5900X - it does not depend on
 whether the mov is there.

## meshlets.py
tl = 512

for vl in [32, 64, 96, 128, 256]:
	bestx = 0
	besty = 0
	bests = vl

	for x in range(1, vl):
		for y in range(1, vl):
			v = (x+1)*(y+1)

## gctracker.lua
--!strict

--[[
BSD Zero Clause License

Copyright (c) 2022 Arseny Kapoulkine

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.

## gcpacer.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zeux
                / gcpacer.md
            
            
              Last active
              February 23, 2022 22:53
            
              
                Luau GC exploration: doing some math on relationship between S / G / P
              
          
# GC pacing

This document tries to establish a mathematical formulation for GC pacing in the Luau GC, assuming a uniform rate of allocation in an application with a steady live set.
## GC algorithm assumptions


- GC proceeds in three phases: mark, atomic, sweep.
- During mark, the heap size only grows, as we don't deallocate memory short of table resize.
- During sweep, the heap size grows due to new allocations and shrinks due to swept objects.
- The live set is fixed at atomic time (between mark and sweep).


## usleep.cpp
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <unistd.h>
#include <stdint.h>

#ifdef __APPLE__
#include <mach/mach_time.h>
#else

## luaubind.hpp
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

// Use this with luaL_Reg + luaL_register:
//
// static const luaL_Reg funcs[] = {
//   {"test123", LUAU_BIND(test123)},
//   {NULL, NULL},
// };
	import argparse
	import json
	import os
	import safetensors
	import safetensors.torch
	import sys
	import time
	import torch

	def fast_save_file(tensors, filename, metadata=None):
	// brew install libomp
	// cc -o matbench matbench.c -O3 -ffast-math -Xclang -fopenmp -I/opt/homebrew/opt/libomp/include -L/opt/homebrew/opt/libomp/lib -lomp
	// ./matbench

	#include <assert.h>
	#include <math.h>
	#include <omp.h>
	#include <stdio.h>
	#include <time.h>
	// This code looks at precision impact of transforming a vector repeatedly by a slightly-non-unit quaternion
	// Slightly-non-unit quaternions are important: they arise naturally in the course of quaternion computations
	// Repeated transformations are important: they may occur during simulation or complex long chains of computation

	// Note that because this code runs in JS in double precision, this doesn't model floating-point roundoff.

	function applyQuaternion1( q, v ) {

	const x = v.x, y = v.y, z = v.z;
	const qx = q.x, qy = q.y, qz = q.z, qw = q.w;
	#version 450

	// 2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere. Michael Mara, Morgan McGuire. 2013
	bool projectSphereView(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb)
	{
	if (c.z < r + znear) return false;

	vec3 cr = c * r;
	float czr2 = c.z * c.z - r * r;
	/*
	The Nature paper about sorting algorithms has an "improvement" for sort3 that saves a mov.
	Thread for context: https://mastodon.gamedev.place/@zeux/110510029570470184

	This code experimentally verifies that the proposed optimization is perf-neutral
	(i.e. it does not improve performance). You'll need to remove the mov from all 3 versions
	and retest; feel free to test one version at a time.

	Cycle count established by using 'perf stat' on Ryzen 7 5900X - it does not depend on
	whether the mov is there.
	tl = 512

	for vl in [32, 64, 96, 128, 256]:
	bestx = 0
	besty = 0
	bests = vl

	for x in range(1, vl):
	for y in range(1, vl):
	v = (x+1)*(y+1)
	--!strict

	--[[
	BSD Zero Clause License

	Copyright (c) 2022 Arseny Kapoulkine

	Permission to use, copy, modify, and/or distribute this software for any
	purpose with or without fee is hereby granted.
	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	#include <unistd.h>
	#include <stdint.h>

	#ifdef __APPLE__
	#include <mach/mach_time.h>
	#else
	// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
	#pragma once

	// Use this with luaL_Reg + luaL_register:
	//
	// static const luaL_Reg funcs[] = {
	// {"test123", LUAU_BIND(test123)},
	// {NULL, NULL},
	// };