Stephen Hill selfshadow

## gist:2156668
// float->half variants.
// by Fabian "ryg" Giesen.
//
// I hereby place this code in the public domain, as per the terms of the
// CC0 license:
//
//   https://creativecommons.org/publicdomain/zero/1.0/
//
// float_to_half_full: This is basically the ISPC stdlib code, except
// I preserve the sign of NaNs (any good reason not to?)

## gist:2203834
// float->sRGB8 conversions - two variants.
// by Fabian "ryg" Giesen
//
// I hereby place this code in the public domain.
//
// Both variants come with absolute error bounds and a reversibility and monotonicity
// guarantee (see test driver code below). They should pass D3D10 conformance testing
// (not that you can verify this, but still). They are verified against a clean reference
// implementation provided below, and the test driver checks all floats exhaustively.
//

## rastafontgpu.cpp
// cl /nologo /I "%DXSDK_DIR%\Include" rastafontgpu.cpp /link /LIBPATH:"%DXSDK_DIR%\Lib\x86"

#include <tchar.h>
#include <stdint.h>
#include <stdio.h>
#include <windows.h>

#include <D3D11.h>

#pragma comment(lib, "kernel32.lib")

## Tetrahedra-based Volumetric Meshes.txt
Below I collected relevant links and papers more or less pertaining to the subject of tetrahedral meshes.
It's an ever-growing list.

------------------------------
Relevant links:

http://en.wikipedia.org/wiki/Types_of_mesh
http://en.wikipedia.org/wiki/Tetrahedron
http://en.wikipedia.org/wiki/Simplicial_complex

## FloorDouble.hlsl
double MaskOutFraction(double v)
{
	// Alias double as 2 32-bit integers
	uint d0, d1;
	asuint(v, d0, d1);

	// 0  ... 51   mantissa		0  ... 19
	// 52 ... 62   exponent		20 ... 30
	// 63 ... 63   sign

## springer-free-maths-books.md

      
              1 file
            
          
              474 forks
            
          
              248 comments
            
          
              2243 stars
            
          
                bishboria
                / springer-free-maths-books.md
            
            
              Last active
              June 8, 2024 06:39
            
              
                Springer made a bunch of books available for free; these were the direct links
              
          
    These links no longer work. Springer have pulled the free plug.

Graduate texts in mathematics

duplicates = multiple editions
A Classical Introduction to Modern Number Theory, Kenneth Ireland Michael Rosen
A Classical Introduction to Modern Number Theory, Kenneth Ireland Michael Rosen

  
## Tex2DCatmullRom.hlsl
// The following code is licensed under the MIT license: https://gist.github.com/TheRealMJP/bc503b0b87b643d3505d41eab8b332ae

// Samples a texture with Catmull-Rom filtering, using 9 texture fetches instead of 16.
// See http://vec3.ca/bicubic-filtering-in-fewer-taps/ for more details
float4 SampleTextureCatmullRom(in Texture2D<float4> tex, in SamplerState linearSampler, in float2 uv, in float2 texSize)
{
    // We're going to sample a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding
    // down the sample location to get the exact center of our "starting" texel. The starting texel will be at
    // location [1, 1] in the grid, where [0, 0] is the top left corner.
    float2 samplePos = uv * texSize;

## Swizzles.h
#include <stdio.h>

// #define CLANG_EXTENSION
// Clang compile with -O3

#define VS_EXTENSION
// https://godbolt.org/z/sVWrF4
// Clang  compile with -O3 -fms-compatibility
// VS2017 compile with /O3

## mergesort_kit.cpp
#include <emmintrin.h>
#include <tmmintrin.h> // for PSHUFB; this isn't strictly necessary (see comments in reverse_s16)

typedef int16_t S16; // signed 16-bit element type
typedef __m128i Vec; // 128-bit SSE register, used here as eight S16 lanes

// Reads eight consecutive S16 values starting at x with an unaligned 128-bit load.
static inline Vec load8_s16(const S16 *x)
{
    const __m128i *src = reinterpret_cast<const __m128i *>(x);
    return _mm_loadu_si128(src);
}

// Writes the eight S16 lanes of v to memory starting at x with an unaligned 128-bit store.
static inline void store8_s16(S16 *x, Vec v)
{
    __m128i *dst = reinterpret_cast<__m128i *>(x);
    _mm_storeu_si128(dst, v);
}

// Lane-wise compare-exchange on signed 16-bit values: on return every lane of a
// holds the smaller and every lane of b the larger of the two incoming lanes.
static inline void sort_two(Vec &a, Vec &b)
{
    // Both results are computed from the incoming values before either is overwritten.
    const Vec lo = _mm_min_epi16(a, b);
    const Vec hi = _mm_max_epi16(b, a);
    a = lo;
    b = hi;
}

## FastUniformLoadWithWaveOps.txt
In shader programming, you often run into a problem where you want every pixel in a compute shader group (tile) to iterate over
the same array in memory. Tiled deferred lighting is the most common case: each 8x8 tile loops over a light list culled for that tile.

Simplified HLSL code looks like this:

Buffer<float4> lightDatas;
Texture2D<uint2> lightStartCounts;
RWTexture2D<float4> output;

[numthreads(8, 8, 1)]
	// float->half variants.
	// by Fabian "ryg" Giesen.
	//
	// I hereby place this code in the public domain, as per the terms of the
	// CC0 license:
	//
	// https://creativecommons.org/publicdomain/zero/1.0/
	//
	// float_to_half_full: This is basically the ISPC stdlib code, except
	// I preserve the sign of NaNs (any good reason not to?)
	// float->sRGB8 conversions - two variants.
	// by Fabian "ryg" Giesen
	//
	// I hereby place this code in the public domain.
	//
	// Both variants come with absolute error bounds and a reversibility and monotonicity
	// guarantee (see test driver code below). They should pass D3D10 conformance testing
	// (not that you can verify this, but still). They are verified against a clean reference
	// implementation provided below, and the test driver checks all floats exhaustively.
	//
	// cl /nologo /I "%DXSDK_DIR%\Include" rastafontgpu.cpp /link /LIBPATH:"%DXSDK_DIR%\Lib\x86"

	#include <tchar.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <windows.h>

	#include <D3D11.h>

	#pragma comment(lib, "kernel32.lib")
	Below I collected relevant links and papers more or less pertaining to the subject of tetrahedral meshes.
	It's an ever-growing list.

	------------------------------
	Relevant links:

	http://en.wikipedia.org/wiki/Types_of_mesh
	http://en.wikipedia.org/wiki/Tetrahedron
	http://en.wikipedia.org/wiki/Simplicial_complex
	double MaskOutFraction(double v)
	{
	// Alias double as 2 32-bit integers
	uint d0, d1;
	asuint(v, d0, d1);

	// 0 ... 51 mantissa 0 ... 19
	// 52 ... 62 exponent 20 ... 30
	// 63 ... 63 sign
	// The following code is licensed under the MIT license: https://gist.github.com/TheRealMJP/bc503b0b87b643d3505d41eab8b332ae

	// Samples a texture with Catmull-Rom filtering, using 9 texture fetches instead of 16.
	// See http://vec3.ca/bicubic-filtering-in-fewer-taps/ for more details
	float4 SampleTextureCatmullRom(in Texture2D<float4> tex, in SamplerState linearSampler, in float2 uv, in float2 texSize)
	{
	// We're going to sample a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding
	// down the sample location to get the exact center of our "starting" texel. The starting texel will be at
	// location [1, 1] in the grid, where [0, 0] is the top left corner.
	float2 samplePos = uv * texSize;
	#include <stdio.h>

	// #define CLANG_EXTENSION
	// Clang compile with -O3

	#define VS_EXTENSION
	// https://godbolt.org/z/sVWrF4
	// Clang compile with -O3 -fms-compatibility
	// VS2017 compile with /O3
	#include <emmintrin.h>
	#include <tmmintrin.h> // for PSHUFB; this isn't strictly necessary (see comments in reverse_s16)

	typedef int16_t S16; // signed 16-bit element type
	typedef __m128i Vec; // 128-bit SSE register, used here as eight S16 lanes

	// Unaligned 128-bit load of eight consecutive S16 values starting at x.
	// x is a pointer to the first element; the intrinsic takes a __m128i pointer, hence the cast.
	static inline Vec load8_s16(const S16 *x) { return _mm_loadu_si128(reinterpret_cast<const __m128i *>(x)); }
	// Unaligned 128-bit store of the eight S16 lanes of v to memory starting at x.
	static inline void store8_s16(S16 *x, Vec v) { _mm_storeu_si128(reinterpret_cast<__m128i *>(x), v); }

	// Lane-wise compare-exchange on signed 16-bit values: afterwards each lane of a holds
	// the smaller and each lane of b the larger of the two incoming lanes.
	// t keeps the incoming a, because a is overwritten before the max is taken.
	static inline void sort_two(Vec &a, Vec &b) { Vec t = a; a = _mm_min_epi16(a, b); b = _mm_max_epi16(b, t); }
	In shader programming, you often run into a problem where you want every pixel in a compute shader group (tile) to iterate over
	the same array in memory. Tiled deferred lighting is the most common case: each 8x8 tile loops over a light list culled for that tile.

	Simplified HLSL code looks like this:

	Buffer<float4> lightDatas;
	Texture2D<uint2> lightStartCounts;
	RWTexture2D<float4> output;

	[numthreads(8, 8, 1)]