Brian Merchant bzm3r

## gist:fbfc4e8a7dc09c2067da
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 30 21:49:55 2015

@author: brian
"""

import PyDSTool as dst
from PyDSTool.Toolbox import phaseplane as pp
from matplotlib import pyplot as plt

## gsoc-2019.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                bzm3r
                / gsoc-2019.md
            
            
              Last active
              August 26, 2019 03:12
            
              
                GSoC 2019 project overview.
              
          
    Despite the ubiquity of text in computing applications, rendering high-quality text using the GPU remains a challenging problem. Brief histories of the problem and of the work done to date are given in this course description, and in a recent blog post by Raph Levien and its accompanying HN discussion.
Patrick Walton et al. have produced a promising solution in the form of a Rust library, Pathfinder, whose general design has been beautifully documented by Nicolas Silva. Pathfinder 3 currently uses OpenGL as its GPU API, and we wondered whether porting it to more modern graphics APIs (e.g. Vulkan) might prove interesting in terms of performance results. To this end, I tried to use gfx-rs to rewrite how Pathfinder accesses/uses the GPU. While [progress was

  
## annotated-piet-gpu-fancy-kernel-1.metal
// Element type of the fixed-size `stack[MAX_STACK]` array declared in kernel1.
struct StackElement {
    // Reference to a group (same type as kernel1's `root` argument).
    PietGroupRef group;
    // NOTE(review): presumably the position within `group` to resume from — confirm.
    uint index;
    // NOTE(review): presumably a 2D offset associated with `group` — confirm.
    float2 offset; // Maybe pack as short2?
}

kernel1(Buf scene, PietGroupRef root) {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    uint tos_group = root;

## matrix-transpose-emu.py
import numpy as np
from copy import deepcopy

def subgroup_shuffle_xor(caller_tid, values, mask):
    """Return the entry of ``values`` found at index ``caller_tid ^ mask``.

    The name suggests this emulates a GPU subgroup XOR shuffle, with
    ``values`` holding one entry per thread id -- confirm against callers.

    Args:
        caller_tid: Index of the calling thread.
        values: Indexable collection to read from.
        mask: Value XOR-ed with ``caller_tid`` to pick the index to read.

    Returns:
        ``values[caller_tid ^ mask]``.
    """
    return values[caller_tid ^ mask]


def bnot(n, nbits):
    """Return ``(2**nbits - 1) - n``.

    For ``0 <= n < 2**nbits`` this equals the bitwise complement of ``n``
    restricted to its low ``nbits`` bits.

    Args:
        n: Value to complement.
        nbits: Width, in bits, of the all-ones value that ``n`` is subtracted from.
    """
    all_ones = (1 << nbits) - 1
    return all_ones - n

## transpose-threadgroup.glsl
#version 450
layout(local_size_x = 32) in;

// Buffer block `BM` at set 0, binding 0, exposing a single array of 32 uints.
// NOTE(review): presumably one 32x32 bit matrix, one uint per row — confirm.
layout(set = 0, binding = 0) buffer BM {
    uint[32] bm;
};

inline uint shuffle_round(uint a, uint b, uint m, uint s) {
    uint c;
    if ((tix & s) == 0) {

## transpose-shuffle.glsl
#version 450

#extension GL_KHR_shader_subgroup_shuffle: enable

layout(local_size_x = 32) in;

// Buffer block `BM` at set 0, binding 0, exposing a single array of 32 uints.
// NOTE(review): presumably one 32x32 bit matrix, one uint per row — confirm.
layout(set = 0, binding = 0) buffer BM {
    uint[32] bm;
};

## execute-test.rs
extern crate gfx_hal;

use std::{fs, ptr, slice, str::FromStr};

use hal::{adapter::MemoryType, buffer, command, memory, pool, prelude::*, pso};

/// Newtype around the `f64` value returned by `execute_test`.
///
/// NOTE(review): presumably the measured duration of the compute dispatch; the
/// unit is not visible in this part of the file — confirm.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct DispatchTime(f64);

pub fn execute_test<B>(test_kernel: String, input_bm: [u32; 32], num_executions: usize) -> DispatchTime {
    #[cfg(debug_assertions)]

## transpose-threadgroup-template.glsl
#version 450
#define NUM_BIT_MATRICES ~NUM_BMS~
#define WORKGROUP_SIZE ~WG_SIZE~

layout(local_size_x = WORKGROUP_SIZE, local_size_y = 32) in;

// Buffer block `BM` at set 0, binding 0: a uint `num_bms` followed by
// NUM_BIT_MATRICES elements, each an array of 32 uints.
// NOTE(review): presumably `num_bms` is the number of valid entries in `bms` — confirm.
layout(set = 0, binding = 0) buffer BM {
    uint num_bms;
    uint[32] bms[NUM_BIT_MATRICES];
};

## transpose-shuffle-template.glsl
#version 450
#extension GL_KHR_shader_subgroup_shuffle: enable

#define WORKGROUP_SIZE ~WG_SIZE~
#define NUM_EXECUTIONS ~NUM_EXECS~

// Unlike the threadgroup case, the Y-dimension of the workgroup size is not used.
// This is because the Y-dimension will be implicit in the number of subgroups in a workgroup.
layout(local_size_x = WORKGROUP_SIZE) in;

## transpose-ballot-template.glsl
#version 450
#extension GL_KHR_shader_subgroup_ballot: enable

#define WORKGROUP_SIZE ~WG_SIZE~
#define NUM_EXECUTIONS ~NUM_EXECS~

layout(local_size_x = WORKGROUP_SIZE) in;

layout(set = 0, binding = 0) buffer BM {
    uint[32] bms[];
	# -- coding: utf-8 --
	"""
	Created on Mon Mar 30 21:49:55 2015

	@author: brian
	"""

	import PyDSTool as dst
	from PyDSTool.Toolbox import phaseplane as pp
	from matplotlib import pyplot as plt
	// Element type of the fixed-size `stack[MAX_STACK]` array declared in kernel1.
	struct StackElement {
	// Reference to a group (same type as kernel1's `root` argument).
	PietGroupRef group;
	// NOTE(review): presumably the position within `group` to resume from — confirm.
	uint index;
	// NOTE(review): presumably a 2D offset associated with `group` — confirm.
	float2 offset; // Maybe pack as short2?
	}

	kernel1(Buf scene, PietGroupRef root) {
	StackElement stack[MAX_STACK];
	uint stack_ix = 0;
	uint tos_group = root;
	import numpy as np
	from copy import deepcopy

	def subgroup_shuffle_xor(caller_tid, values, mask):
		"""Return the entry of ``values`` found at index ``caller_tid ^ mask``.

		The name suggests this emulates a GPU subgroup XOR shuffle, with
		``values`` holding one entry per thread id -- confirm against callers.

		Args:
			caller_tid: Index of the calling thread.
			values: Indexable collection to read from.
			mask: Value XOR-ed with ``caller_tid`` to pick the index to read.

		Returns:
			``values[caller_tid ^ mask]``.
		"""
		# The body must be nested under the ``def``; without the extra indent
		# level Python rejects the function with an IndentationError.
		rd_tid = caller_tid ^ mask
		return values[rd_tid]


	def bnot(n, nbits):
		"""Return ``(2**nbits - 1) - n``.

		For ``0 <= n < 2**nbits`` this equals the bitwise complement of ``n``
		restricted to its low ``nbits`` bits.

		Args:
			n: Value to complement.
			nbits: Width, in bits, of the all-ones value that ``n`` is subtracted from.
		"""
		# The body must be nested under the ``def``; without the extra indent
		# level Python rejects the function with an IndentationError.
		return (1 << nbits) - 1 - n
	#version 450
	layout(local_size_x = 32) in;

	// Buffer block `BM` at set 0, binding 0, exposing a single array of 32 uints.
	// NOTE(review): presumably one 32x32 bit matrix, one uint per row — confirm.
	layout(set = 0, binding = 0) buffer BM {
	uint[32] bm;
	};

	inline uint shuffle_round(uint a, uint b, uint m, uint s) {
	uint c;
	if ((tix & s) == 0) {
	#version 450

	#extension GL_KHR_shader_subgroup_shuffle: enable

	layout(local_size_x = 32) in;

	// Buffer block `BM` at set 0, binding 0, exposing a single array of 32 uints.
	// NOTE(review): presumably one 32x32 bit matrix, one uint per row — confirm.
	layout(set = 0, binding = 0) buffer BM {
	uint[32] bm;
	};
	extern crate gfx_hal;

	use std::{fs, ptr, slice, str::FromStr};

	use hal::{adapter::MemoryType, buffer, command, memory, pool, prelude::*, pso};

	/// Newtype around the `f64` value returned by `execute_test`.
	///
	/// NOTE(review): presumably the measured duration of the compute dispatch; the
	/// unit is not visible in this part of the file — confirm.
	#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
	pub struct DispatchTime(f64);

	pub fn execute_test<B>(test_kernel: String, input_bm: [u32; 32], num_executions: usize) -> DispatchTime {
	#[cfg(debug_assertions)]
	#version 450
	#define NUM_BIT_MATRICES ~NUM_BMS~
	#define WORKGROUP_SIZE ~WG_SIZE~

	layout(local_size_x = WORKGROUP_SIZE, local_size_y = 32) in;

	// Buffer block `BM` at set 0, binding 0: a uint `num_bms` followed by
	// NUM_BIT_MATRICES elements, each an array of 32 uints.
	// NOTE(review): presumably `num_bms` is the number of valid entries in `bms` — confirm.
	layout(set = 0, binding = 0) buffer BM {
	uint num_bms;
	uint[32] bms[NUM_BIT_MATRICES];
	};
	#version 450
	#extension GL_KHR_shader_subgroup_ballot: enable

	#define WORKGROUP_SIZE ~WG_SIZE~
	#define NUM_EXECUTIONS ~NUM_EXECS~

	layout(local_size_x = WORKGROUP_SIZE) in;

	layout(set = 0, binding = 0) buffer BM {
	uint[32] bms[];