Skip to content

Instantly share code, notes, and snippets.

@maleadt
maleadt / sort.jl
Created April 13, 2020 07:19
GPU sort using dynamic parallelism (WIP, slow)
using Test
using CUDA
const MAX_DEPTH = 16
const SELECTION_SORT = 32
function selection_sort(data, left, right)
@inbounds for i in left:right
min_val = data[i]
; ModuleID = 'permutedims!'
source_filename = "permutedims!"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-pc-linux-gnu"
%jl_value_t = type opaque
%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
declare %jl_value_t addrspace(10)* @japi1_checkdims_perm(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)
@maleadt
maleadt / tdma.jl
Created July 25, 2019 14:00
batched tdma
# experimentation with batched tridiagonal solvers on the GPU for Oceananigans.jl
#
# - reference serial CPU implementation
# - batched GPU implementation using cuSPARSE (fastest)
# - batched GPU implementation based on the serial CPU implementation (slow but flexible)
# - parallel GPU implementation (potentially fast and flexible)
#
# see `test_batched` and `bench_batched`
using LinearAlgebra
@maleadt
maleadt / tdma.jl
Created June 6, 2019 22:20
Tridiagonal matrix algorithm on the GPU with Julia
# experimentation with batched tridiagonal solvers on the GPU for Oceananigans.jl
#
# - reference serial CPU implementation
# - batched GPU implementation using cuSPARSE (fastest)
# - batched GPU implementation based on the serial CPU implementation (slow but flexible)
# - parallel GPU implementation (potentially fast and flexible)
#
# see `test_batched` and `bench_batched`
using CUDAdrv
@maleadt
maleadt / fritzbox.py
Created May 30, 2019 10:59
Munin plugin for FritzBox DSL properties
#!/opt/fritzconnection/bin/python2
import sys
from fritzconnection import FritzConnection
if len(sys.argv)>1:
if sys.argv[1]=="config":
print '''
host_name TODO
@maleadt
maleadt / wip.cu
Created March 14, 2019 16:01
cuda-gdb WIPs
#include "cuda.h"
#include "cudadebugger.h"
#include <iostream>
#include <pthread.h>
#include <signal.h>
// helpers
CUDBGAPI cudbgAPI;
@maleadt
maleadt / CMakeLists.txt
Created December 7, 2018 18:52
LLVM symbol renaming PoC
if(DEFINED ENV{LOCAL_NAMESPACE})
# NOTE: if localizing the namespace, by rewriting just before linking, we
# really do need to use static linking because we can't (easily)
# rewrite dynamic symbols
# NOTE: the directive below is copied from the Linux rules in
# CMakeDefaultMakeRuleVariables.cmake, and may have to be adjusted for
# your platform
set(CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_SOURCE_DIR}/rewrite_wrapper <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
endif()
@maleadt
maleadt / julia.dict
Created November 20, 2018 08:07
AFL dictionary for Julia
#
# AFL dictionary for Julia
# ------------------------
#
# Contains basic keywords, operators, and some syntax building blocks.
#
# Created by Tim Besard <tim.besard@gmail.com>
#
# The Computer Language Benchmarks Game
# binary-trees benchmark
# http://shootout.alioth.debian.org/u32/performance.php?test=binarytrees
#
# Ported from an OCaml version
abstract type BTree end
mutable struct Empty <: BTree
end
using LinearAlgebra, Statistics
using StaticArrays, Parameters
using CUDAdrv, CUDAnative
struct Lattice{D,Q,T}
δt :: T
δx :: T
τ :: T
e :: Vector{SVector{D,T}}
w :: Vector{T}