Skip to content

Instantly share code, notes, and snippets.

View Sixzero's full-sized avatar

SixZero Sixzero

View GitHub Profile
@Sixzero
Sixzero / gist:8f9c8cab34731e6cffff498ee6c513cc
Last active June 25, 2024 15:54
Efficient multi-GPU matmul in Julia
using CUDA
function distributed_matmul(C, A::CuMatrix{Float32}, B::CuMatrix{Float32}, num_gpus::Int)
m, n = size(A)
n, k = size(B)
# Ensure we have the correct number of GPUs
@assert CUDA.ndevices() >= num_gpus "Not enough GPUs available"
# Calculate rows per GPU
@Sixzero
Sixzero / gist:3312071709aadc9e7e6fcc1290cfd58a
Created January 4, 2023 12:20
Haiku MLP with custom data
#%%
# Copyright 2020 DeepMind Technologies Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@Sixzero
Sixzero / test_gpu_bigFN_compilation.jl
Last active November 18, 2022 18:59
Function size vs speed
using BenchmarkTools
using CUDA
# Override BenchmarkTools' global default `seconds` parameter with 1.00 (per the
# BenchmarkTools manual this is the time budget allotted to each benchmark).
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 1.00
test_sum(a, b, c) = begin
I = (blockIdx().x - 1) * blockDim().x + threadIdx().x
if I > 1000
return
end
@Sixzero
Sixzero / Zygote simple local min problem
Created June 24, 2022 11:37
Zygote simple local min problem
# Hard-coded data for the local-minimum reproduction. The trailing `;;;` in each literal
# makes X and Y 2×1×1 Float32 arrays.
# NOTE(review): presumably X holds the inputs and Y the targets — confirm against the loss.
X, Y = (Float32[-0.31125240132442067; 0.8163067649323273;;;],
Float32[5.7064323; 2.599511;;;])
# Disabled alternative for w: one random draw replicated into a 2×1×1 array.
# w = randn(1,1,1) .* ones(2,1,1)
# One-element Float32 vector; matches the `b` parameter name of modl(X,w,b) —
# presumably the bias (confirm).
b = Float32[25.510088, ]
# Disabled alternative for b: one random draw replicated into a 2×1×1 array.
# b = randn(1,1,1) .* ones(2,1,1)
# One-element Float32 vector; matches the `w` parameter name of modl(X,w,b) —
# presumably the weight (confirm).
w = Float32[0.15980364, ]
modl(X,w,b) = begin
@Sixzero
Sixzero / example.jl
Last active December 28, 2020 16:27
assign_opt_problem
# Three independent 1024-element vectors of standard-normal samples.
a = randn(1024)
b = randn(1024)
c = randn(1024)
# Time the fused in-place broadcast that overwrites c with a .+ b. It is run twice
# because the first @time measurement also includes compilation.
@time c .= a .+ b
@time c .= a .+ b
# NOTE(review): the lone `;` presumably suppresses result display in a REPL or
# notebook cell — confirm.
;
@Sixzero
Sixzero / assign_test.jl
Last active December 28, 2020 23:42
opaque_closure_test
#%%
using BenchmarkTools
# Override BenchmarkTools' global default `seconds` parameter with 2.50 (per the
# BenchmarkTools manual this is the time budget allotted to each benchmark).
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.50
# `@opaque` is an experimental macro. Released Julia versions document it as
# `Base.Experimental.@opaque`, whereas the original import assumed it lives directly in
# `Base`. Import it from whichever module actually defines it so the script loads on both.
@static if isdefined(Base, Symbol("@opaque"))
    using Base: @opaque
else
    using Base.Experimental: @opaque
end

# Opaque-closure variant of the elementwise vector sum: takes two `Vector`s and
# returns `a .+ b` as a newly allocated array.
add_opaq = @opaque (a::Vector, b::Vector) -> a .+ b
# Out-of-place elementwise sum of two vectors; returns a newly allocated result.
# Marked `@noinline`, presumably so the call is not inlined into the benchmark caller.
@noinline function add(a::Vector, b::Vector)
    return @. a + b
end
# In-place elementwise sum: overwrites `c` with `a .+ b` and returns `c`.
# Marked `@noinline`, presumably so the call is not inlined into the benchmark caller.
@noinline function add!(c::Vector, a::Vector, b::Vector)
    @. c = a + b
    return c
end
# Out-of-place elementwise sum of two vectors, with an `@inline` hint on the definition.
@inline function add_inline(a::Vector, b::Vector)
    return @. a + b
end
#%%