Skip to content

Instantly share code, notes, and snippets.

@chyanju
Created January 15, 2016 10:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chyanju/6b88f9c64e8e1d7a26fb to your computer and use it in GitHub Desktop.
Save chyanju/6b88f9c64e8e1d7a26fb to your computer and use it in GitHub Desktop.
A Demo of Matrix Multiplication on A GPU, Using MXNet Lower-Level API
# A Demo of Matrix Multiplication on a Single GPU, Using the MXNet Lower-Level API
using MXNet
"Delete & Release NDArray Memory - Good Status"
macro del(arr)
    # Escape the caller's expression once so that every splice below refers to
    # the variable in the caller's scope rather than a hygienic copy.
    target = esc(arr)
    return quote
        # Hand the raw handle pointer to the library's MXNDArrayFree entry point.
        mx.@mxcall(:MXNDArrayFree, (mx.MX_handle,), $(target).handle.value)
        # Call MXNDArrayWaitAll before continuing (presumably blocks until all
        # pending NDArray work has finished -- confirm against the MXNet C API).
        mx.@mxcall(:MXNDArrayWaitAll, ())
        # Clear the stored pointer so the wrapper no longer references the freed
        # handle (presumably this also keeps a finalizer from freeing it twice --
        # TODO confirm against MXNet.jl).
        $(target).handle.value = C_NULL
    end
end
"Distributed Splitting Function, Splitting M into N pieces"
function dSplitFast(M, N)
    # Split the index range 1:M into N contiguous chunks whose lengths differ by
    # at most one; the first `M % N` chunks receive the extra element.
    #
    # Arguments:
    #   M -- total number of indices to distribute (expected to be non-negative).
    #   N -- number of chunks.
    # Returns:
    #   A vector of N `UnitRange`s that together cover 1:M in order. When N > M
    #   the trailing ranges are empty; when N <= 0 the result is an empty vector.

    # Nothing to split into; also avoids dividing by zero below.
    N > 0 || return UnitRange{Int64}[]

    # Integer floor division instead of floor(Int64, M / N), which went through
    # Float64 and could lose precision for very large M.
    base = Int64(fld(M, N))
    extra = M % N

    # Running totals of the chunk sizes give the last index of each chunk.
    # cumsum replaces the quadratic sum(a[1:j]) and the removed `unshift!`.
    stops = cumsum([base + (i <= extra) for i in 1:N])
    # Each chunk starts one past the end of the previous one (the first at 1).
    starts = [0; stops[1:end-1]] .+ 1

    return [starts[k]:stops[k] for k in 1:N]
end
"2D Matrix Multiplication Splitting on A Single GPU"
function gmul(MTX_A::Array{T,2}, MTX_B::Array{T,2}, PA::Int64=1, PB::Int64=1,
              CTX::MXNet.mx.Context=mx.gpu(0), RTYPE::DataType=T) where {T}
    # Blocked matrix product MTX_A * MTX_B computed chunk by chunk on one device.
    #
    # Arguments:
    #   MTX_A -- left operand, rA x cA.
    #   MTX_B -- right operand, rB x cB; cA must equal rB.
    #   PA    -- number of row chunks MTX_A is split into.
    #   PB    -- number of column chunks MTX_B is split into.
    #   CTX   -- MXNet context the chunks are copied to (defaults to mx.gpu(0)).
    #   RTYPE -- element type used when copying each partial product back to the
    #            host. NOTE(review): the result matrix itself is allocated with
    #            element type T, so values are converted to T on assignment.
    # Returns:
    #   The rA x cB product as an Array{T,2}.
    # Throws:
    #   DimensionMismatch when the inner dimensions of the operands differ.
    rA, cA = size(MTX_A)
    rB, cB = size(MTX_B)
    cA == rB || throw(DimensionMismatch(
        "A has dimensions ($rA,$cA) but B has dimensions ($rB,$cB)"))

    MTX_R = zeros(T, (rA, cB))
    AList = dSplitFast(rA, PA)
    BList = dSplitFast(cB, PB)

    for i in AList
        # dSplitFast yields empty ranges when there are more chunks than rows.
        isempty(i) && continue
        LP = mx.copy(MTX_A[i, :], CTX)
        for j in BList
            isempty(j) && continue
            RP = mx.copy(MTX_B[:, j], CTX)
            # Operands are deliberately swapped ("exchange" in the original).
            # Presumably this compensates for MXNet seeing Julia's column-major
            # arrays with reversed dimensions -- TODO confirm against MXNet.jl.
            PP = mx.dot(RP, LP)
            MTX_R[i, j] = mx.convert(Array{RTYPE}, PP)
            # Release the product as well; previously only LP and RP were freed,
            # so every partial result stayed allocated on the device.
            @del PP
            @del RP
        end
        @del LP
    end
    return MTX_R
end
# Demo driver: build two random Float64 operands (30000 x 20000 and
# 20000 x 15000) and multiply them on GPU 0, splitting the left operand into
# 2 row chunks and the right operand into 200 column chunks.
a = rand(Float64, 30000, 20000)
b = rand(Float64, 20000, 15000)
c = gmul(a, b, 2, 200, mx.gpu(0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment