Skip to content

Instantly share code, notes, and snippets.

@timholy
Created April 26, 2012 14:44
Show Gist options
  • Save timholy/2500065 to your computer and use it in GitHub Desktop.
Save timholy/2500065 to your computer and use it in GitHub Desktop.
Testing Julia array timing
step(i::Int) = 1
type ForwardSubarrayIterator
sub_min::Vector{Int}
sub_max::Vector{Int}
inc::Vector{Int}
pos::Vector{Int}
index::Int # the current linear index
end
function ForwardSubarrayIterator(dims::Dims,ind::RangeIndex...)
# Compute increments for the "carry" operation
# This calculates the linear-index difference in going from
# [max1,max2,i3,...]
# to
# [min1,min2,i3+1,...]
inc = Array(Int,length(dims))
sub_min = Array(Int,length(dims))
sub_max = Array(Int,length(dims))
inc[1] = step(ind[1])
sub_min[1] = min(ind[1])
sub_max[1] = max(ind[1])
index_min::Int = sub_min[1]
index_max::Int = sub_max[1]
s::Int = 1 # will hold the stride
for idim = 2:length(dims)
s *= dims[idim-1]
inc[idim] = s - index_max + index_min
sub_min[idim] = min(ind[idim])
sub_max[idim] = max(ind[idim])
index_max += (sub_max[idim]-1)*s
index_min += (sub_min[idim]-1)*s
end
ForwardSubarrayIterator(sub_min,sub_max,inc,copy(sub_min),index_min)
end
function next(iter::ForwardSubarrayIterator)
idim = 1
iter.pos[1] += 1
while idim < length(iter.pos) && iter.pos[idim] > iter.sub_max[idim]
iter.pos[idim] = iter.sub_min[idim]
idim += 1
iter.pos[idim] += 1
end
iter.index += iter.inc[idim]
end
function myref(A::Array, I::RangeIndex...)
i = length(I)
while i > 0 && isa(I[i],Integer); i-=1; end
d = map(length, I)::Dims
X = similar(A, d[1:i])
iter = ForwardSubarrayIterator(size(A),I...)
X[1] = A[iter.index]
for i = 2:numel(X)
X[i] = A[next(iter)]
end
end
function myassign(A::Array, X::Array, I::RangeIndex...)
iter = ForwardSubarrayIterator(size(A),I...)
A[iter.index] = X[1]
for i = 2:numel(X)
A[next(iter)] = X[i]
end
end
# The following should go into range.jl
step(i::Int) = 1
first(i::Int) = i
min(r::Range1) = r.start
max(r::Range1) = last(r)
min(r::Range) = r.step > 0 ? r.start : last(r)
max(r::Range) = r.step > 0 ? last(r) : r.start
# From here on is the material for ref/assign with RangeIndexes
type ForwardSubarrayIterator
index::Int # the current linear index
copy_len::Int # number of elements that can be copied in blocks
len::Vector{Int} # length of iterator along each non-copied dimension
pos::Vector{Int} # current state of iterator relative to len
inc::Vector{Int} # increment within dimension
end
function ForwardSubarrayIterator(dims::Dims,ind::RangeIndex...)
# Check dimensions
# Because we can use linear indexing, the sizes don't have to match
if length(dims) < length(ind)
error("Dimensionality mismatch")
end
if length(ind) < length(dims)
sz = tuple(dims[1:length(ind)-1]...,prod(dims[length(ind):end]))
else
sz = dims
end
for idim = 1:length(sz)
if min(ind[idim]) < 1 || max(ind[idim]) > sz[idim]
println(ind)
error(min(ind[idim]),' ',max(ind[idim])," index out of range")
end
end
# Parse ind, looking for contiguous blocks of memory (for which we
# can use copy_to). Then set up each remaining coordinate's
# increment behavior.
iscontiguous = true
copy_len = 1
offset_first = 0
offset_last = 0
len = Array(Int,0)
inc = Array(Int,0)
s = 1
idim = 1
while idim <= length(ind)
if iscontiguous && step(ind[idim]) == 1
if length(ind[idim]) == sz[idim]
# We're still accumulating coordinates that form a
# contiguous block
copy_len *= sz[idim]
else
# The contiguous block is broken, but we can
# copy along a subset of this dimension
offset_first = copy_len*(min(ind[idim])-1)
copy_len *= length(ind[idim])
offset_last = offset_first
iscontiguous = false
end
s *= sz[idim]
else
iscontiguous = false
if length(ind[idim]) > 1
push(len,length(ind[idim]))
push(inc,s*step(ind[idim]) + offset_first - offset_last)
end
offset_first += s*(first(ind[idim])-1)
offset_last = offset_first + s*(length(ind[idim])-1)*step(ind[idim])
s *= sz[idim]
end
idim += 1
end
pos = ones(Int,length(len))
ForwardSubarrayIterator(offset_first+1,copy_len,len,pos,inc)
end
function next(iter::ForwardSubarrayIterator)
idim::Int = 1
iter.pos[1] += 1
while iter.pos[idim] > iter.len[idim] && idim < length(iter.pos)
iter.pos[idim] = 1
idim += 1
iter.pos[idim] += 1
end
iter.index += iter.inc[idim]
end
# Assign/ref goes here
function myassign{T}(A::Array{T}, X::Array{T}, I::RangeIndex...)
iter = ForwardSubarrayIterator(size(A),I...)
if iter.copy_len > 1
if isempty(iter.pos)
#copy_to(A,iter.index,X,1,iter.copy_len)
ccall(:memcpy, Ptr{Void}, (Ptr{Void}, Ptr{Void}, Uint),
pointer(A, iter.index), pointer(X, 1), iter.copy_len*sizeof(T))
else
i = 1
while iter.pos[end] <= iter.len[end]
#copy_to(A,iter.index,X,i,iter.copy_len)
ccall(:memcpy, Ptr{Void}, (Ptr{Void}, Ptr{Void}, Uint),
pointer(A, iter.index), pointer(X, i), iter.copy_len*sizeof(T))
i += iter.copy_len
next(iter)
end
end
else
arrayset(A,iter.index,X[1])
for i = 2:numel(X)
arrayset(A,next(iter),X[i])
end
end
end
void fsaiter_assign(double* A,const double *X,int *pos,const int *sub_min,const int *sub_max, const int *inc, int index, int n_dims)
{
A--; // convert from 0-offset to 1-offset
X--;
int i = 1;
int idim;
while (1) {
A[index] = X[i];
idim = 0;
pos[0]++;
while (pos[idim] > sub_max[idim])
if (idim < n_dims-1) {
pos[idim] = sub_min[idim];
idim++;
pos[idim]++;
} else
return;
index += inc[idim];
i++;
}
}
function tile1(A::Vector,tilesize::Int)
ntiles = ifloor(length(A)/tilesize)
TA = Array(eltype(A),tilesize,ntiles)
for i1 = 1:ntiles
indxi1 = (i1-1)*tilesize+1:i1*tilesize
tmp = A[indxi1]
TA[:,i1] = tmp
end
end
function tile2(A::Matrix,tilesize::Dims)
ntiles = ntuple(2,i->ifloor(size(A,i)/tilesize[i]))
TA = Array(eltype(A),tilesize...,ntiles...)
for i2 = 1:ntiles[2]
indxi2 = (i2-1)*tilesize[2]+1:i2*tilesize[2]
for i1 = 1:ntiles[1]
indxi1 = (i1-1)*tilesize[1]+1:i1*tilesize[1]
tmp = A[indxi1,indxi2]
TA[:,:,i1,i2] = tmp
end
end
end
function tile3(A::Array,tilesize::Dims)
ntiles = ntuple(3,i->ifloor(size(A,i)/tilesize[i]))
TA = Array(eltype(A),tilesize...,ntiles...)
for i3 = 1:ntiles[3]
indxi3 = (i3-1)*tilesize[3]+1:i3*tilesize[3]
for i2 = 1:ntiles[2]
indxi2 = (i2-1)*tilesize[2]+1:i2*tilesize[2]
for i1 = 1:ntiles[1]
indxi1 = (i1-1)*tilesize[1]+1:i1*tilesize[1]
tmp = A[indxi1,indxi2,indxi3]
TA[:,:,:,i1,i2,i3] = tmp
end
end
end
end
function test1()
arraysize = 4096*4096
tilesize = 32*32
A = randn(arraysize)
@time TA = tile1(A,tilesize)
@time TA = tile1(A,tilesize)
@time TA = tile1(A,tilesize)
@time TA = tile1(A,tilesize)
end
function test2()
arraysize = (4096,4096)
tilesize = (32,32)
A = randn(arraysize...)
@time TA = tile2(A,tilesize)
@time TA = tile2(A,tilesize)
@time TA = tile2(A,tilesize)
@time TA = tile2(A,tilesize)
end
function test3()
arraysize = (256,256,256)
tilesize = (16,16,4)
A = randn(arraysize...)
@time TA = tile3(A,tilesize)
@time TA = tile3(A,tilesize)
@time TA = tile3(A,tilesize)
@time TA = tile3(A,tilesize)
end
function testtarray
fprintf('One-dimensional tiling:\n')
test1
fprintf('Two-dimensional tiling:\n')
test2
fprintf('Three-dimensional tiling:\n')
test3
end
function TA = tile1(A,tilesize)
ntiles = floor(length(A)/tilesize);
TA = zeros([tilesize ntiles],class(A));
for i1 = 1:ntiles
indxi1 = (i1-1)*tilesize+1:i1*tilesize;
tmp = A(indxi1);
TA(:,i1) = tmp;
end
end
function TA = tile2(A,tilesize)
ntiles = floor(size(A)./tilesize);
TA = zeros([tilesize ntiles],class(A));
for i2 = 1:ntiles(2)
indxi2 = (i2-1)*tilesize(2)+1:i2*tilesize(2);
for i1 = 1:ntiles(1)
indxi1 = (i1-1)*tilesize(1)+1:i1*tilesize(1);
tmp = A(indxi1,indxi2);
TA(:,:,i1,i2) = tmp;
end
end
end
function TA = tile3(A,tilesize)
ntiles = floor(size(A)./tilesize);
TA = zeros([tilesize ntiles],class(A));
for i3 = 1:ntiles(3)
indxi3 = (i3-1)*tilesize(3)+1:i3*tilesize(3);
for i2 = 1:ntiles(2)
indxi2 = (i2-1)*tilesize(2)+1:i2*tilesize(2);
for i1 = 1:ntiles(1)
indxi1 = (i1-1)*tilesize(1)+1:i1*tilesize(1);
tmp = A(indxi1,indxi2,indxi3);
TA(:,:,:,i1,i2,i3) = tmp;
end
end
end
end
function test1()
arraysize = 4096*4096;
tilesize = 32*32;
A = randn(arraysize,1);
tic; TA = tile1(A,tilesize); toc
tic; TA = tile1(A,tilesize); toc
tic; TA = tile1(A,tilesize); toc
tic; TA = tile1(A,tilesize); toc
end
function test2()
arraysize = [4096,4096];
tilesize = [32,32];
A = randn(arraysize);
tic; TA = tile2(A,tilesize); toc
tic; TA = tile2(A,tilesize); toc
tic; TA = tile2(A,tilesize); toc
tic; TA = tile2(A,tilesize); toc
end
function test3()
arraysize = [256,256,256];
tilesize = [16,16,4];
A = randn(arraysize);
tic; TA = tile3(A,tilesize); toc
tic; TA = tile3(A,tilesize); toc
tic; TA = tile3(A,tilesize); toc
tic; TA = tile3(A,tilesize); toc
end
function tile1(A::Vector,tilesize::Int)
ntiles = ifloor(length(A)/tilesize)
TA = Array(eltype(A),tilesize,ntiles)
for i1 = 1:ntiles
indxi1 = (i1-1)*tilesize+1:i1*tilesize
tmp = A[indxi1]
# TA[:,i1] = tmp
myassign(TA,tmp,1:tilesize,i1)
end
end
function tile2(A::Matrix,tilesize::Dims)
ntiles = ntuple(2,i->ifloor(size(A,i)/tilesize[i]))
s = cumprod([tilesize...])
TA = Array(eltype(A),tilesize...,ntiles...)
for i2 = 1:ntiles[2]
indxi2 = (i2-1)*tilesize[2]+1:i2*tilesize[2]
for i1 = 1:ntiles[1]
indxi1 = (i1-1)*tilesize[1]+1:i1*tilesize[1]
tmp = A[indxi1,indxi2]
# TA[:,:,i1,i2] = tmp
myassign(TA,tmp,1:tilesize[1],1:tilesize[2],i1,i2)
end
end
end
function tile3(A::Array,tilesize::Dims)
ntiles = ntuple(3,i->ifloor(size(A,i)/tilesize[i]))
s = cumprod([tilesize...])
TA = Array(eltype(A),tilesize...,ntiles...)
for i3 = 1:ntiles[3]
indxi3 = (i3-1)*tilesize[3]+1:i3*tilesize[3]
for i2 = 1:ntiles[2]
indxi2 = (i2-1)*tilesize[2]+1:i2*tilesize[2]
for i1 = 1:ntiles[1]
indxi1 = (i1-1)*tilesize[1]+1:i1*tilesize[1]
tmp = A[indxi1,indxi2,indxi3]
# TA[:,:,:,i1,i2,i3] = tmp
myassign(TA,tmp,1:tilesize[1],1:tilesize[2],1:tilesize[3],i1,i2,i3)
end
end
end
end
function test1()
arraysize = 4096*4096
tilesize = 32*32
A = randn(arraysize)
@time TA = tile1(A,tilesize)
@time TA = tile1(A,tilesize)
@time TA = tile1(A,tilesize)
@time TA = tile1(A,tilesize)
end
function test2()
arraysize = (4096,4096)
tilesize = (32,32)
A = randn(arraysize...)
@time TA = tile2(A,tilesize)
@time TA = tile2(A,tilesize)
@time TA = tile2(A,tilesize)
@time TA = tile2(A,tilesize)
end
function test3()
arraysize = (256,256,256)
tilesize = (16,16,4)
A = randn(arraysize...)
@time TA = tile3(A,tilesize)
@time TA = tile3(A,tilesize)
@time TA = tile3(A,tilesize)
@time TA = tile3(A,tilesize)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment