Skip to content

Instantly share code, notes, and snippets.

@amroamroamro
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amroamroamro/0f104986796f2e0aa618 to your computer and use it in GitHub Desktop.
Save amroamroamro/0f104986796f2e0aa618 to your computer and use it in GitHub Desktop.
[MATLAB] benchmark for growing arrays

Goal

Comparing performance of growing arrays in MATLAB with and without pre-allocation, in addition to expanding the size in batches.

Refer to this Stack Overflow thread.

Tests

There are two version of the tests:

  • one that adds columns to a matrix
  • one that adds rows to a matrix

Keep in mind that MATLAB stores arrays in column-major order, so it the first variant is expected to be faster.

Notes

In recent versions, MATLAB has greatly improved the performance of automatic array growth. This is especially true when growing the arrays along the last dimension.

In fact, the tests show that when adding columns, the performance is very similar with or without preallocation, which is pretty amazing!

Unfortunately adding rows without preallocation is extremely slow. This is where the proposed solution of growing in batches is helpful.

Now when it comes to implementation, it is sadly known that calling methods on OOP objects has a higher overhead than calling regular functions. This explains why the implemented classes are slower than the equivalent inlined code...

Timings

In the results below, 50K samples are inserted (each sample is a row/column vector of length 20), with a batch size of 10K for the grow-in-batch solutions. Tests were performed in the latest MATLAB R2014b running on Win8.1 64-bit quad-core machine.

% COLUMNS
>> testDynamicArrayC
ans =
    0.0164   % pre-allocated
    0.0249   % un-allocated
    0.6885   % DynamicArrayC class
    0.0597   % batch grow function
    
% ROWS
>> testDynamicArrayR
ans =
    0.0186   % pre-allocated
   13.8400   % un-allocated
    0.7116   % DynamicArrayC class
    0.0610   % batch grow function
classdef DynamicArrayC < handle
%DYNAMICARRAYC Dynamic matrix that grows in size across the columns
%
% Example:
% da = DynamicArrayC(10); % 10 rows, unspecified number of columns
% for i=1:50000
% col = rand(10,1); % new column vector
% addCol(da, col);
% end
% mat = getArray(da);
%
properties (Constant = true, Hidden = true)
% default increment size
DEFAULT_BLOCK_SIZE = 10000;
end
properties (Access = private)
m_arr % matrix
m_idx % position of last column
m_rows % current matrix size (number of rows)
m_cols % current matrix size (number of columns)
m_blockSize % increment size
end
methods
function obj = DynamicArrayC(nrows, ncols)
%DYNAMICARRAYC Constructor
% initialize array of specified size
if nargin < 1, nrows = 1; end
if nargin < 2, ncols = DynamicArrayC.DEFAULT_BLOCK_SIZE; end
obj.m_rows = nrows;
obj.m_cols = ncols;
obj.m_blockSize = ncols;
obj.m_idx = 1;
obj.m_arr = zeros(nrows, ncols);
end
function addCol(obj, colVec)
%ADDCOL Add new column vector
% add column, and increment index position
obj.m_arr(:,obj.m_idx) = colVec;
obj.m_idx = obj.m_idx + 1;
% expand size if needed (less than 10% blockSize left free)
if ( obj.m_idx + (obj.m_blockSize/10) > obj.m_cols)
% expand by blockSize columns, and fill with zeros
obj.m_cols = obj.m_cols + obj.m_blockSize;
obj.m_arr(:,obj.m_cols) = 0;
end
end
function arr = getArray(obj)
%GETARRAY Return allocated matrix
% return array (the used part)
arr = obj.m_arr(:,1:obj.m_idx-1);
end
end
end
classdef DynamicArrayR < handle
%DYNAMICARRAYR Dynamic matrix that grows in size across the rows
%
% Example:
% da = DynamicArrayR(10); % 10 cols, unspecified number of rows
% for i=1:50000
% row = rand(1,10); % new row vector
% addRow(da, row);
% end
% mat = getArray(da);
%
properties (Constant = true, Hidden = true)
% default increment size
DEFAULT_BLOCK_SIZE = 10000;
end
properties (Access = private)
m_arr % matrix
m_idx % position of last row
m_rows % current matrix size (number of rows)
m_cols % current matrix size (number of columns)
m_blockSize % increment size
end
methods
function obj = DynamicArrayR(ncols, nrows)
%DYNAMICARRAYR Constructor
% initialize array of specified size
if nargin < 1, nrows = 1; end
if nargin < 2, nrows = DynamicArrayR.DEFAULT_BLOCK_SIZE; end
obj.m_rows = nrows;
obj.m_cols = ncols;
obj.m_blockSize = nrows;
obj.m_idx = 1;
obj.m_arr = zeros(nrows, ncols);
end
function addRow(obj, rowVec)
%ADDROW Add new row vector
% add row, and increment index position
obj.m_arr(obj.m_idx,:) = rowVec;
obj.m_idx = obj.m_idx + 1;
% expand size if needed (less than 10% blockSize left free)
if ( obj.m_idx + (obj.m_blockSize/10) > obj.m_rows)
% expand by blockSize rows, and fill with zeros
obj.m_rows = obj.m_rows + obj.m_blockSize;
obj.m_arr(obj.m_rows,:) = 0;
end
end
function arr = getArray(obj)
%GETARRAY Return allocated matrix
% return array (the used part)
arr = obj.m_arr(1:obj.m_idx-1,:);
end
end
end
function t = testDynamicArrayC()
%TESTDYNAMICARRAYC Benchmark for growing arrays by columns
nrows = 20;
ncols = 50000; % final number of columns
% functions to test
funcs = {
@() func_preallocated(nrows, ncols)
@() func_unallocated(nrows, ncols)
@() func_DynamicArrayC(nrows, ncols)
@() func_batch_grow(nrows, ncols)
};
% measure performance
t = cellfun(@timeit, funcs, 'UniformOutput',true);
% check results are equal
arrays = cellfun(@feval, funcs, 'UniformOutput',false);
assert(isequal(arrays{:}))
end
function arr = func_preallocated(nrows, ncols)
%FUNC_PREALLOCATION Fully preallocated array with fixed size
arr = zeros(nrows, ncols);
for i=1:ncols
arr(:,i) = i;
end
end
function arr = func_unallocated(nrows, ncols)
%FUNC_UNALLOCATED Unallocated array, grows one column at-a-time
arr = zeros(nrows, 0);
for i=1:ncols
arr(:,i) = i;
end
end
function arr = func_DynamicArrayC(nrows, ncols)
%FUNC_DYNAMICARRAYC Dynamic-size array (grows across columns)
da = DynamicArrayC(nrows);
for i=1:ncols
addCol(da, i);
end
arr = getArray(da);
end
function arr = func_batch_grow(nrows, ncols)
%FUNC_BATCH_GROW Dynamic-size array, grows in batches across columns
idx = 1; % keeps track of current size
numCols = 10000; % initial size
arr = zeros(nrows, numCols);
for i=1:ncols
% add new column
arr(:,idx) = i;
idx = idx + 1;
% allocate space for more columns by doubling in size
if idx > numCols
numCols = numCols * 2;
arr(:,numCols) = 0;
end
end
% truncate excess, and return final array
arr(:,idx:end) = [];
end
function t = testDynamicArrayR()
%TESTDYNAMICARRAYR Benchmark for growing arrays by rows
nrows = 50000; % final number of rows
ncols = 20;
% functions to test
funcs = {
@() func_preallocated(nrows, ncols)
@() func_unallocated(nrows, ncols)
@() func_DynamicArrayR(nrows, ncols)
@() func_batch_grow(nrows, ncols)
};
% measure performance
t = cellfun(@timeit, funcs, 'UniformOutput',true);
% check results are equal
arrays = cellfun(@feval, funcs, 'UniformOutput',false);
assert(isequal(arrays{:}))
end
function arr = func_preallocated(nrows, ncols)
%FUNC_PREALLOCATION Fully preallocated array with fixed size
arr = zeros(nrows, ncols);
for i=1:nrows
arr(i,:) = i;
end
end
function arr = func_unallocated(nrows, ncols)
%FUNC_UNALLOCATED Unallocated array, grows one row at-a-time
arr = zeros(0, ncols);
for i=1:nrows
arr(i,:) = i;
end
end
function arr = func_DynamicArrayR(nrows, ncols)
%FUNC_DYNAMICARRAYR Dynamic-size array (grows across rows)
da = DynamicArrayR(ncols);
for i=1:nrows
addRow(da, i);
end
arr = getArray(da);
end
function arr = func_batch_grow(nrows, ncols)
%FUNC_BATCH_GROW Dynamic-size array, grows in batches across rows
idx = 1; % keeps track of current size
numRows = 10000; % initial size
arr = zeros(numRows, ncols);
for i=1:nrows
% add new row
arr(idx,:) = i;
idx = idx + 1;
% allocate space for more rows by doubling in size
if idx > numRows
numRows = numRows * 2;
arr(numRows,:) = 0;
end
end
% truncate excess, and return final array
arr(idx:end,:) = [];
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment