Skip to content

Instantly share code, notes, and snippets.

@szihs
Last active August 15, 2021 11:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save szihs/bb6eefebea8ffe1cdcb05089edde0244 to your computer and use it in GitHub Desktop.
Sample code to compute convolution output
#include <iostream>
#include "include/libnpy/npy.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "utils/Utils.h"
using namespace arm_compute;
using namespace utils;
using namespace std;
/** Minimal Arm Compute Library (NEON) example that runs one convolution layer.
 *
 * Pipeline: fill src with 1.0f -> load weights/biases from .npy files
 * (libnpy) -> NEConvolutionLayer::run() -> save the output tensor to
 * ./build/examples/out.npy so it can be compared against a TF reference.
 */
class TestCNNExample : public Example
{
public:
/** Configure tensors, the convolution function and the memory managers.
 *
 * @param[in] argc Unused.
 * @param[in] argv Unused.
 *
 * @return Always true (this setup path reports no failures).
 */
bool do_setup(int argc, char **argv) override
{
ARM_COMPUTE_UNUSED(argc);
ARM_COMPUTE_UNUSED(argv);
// Create memory manager components
// We need 2 memory managers: 1 for handling the tensors within the functions (mm_layers) and 1 for handling the input and output tensors of the functions (mm_transitions))
auto lifetime_mgr0 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager
auto lifetime_mgr1 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager
auto pool_mgr0 = std::make_shared<PoolManager>(); // Create pool manager
auto pool_mgr1 = std::make_shared<PoolManager>(); // Create pool manager
auto mm_layers = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Memory manager for the function's internal tensors
auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Memory manager for inter-layer (transition) tensors
// The weights and biases tensors should be initialized with the values inferred with the training
// Set memory manager where allowed to manage internal memory requirements
conv0 = arm_compute::support::cpp14::make_unique<NEConvolutionLayer>(mm_layers);
/*
Reference configuration, from
https://www.tensorflow.org/api_docs/python/tf/nn/conv2d (NHWC layout):
input -> (1,6,3,2) [batch = 1, in_height = 6, in_width = 3, in_channels = 2]
filter -> (4, 3, 2, 4) [filter_height =4, filter_width =3, in_channels=2, out_channels=4]
output -> (1, 6, 3, 4), height=6, width=3,channels=4
*/
// Initialize src tensor.
// ACL TensorShape here is (width, height, channels) — the reverse order of
// the TF NHWC dims above; the batch of 1 is implicit (no 4th dimension).
constexpr unsigned int width_src_image = 3;
constexpr unsigned int height_src_image = 6;
constexpr unsigned int ifm_src_img = 2;
const TensorShape src_shape(width_src_image, height_src_image, ifm_src_img);
src.allocator()->init(TensorInfo(src_shape, 1, DataType::F32));
// Initialize tensors of conv0
constexpr unsigned int kernel_x_conv0 = 3; // filter width
constexpr unsigned int kernel_y_conv0 = 4; // filter height
constexpr unsigned int ofm_conv0 = 4;      // number of output feature maps
//HWIO in TF, OIHW in ACL, i.e ( W, H, I, O)
const TensorShape weights_shape_conv0(kernel_x_conv0, kernel_y_conv0, src_shape.z(), ofm_conv0);
const TensorShape biases_shape_conv0(ofm_conv0);
// Output keeps the input's spatial size (3x6): the asymmetric padding
// configured below (1/1 horizontal, 1 top / 2 bottom) is presumably chosen
// to reproduce TF "SAME" padding for a 3x4 kernel at stride 1 — verify
// against the TF reference output.
const TensorShape out_shape_conv0(src_shape.x(), src_shape.y(), weights_shape_conv0[3]);
weights0.allocator()->init(TensorInfo(weights_shape_conv0, 1, DataType::F32));
biases0.allocator()->init(TensorInfo(biases_shape_conv0, 1, DataType::F32));
out_conv0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32));
/* -----------------------End: [Initialize tensors] */
/* [Configure functions] */
//PadStrideInfo (unsigned int stride_x, unsigned int stride_y, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top, unsigned int pad_bottom, DimensionRoundingType round
conv0->configure(&src, &weights0, &biases0, &out_conv0, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_left */, 1 /* pad_right */, 1, 2, DimensionRoundingType::FLOOR ));
/* -----------------------End: [Configure functions] */
/*[ Add tensors to memory manager ]*/
// We need 2 memory groups for handling the input and output
// We call explicitly allocate after manage() in order to avoid overlapping lifetimes
memory_group0 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions);
memory_group1 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions);
// NOTE(review): only out_conv0 is actually managed; memory_group1 manages
// no tensors here (it mirrors the upstream multi-layer example's structure).
memory_group0->manage(&out_conv0);
out_conv0.allocator()->allocate();
/* -----------------------End: [ Add tensors to memory manager ] */
/* [Allocate tensors] */
// Now that the padding requirements are known we can allocate all tensors
src.allocator()->allocate();
weights0.allocator()->allocate();
biases0.allocator()->allocate();
/* -----------------------End: [Allocate tensors] */
// Populate the layers manager. (Validity checks, memory allocations etc)
mm_layers->populate(allocator, 1 /* num_pools */);
// Populate the transitions manager. (Validity checks, memory allocations etc)
mm_transitions->populate(allocator, 2 /* num_pools */);
return true;
}
/** Fill the inputs, run the convolution and write the output to disk.
 *
 * src is filled with a constant 1.0f; weights and biases are read from
 * ./build/examples/w.npy and ./build/examples/b.npy — paths are relative
 * to the working directory, so run the binary from the project root.
 */
void do_run() override
{
// Acquire memory for the memory groups
memory_group0->acquire();
memory_group1->acquire();
{
// Fill every element of the input tensor with the constant 1.0f.
Window window;
window.use_tensor_dimensions(src.info()->tensor_shape());
execute_window_loop(window, [&](const Coordinates & id)
{
*reinterpret_cast<float *>(src.ptr_to_element(id)) = 1.0f;
});
}
{
// Load the convolution weights from w.npy and copy them element-by-element
// into weights0, printing each (o, i, h, w) coordinate and value for
// debugging. NOTE(review): this assumes the flat element order in the
// .npy file matches ACL's window iteration order — confirm against the
// script that wrote w.npy.
vector<unsigned long> shape;
vector<float>data;
npy::LoadArrayFromNumpy("./build/examples/w.npy", shape, data);
std::cout << "weights shape contains:";
for (std::vector<unsigned long>::iterator it = shape.begin() ; it != shape.end(); ++it)
std::cout << ' ' << *it;
std::cout << '\n';
Window window;
window.use_tensor_dimensions(weights0.info()->tensor_shape());
const DataLayout data_layout = weights0.info()->data_layout(); // queried for inspection only; not used below
const TensorShape tensor_shape = weights0.info()->tensor_shape();
cout << "Num dims " << tensor_shape.num_dimensions() << std::endl;
for (int i = 0; i < tensor_shape.num_dimensions(); i++)
std::cout << " " << tensor_shape[i];
std::cout << '\n';
std::cout << " WEIGHT WIDTH " << window.x().end();
std::cout << " HT " << window.y().end();
std::cout << " CHANNEL " << window.z().end();
std::cout << " BATCH " << window[3].end() << endl;
Iterator out(&weights0, window);
int count = 0;
execute_window_loop(window, [&](const Coordinates & id)
{
std::cout << "( " << id[3] << ", " << id[2] << ", " << id[1] << ", " << id[0] << " ) " ;
std::cout << " - " << data[count] << endl;
*reinterpret_cast<float *>(out.ptr()) = static_cast<float>( data[count]);
count++;
}, out);
}
{
// Load the biases from b.npy and copy them into biases0 (1-D, one value
// per output feature map), printing each value for debugging.
vector<unsigned long> shape;
vector<float>data;
npy::LoadArrayFromNumpy("./build/examples/b.npy", shape, data);
std::cout << "bias shape contains:";
for (std::vector<unsigned long>::iterator it = shape.begin() ; it != shape.end(); ++it)
std::cout << ' ' << *it;
std::cout << '\n';
int count = 0;
Window window;
window.use_tensor_dimensions(biases0.info()->tensor_shape());
const DataLayout data_layout = biases0.info()->data_layout(); // queried for inspection only; not used below
const TensorShape tensor_shape = biases0.info()->tensor_shape();
cout << "Num dims " << tensor_shape.num_dimensions() << std::endl;
for (int i = 0; i < tensor_shape.num_dimensions(); i++)
std::cout << " " << tensor_shape[i];
std::cout << '\n';
std::cout << " BIAS WIDTH " << window.x().end();
std::cout << " HT " << window.y().end();
std::cout << " CHANNEL" << window.z().end();
std::cout << " BATCH" << window[3].end() << endl;
Iterator out(&biases0, window);
execute_window_loop(window, [&](const Coordinates & id)
{
std::cout << " B " << data[count] << std::endl;
*reinterpret_cast<float *>(out.ptr()) = static_cast<float>( data[count]);
count++;
}, out);
}
// Execute the configured convolution on the now-populated tensors.
conv0->run();
{
// Read the output tensor back element-by-element and save it as a 4-D
// .npy array (batch, channel, height, width — see `shape` below).
Window window;
window.use_tensor_dimensions(out_conv0.info()->tensor_shape());
const DataLayout data_layout = out_conv0.info()->data_layout(); // queried for inspection only; not used below
const TensorShape tensor_shape = out_conv0.info()->tensor_shape();
cout << "max dims " << tensor_shape.num_dimensions() << std::endl;
for (int i = 0; i < tensor_shape.num_dimensions(); i++)
std::cout << " " << tensor_shape[i];
std::cout << '\n';
std::cout << " OUT WIDTH " << window.x().end();
std::cout << " HT " << window.y().end();
std::cout << " CHANNEL " << window.z().end();
std::cout << " BATCH " << window[3].end() << endl;
unsigned long ww = window.x().end();
unsigned long hh = window.y().end();
unsigned long cc = window.z().end();
unsigned long bb = window[3].end();
//const unsigned long shape[] = {ww, hh, cc, bb};
// Dimensions are reversed so the saved array reads as (N, C, H, W).
const unsigned long shape[] = {bb, cc, hh, ww};
std::vector <float> data (shape[0] *shape[1] * shape[2] * shape[3]);
int count = 0;
execute_window_loop(window, [&](const Coordinates & id)
{
data[count++] = *reinterpret_cast<float *>(out_conv0.ptr_to_element(id));
});
// false = C-order (not Fortran-order); 4 = rank of `shape`.
npy::SaveArrayAsNumpy( "./build/examples/out.npy", false, 4, shape, data);
}
// Release memory
memory_group0->release();
memory_group1->release();
}
private:
// The src tensor should contain the input image
Tensor src{};
// Convolution parameters and its output
Tensor weights0{};
Tensor biases0{};
Tensor out_conv0{};
// NEON allocator backing both memory managers' pools
Allocator allocator{};
// Memory groups for transition tensors (group1 is unused — see do_setup)
std::unique_ptr<MemoryGroup> memory_group0{};
std::unique_ptr<MemoryGroup> memory_group1{};
// Layers
std::unique_ptr<NEConvolutionLayer> conv0{};
};
/** Entry point for the single-convolution CNN example.
 *
 * The network consists of a single layer:
 *
 *   Input -> conv0
 *
 * Delegates to the ACL example runner, which invokes do_setup() and
 * do_run() on TestCNNExample.
 *
 * @param[in] argc Number of arguments
 * @param[in] argv Arguments
 */
int main(int argc, char **argv)
{
    const int status = utils::run_example<TestCNNExample>(argc, argv);
    return status;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment