-
-
Save yd2102/f16346d9fcc173450490fcb8ae00aa13 to your computer and use it in GitHub Desktop.
NEGEMMLowpMatrixMultiplyCore performance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "utils/Utils.h"

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
using namespace arm_compute; | |
// Problem size: C(M,N) = A(M,K) * B(K,N)
static constexpr size_t M = 10;
static constexpr size_t N = 768;
static constexpr size_t K = 768;
// Timed iterations per thread in each benchmark run
static constexpr size_t iterations = 10000;
// GEMM coefficients: dst = alpha * (A*B) + beta * C
static constexpr float alpha = 1.0f;
static constexpr float beta  = 0.0f;
// Time repeated executions of a pre-configured Compute Library function.
//
// @param kernel  Function to execute; must already be configured and its
//                tensors allocated so run() is valid.
// @param name    Label printed with the results.
// @param threads Number of CPU threads given to the NEON scheduler.
//
// Prints average latency per iteration (ms) and throughput in Gop/s,
// counting 2*M*N*K operations (one multiply + one add per MAC) per GEMM.
void benchmark(IFunction *kernel, const std::string& name, const int threads)
{
    printf("[%s] Using %d threads...\n", name.c_str(), threads);
    // Use specified number of threads
    NEScheduler::get().set_num_threads(threads);
    // Warm up kernel so one-time setup and cold caches don't skew the timing
    for (auto i = 0; i < 100; i++)
    {
        kernel->run();
    }
    // Scale iteration count with thread count so each configuration does
    // comparable per-thread work. size_t avoids a signed/unsigned loop mix.
    const size_t total = threads * iterations;
    const auto start = std::chrono::steady_clock::now();
    // Execute kernel
    for (size_t i = 0; i < total; i++)
    {
        kernel->run();
    }
    const auto stop = std::chrono::steady_clock::now();
    const std::chrono::duration<double> diff = stop - start;
    const double time = diff.count();
    // Throughput: value is divided by 1e9, so the unit is Gop/s
    // (the original label "op/s" was off by the same factor).
    const double tp = 2.0 * M * N * K * total / time / 1e9;
    printf("[%s] %f ms/iter, %f Gop/s\n", name.c_str(), 1e3 * time / total, tp);
}
// Find min and max value in a float array | |
// Find min and max value in a float array.
//
// @param size Number of elements in @p data. If size <= 0 the outputs are
//             left untouched (the original read data[0] out of bounds).
// @param data Array to scan; must hold at least @p size elements.
// @param min  Output: smallest element.
// @param max  Output: largest element.
void find_min_max(int size, const float *data, float *min, float *max)
{
    if(size <= 0)
    {
        return;
    }
    // Single pass over the data via the standard library.
    const auto mm = std::minmax_element(data, data + size);
    *min = *mm.first;
    *max = *mm.second;
}
// Return reasonable quantisation parameters to use for an array of floats | |
// based on min and max values | |
// Return reasonable quantisation parameters to use for an array of floats
// based on min and max values.
//
// Maps [min,max] onto the full QASYMM8 range [0,255] using the affine
// relation: real_val = (qval - zero_point) * scale.
//
// @param min Smallest observed real value.
// @param max Largest observed real value.
// @return QuantizationInfo holding the derived scale and zero-point.
QuantizationInfo choose_quantization_params(float min, float max)
{
    // Extend the [min,max] interval to contain 0 so we can represent it exactly
    min = std::min(min, 0.f);
    max = std::max(max, 0.f);
    // Quantized representable range for QASYMM8
    const float qmin = 0;
    const float qmax = 255;
    // Determine the scale. Guard the degenerate all-zero input (min == max == 0),
    // which would otherwise produce scale == 0 and a NaN zero-point below.
    const float scale = (max > min) ? (max - min) / (qmax - qmin) : 1.0f;
    // Determine the zero-point; using affine equation val = (qval-zerop) * scale
    const float zero_point_real = qmin - min / scale;
    // But we need to nudge the zero_point to an integer (exact quantized value)
    std::uint8_t zero_point_nudged = 0;
    if(zero_point_real < qmin)
    {
        zero_point_nudged = qmin;
    }
    else if(zero_point_real > qmax)
    {
        zero_point_nudged = qmax;
    }
    else
    {
        zero_point_nudged = static_cast<std::uint8_t>(support::cpp11::round(zero_point_real));
    }
    return QuantizationInfo(scale, zero_point_nudged);
}
int main(int argc, char **argv) | |
{ | |
Tensor src0; | |
Tensor src1; | |
Tensor dst0; | |
NEGEMM fgemm; | |
// Populate tensor information | |
src0.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32, DataLayout::NHWC)); | |
src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32, DataLayout::NHWC)); | |
dst0.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32, DataLayout::NHWC)); | |
// Configure kernel | |
fgemm.configure(&src0, &src1, nullptr, &dst0, alpha, beta); | |
// Allocate all tensors | |
src0.allocator()->allocate(); | |
src1.allocator()->allocate(); | |
dst0.allocator()->allocate(); | |
auto *src0_ptr = reinterpret_cast<float *>(src0.buffer()); | |
auto *src1_ptr = reinterpret_cast<float *>(src1.buffer()); | |
auto *dst0_ptr = reinterpret_cast<float *>(dst0.buffer()); | |
// Initialize random inputs | |
utils::fill_random_tensor(src0, -1.f, 1.f); | |
utils::fill_random_tensor(src1, -1.f, 1.f); | |
// Run benchmarking | |
for (auto i : {1, 2, 4, 8}) | |
{ | |
benchmark(&fgemm, "NEGEMM", i); | |
} | |
float src0_min; | |
float src0_max; | |
float src1_min; | |
float src1_max; | |
find_min_max(M * K, src0_ptr, &src0_min, &src0_max); | |
find_min_max(K * N, src1_ptr, &src1_min, &src1_max); | |
// Get quantization parameters | |
const QuantizationInfo src0_qinfo = choose_quantization_params(src0_min, src0_max); | |
const QuantizationInfo src1_qinfo = choose_quantization_params(src1_min, src1_max); | |
std::cout << "Matrix 1: min=" << src0_min << ", max=" << src0_max << ", "; | |
std::cout << "QuantisationInfo(" << src0_qinfo.scale()[0] << ", " << src0_qinfo.offset()[0] << ")\n"; | |
std::cout << "Matrix 2: min=" << src1_min << ", max=" << src1_max << ", "; | |
std::cout << "QuantisationInfo(" << src1_qinfo.scale()[0] << ", " << src1_qinfo.offset()[0] << ")\n"; | |
// Populate tensor information | |
Tensor q_src0; | |
Tensor q_src1; | |
Tensor q_acc; | |
TensorInfo q_src0_tinfo = TensorInfo(TensorShape(K, M), 1, DataType::QASYMM8, DataLayout::NHWC); | |
TensorInfo q_src1_tinfo = TensorInfo(TensorShape(N, K), 1, DataType::QASYMM8, DataLayout::NHWC); | |
TensorInfo q_acc_tinfo = TensorInfo(TensorShape(N, M), 1, DataType::S32, DataLayout::NHWC); | |
q_src0_tinfo.set_quantization_info(src0_qinfo); | |
q_src1_tinfo.set_quantization_info(src1_qinfo); | |
q_src0.allocator()->init(q_src0_tinfo); | |
q_src1.allocator()->init(q_src1_tinfo); | |
q_acc.allocator()->init(q_acc_tinfo); | |
// Quantize inputs | |
NEQuantizationLayer ql_src0; | |
NEQuantizationLayer ql_src1; | |
ql_src0.configure(&src0, &q_src0); | |
ql_src1.configure(&src1, &q_src1); | |
// Configure kernel | |
NEGEMMLowpMatrixMultiplyCore qgemm; | |
qgemm.configure(&q_src0, &q_src1, nullptr, &q_acc); | |
// Allocate all tensors | |
q_src0.allocator()->allocate(); | |
q_src1.allocator()->allocate(); | |
q_acc.allocator()->allocate(); | |
// Run quantization layers | |
ql_src0.run(); | |
ql_src1.run(); | |
// Run benchmarking | |
for (auto i : {1, 2, 4, 8}) | |
{ | |
benchmark(&qgemm, "NEGEMMLowpMatrixMultiplyCore", i); | |
} | |
return 0; | |
} |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.