// Gist by @emfomenk (created June 5, 2020): a standalone oneDNN (DNNL)
// convolution benchmark derived from the cnn_inference_f32 example.
#include <stdio.h>
#include <assert.h>
#include <chrono>
#include <unordered_map>
#include <vector>
#include "dnnl.hpp"
using namespace dnnl;
void convolution_param(engine eng, dnnl::memory user_src_memory, int batch,
        int channel, int height, int width, dnnl::memory user_weights_memory,
        int no_of_filter, int kernel_h, int kernel_w, int pad_h, int pad_w,
        int stride_h, int stride_w, dnnl::memory conv1_user_bias_memory,
        dnnl::memory conv1_dst_memory, int out_height, int out_width) {
    int times = 1;
    using tag = memory::format_tag;
    using dt = memory::data_type;
    /// Initialize a stream. The engine is created by the caller and passed
    /// in as a parameter, so the local engine construction is disabled.
    /// @snippet cnn_inference_f32.cpp Initialize engine and stream
    //[Initialize engine and stream]
    //engine eng(engine_kind, 0);
    stream s(eng);
    //[Initialize engine and stream]
    /// Create a vector for the primitives and a vector to hold memory
    /// that will be used as arguments.
    /// @snippet cnn_inference_f32.cpp Create network
    //[Create network]
    std::vector<primitive> net;
    std::vector<std::unordered_map<int, memory>> net_args;
    //[Create network]
    // AlexNet: conv1
    // {batch, 3, 227, 227} (x) {96, 3, 11, 11} -> {batch, 96, 55, 55}
    // strides: {4, 4}
    memory::dims conv1_src_tz = {batch, channel, height, width};
    memory::dims conv1_weights_tz = {no_of_filter, channel, kernel_h, kernel_w};
    memory::dims conv1_bias_tz = {no_of_filter};
    memory::dims conv1_dst_tz = {batch, no_of_filter, out_height, out_width};
    memory::dims conv1_strides = {stride_h, stride_w};
    memory::dims conv1_padding = {pad_h, pad_w};
    /// Create memory descriptors with layout tag::any. The `any` format enables
    /// the convolution primitive to choose the data format that will result in
    /// the best performance based on its input parameters (convolution kernel
    /// sizes, strides, padding, and so on). If the resulting format is different
    /// from `nchw`, the user data must be transformed to the format required for
    /// the convolution (as explained below).
    /// @snippet cnn_inference_f32.cpp Create convolution memory descriptors
    //[Create convolution memory descriptors]
    auto conv1_src_md = memory::desc({conv1_src_tz}, dt::f32, tag::any);
    auto conv1_bias_md = memory::desc({conv1_bias_tz}, dt::f32, tag::any);
    auto conv1_weights_md = memory::desc({conv1_weights_tz}, dt::f32, tag::any);
    auto conv1_dst_md = memory::desc({conv1_dst_tz}, dt::f32, tag::any);
    //[Create convolution memory descriptors]
    /// Create a convolution descriptor by specifying propagation kind,
    /// [convolution algorithm](@ref dev_guide_convolution), shapes of input,
    /// weights, bias, output, convolution strides, padding, and kind of padding.
    /// Propagation kind is set to prop_kind::forward_inference to optimize for
    /// inference execution and omit computations that are necessary only for
    /// backward propagation.
    /// @snippet cnn_inference_f32.cpp Create convolution descriptor
    //[Create convolution descriptor]
    auto conv1_desc = convolution_forward::desc(prop_kind::forward_inference,
            algorithm::convolution_direct, conv1_src_md, conv1_weights_md,
            conv1_bias_md, conv1_dst_md, conv1_strides, conv1_padding,
            conv1_padding);
    //[Create convolution descriptor]
    /// Create a convolution primitive descriptor. Once created, this
    /// descriptor has specific formats instead of the `any` format specified
    /// in the convolution descriptor.
    /// @snippet cnn_inference_f32.cpp Create convolution primitive descriptor
    //[Create convolution primitive descriptor]
    auto conv1_prim_desc = convolution_forward::primitive_desc(conv1_desc, eng);
    //[Create convolution primitive descriptor]
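    // The concrete formats the implementation picked can be queried from the
    // primitive descriptor via conv1_prim_desc.src_desc(), .weights_desc(),
    // and .dst_desc(); the checks below compare them against the layouts the
    // user supplied.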
    /// Check whether the data and weights formats required by the convolution
    /// differ from the user formats. If they do, change the layout using a
    /// reorder primitive.
    /// @snippet cnn_inference_f32.cpp Reorder data and weights
    //[Reorder data and weights]
    auto conv1_src_memory = user_src_memory;
    if (conv1_prim_desc.src_desc() != user_src_memory.get_desc()) {
        conv1_src_memory = memory(conv1_prim_desc.src_desc(), eng);
        net.push_back(reorder(user_src_memory, conv1_src_memory));
        net_args.push_back({{DNNL_ARG_FROM, user_src_memory},
                {DNNL_ARG_TO, conv1_src_memory}});
    }
    auto conv1_weights_memory = user_weights_memory;
    if (conv1_prim_desc.weights_desc() != user_weights_memory.get_desc()) {
        conv1_weights_memory = memory(conv1_prim_desc.weights_desc(), eng);
        reorder(user_weights_memory, conv1_weights_memory)
                .execute(s, user_weights_memory, conv1_weights_memory);
    }
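    // Note: the src reorder is queued in `net` and re-runs on every timed
    // iteration, while the weights reorder above executes eagerly, once.
    // Weights are constant across runs, so paying that cost up front keeps
    // it out of the measured loop.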
    //[Reorder data and weights]
    /// Create a memory primitive for output. In this variant the destination
    /// memory is supplied by the caller, so the allocation below is disabled.
    /// @snippet cnn_inference_f32.cpp Create memory for output
    //[Create memory for output]
    //auto conv1_dst_memory = memory(conv1_prim_desc.dst_desc(), eng);
    //[Create memory for output]
    /// Create a convolution primitive and add it to the net.
    /// @snippet cnn_inference_f32.cpp Create convolution primitive
    //[Create convolution primitive]
    net.push_back(convolution_forward(conv1_prim_desc));
    net_args.push_back({{DNNL_ARG_SRC, conv1_src_memory},
            {DNNL_ARG_WEIGHTS, conv1_weights_memory},
            {DNNL_ARG_BIAS, conv1_user_bias_memory},
            {DNNL_ARG_DST, conv1_dst_memory}});
    //[Create convolution primitive]
    /// Finally, execute the primitives. For this example, the net is executed
    /// multiple times and each execution is timed individually.
    /// @snippet cnn_inference_f32.cpp Execute model
    //[Execute model]
    for (int j = 0; j < times; ++j) {
        assert(net.size() == net_args.size() && "something is missing");
        for (size_t i = 0; i < net.size(); ++i) {
            net.at(i).execute(s, net_args.at(i));
        }
    }
    //[Execute model]
    s.wait();
}
int main(int argc, char **argv) {
    // Input parameters to the convolution.
    int times = 1; // 100
    int batch = 2; // 3
    int channel = 8; // 3
    int height = 222; // 227
    int width = 222; // 227
    int no_of_filter = 8; // 96
    int kernel_h = 9; // 11
    int kernel_w = 9; // 11
    int pad_h = 4;
    int pad_w = 4;
    int stride_h = 1; // 4
    int stride_w = 1; // 4
    int out_height = (height + 2 * pad_h - kernel_h) / stride_h + 1;
    int out_width = (width + 2 * pad_w - kernel_w) / stride_w + 1;
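    // Output spatial size follows out = (in + 2 * pad - kernel) / stride + 1.
    // With the defaults above: (222 + 2 * 4 - 9) / 1 + 1 = 222, so this
    // convolution preserves the 222x222 spatial shape.
    assert(out_height == 222 && out_width == 222);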
    using tag = memory::format_tag;
    using dt = memory::data_type;
    memory::dims conv1_src_tz = {batch, channel, height, width};
    memory::dims conv1_weights_tz = {no_of_filter, channel, kernel_h, kernel_w};
    memory::dims conv1_bias_tz = {no_of_filter};
    memory::dims conv1_dst_tz = {batch, no_of_filter, out_height, out_width};
    engine::kind engine_kind = engine::kind::cpu;
    engine eng(engine_kind, 0);
    stream s(eng);
    // Memory allocation: user src/weights use plain nhwc/hwio layouts; the
    // destination is pre-allocated in the blocked aBcd8b layout.
    auto user_src_memory = memory({{conv1_src_tz}, dt::f32, tag::nhwc}, eng);
    auto user_weights_memory = memory({{conv1_weights_tz}, dt::f32, tag::hwio}, eng);
    auto conv1_user_bias_memory = memory({{conv1_bias_tz}, dt::f32, tag::x}, eng);
    auto conv1_dst_memory = memory({{conv1_dst_tz}, dt::f32, tag::aBcd8b}, eng);
    // Data initialization: init_data() is not part of this gist, so the
    // original calls are left disabled.
    //init_data(user_src_memory);
    //init_data(user_weights_memory);
    //init_data(conv1_user_bias_memory);
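    // A minimal stand-in for the missing init_data(), assuming a CPU engine
    // so the buffer is directly addressable; it fills an f32 memory object
    // with a constant. (Hypothetical helper, not part of the original gist.)
    auto fill = [](memory &m, float v) {
        float *p = static_cast<float *>(m.get_data_handle());
        size_t n = m.get_desc().get_size() / sizeof(float);
        for (size_t i = 0; i < n; ++i)
            p[i] = v;
    };
    fill(user_src_memory, 1.0f);
    fill(user_weights_memory, 0.5f);
    fill(conv1_user_bias_memory, 0.0f);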
    auto begin = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch())
            .count();
    convolution_param(eng, user_src_memory, batch, channel, height, width,
            user_weights_memory, no_of_filter, kernel_h, kernel_w, pad_h,
            pad_w, stride_h, stride_w, conv1_user_bias_memory,
            conv1_dst_memory, out_height, out_width);
    auto end = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch())
            .count();
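    // The gist measures begin/end but never reports them; print the elapsed
    // wall-clock time (this includes primitive creation, the queued reorder,
    // and execution, not just the convolution kernel).
    printf("convolution_param took %lld ms\n", (long long)(end - begin));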
    // Reorder the blocked result back to plain nhwc before reading it.
    auto conv1_dst_memory_new = memory({{conv1_dst_tz}, dt::f32, tag::nhwc}, eng);
    reorder(conv1_dst_memory, conv1_dst_memory_new)
            .execute(s, conv1_dst_memory, conv1_dst_memory_new);
    s.wait(); // make sure the reorder has finished before touching the buffer
    float *dataHandle = (float *)conv1_dst_memory_new.get_data_handle();
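    // Touch the output so the result is observable; the index printed here is
    // illustrative, no expected value is specified by the original code.
    printf("dst[0] = %f\n", dataHandle[0]);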
    return 0;
}