// Gist by @emfomenk (created June 5, 2020): a standalone oneDNN (DNNL)
// convolution benchmark derived from the cnn_inference_f32 example.
#include <stdio.h>
#include <assert.h>
#include <chrono>
#include <unordered_map>
#include <vector>
#include "dnnl.hpp"
using namespace dnnl;
void convolution_param(engine eng, dnnl::memory user_src_memory, int batch,
        int channel, int height, int width, dnnl::memory user_weights_memory,
        int no_of_filter, int kernel_h, int kernel_w, int pad_h, int pad_w,
        int stride_h, int stride_w, dnnl::memory conv1_user_bias_memory,
        dnnl::memory conv1_dst_memory, int out_height, int out_width) {
    int times = 1;
    using tag = memory::format_tag;
    using dt = memory::data_type;
    /// Initialize a stream. The engine is created by the caller and passed
    /// in as a parameter, so the local engine construction is disabled.
    /// @snippet cnn_inference_f32.cpp Initialize engine and stream
    //[Initialize engine and stream]
    //engine eng(engine_kind, 0);
    stream s(eng);
    //[Initialize engine and stream]
    /// Create a vector for the primitives and a vector to hold memory
    /// that will be used as arguments.
    /// @snippet cnn_inference_f32.cpp Create network
    //[Create network]
    std::vector<primitive> net;
    std::vector<std::unordered_map<int, memory>> net_args;
    //[Create network]
    // AlexNet: conv1
    // {batch, 3, 227, 227} (x) {96, 3, 11, 11} -> {batch, 96, 55, 55}
    // strides: {4, 4}
    memory::dims conv1_src_tz = {batch, channel, height, width};
    memory::dims conv1_weights_tz = {no_of_filter, channel, kernel_h, kernel_w};
    memory::dims conv1_bias_tz = {no_of_filter};
    memory::dims conv1_dst_tz = {batch, no_of_filter, out_height, out_width};
    memory::dims conv1_strides = {stride_h, stride_w};
    memory::dims conv1_padding = {pad_h, pad_w};
    /// Create memory descriptors with layout tag::any. The `any` format enables
    /// the convolution primitive to choose the data format that will result in
    /// the best performance based on its input parameters (convolution kernel
    /// sizes, strides, padding, and so on). If the resulting format is different
    /// from `nchw`, the user data must be transformed to the format required for
    /// the convolution (as explained below).
    /// @snippet cnn_inference_f32.cpp Create convolution memory descriptors
    //[Create convolution memory descriptors]
    auto conv1_src_md = memory::desc({conv1_src_tz}, dt::f32, tag::any);
    auto conv1_bias_md = memory::desc({conv1_bias_tz}, dt::f32, tag::any);
    auto conv1_weights_md = memory::desc({conv1_weights_tz}, dt::f32, tag::any);
    auto conv1_dst_md = memory::desc({conv1_dst_tz}, dt::f32, tag::any);
    //[Create convolution memory descriptors]
    /// Create a convolution descriptor by specifying propagation kind,
    /// [convolution algorithm](@ref dev_guide_convolution), shapes of input,
    /// weights, bias, output, convolution strides, padding, and kind of padding.
    /// Propagation kind is set to prop_kind::forward_inference to optimize for
    /// inference execution and omit computations that are necessary only for
    /// backward propagation.
    /// @snippet cnn_inference_f32.cpp Create convolution descriptor
    //[Create convolution descriptor]
    auto conv1_desc = convolution_forward::desc(prop_kind::forward_inference,
            algorithm::convolution_direct, conv1_src_md, conv1_weights_md,
            conv1_bias_md, conv1_dst_md, conv1_strides, conv1_padding,
            conv1_padding);
    //[Create convolution descriptor]
    /// Create a convolution primitive descriptor. Once created, this
    /// descriptor has specific formats instead of the `any` format specified
    /// in the convolution descriptor.
    /// @snippet cnn_inference_f32.cpp Create convolution primitive descriptor
    //[Create convolution primitive descriptor]
    auto conv1_prim_desc = convolution_forward::primitive_desc(conv1_desc, eng);
    //[Create convolution primitive descriptor]
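    // The concrete formats the implementation picked can be queried from the
    // primitive descriptor via conv1_prim_desc.src_desc(), .weights_desc(),
    // and .dst_desc(); the checks below compare them against the layouts the
    // user supplied.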
    /// Check whether the data and weights formats required by the convolution
    /// differ from the user formats. If they do, change the layout using a
    /// reorder primitive.
    /// @snippet cnn_inference_f32.cpp Reorder data and weights
    //[Reorder data and weights]
    auto conv1_src_memory = user_src_memory;
    if (conv1_prim_desc.src_desc() != user_src_memory.get_desc()) {
        conv1_src_memory = memory(conv1_prim_desc.src_desc(), eng);
        net.push_back(reorder(user_src_memory, conv1_src_memory));
        net_args.push_back({{DNNL_ARG_FROM, user_src_memory},
                {DNNL_ARG_TO, conv1_src_memory}});
    }
    auto conv1_weights_memory = user_weights_memory;
    if (conv1_prim_desc.weights_desc() != user_weights_memory.get_desc()) {
        conv1_weights_memory = memory(conv1_prim_desc.weights_desc(), eng);
        reorder(user_weights_memory, conv1_weights_memory)
                .execute(s, user_weights_memory, conv1_weights_memory);
    }
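    // Note: the src reorder is queued in `net` and re-runs on every timed
    // iteration, while the weights reorder above executes eagerly, once.
    // Weights are constant across runs, so paying that cost up front keeps
    // it out of the measured loop.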
    //[Reorder data and weights]
    /// Create a memory primitive for output. In this variant the destination
    /// memory is supplied by the caller, so the allocation below is disabled.
    /// @snippet cnn_inference_f32.cpp Create memory for output
    //[Create memory for output]
    //auto conv1_dst_memory = memory(conv1_prim_desc.dst_desc(), eng);
    //[Create memory for output]
    /// Create a convolution primitive and add it to the net.
    /// @snippet cnn_inference_f32.cpp Create convolution primitive
    //[Create convolution primitive]
    net.push_back(convolution_forward(conv1_prim_desc));
    net_args.push_back({{DNNL_ARG_SRC, conv1_src_memory},
            {DNNL_ARG_WEIGHTS, conv1_weights_memory},
            {DNNL_ARG_BIAS, conv1_user_bias_memory},
            {DNNL_ARG_DST, conv1_dst_memory}});
    //[Create convolution primitive]
    /// Finally, execute the primitives. For this example, the net is executed
    /// multiple times and each execution is timed individually.
    /// @snippet cnn_inference_f32.cpp Execute model
    //[Execute model]
    for (int j = 0; j < times; ++j) {
        assert(net.size() == net_args.size() && "something is missing");
        for (size_t i = 0; i < net.size(); ++i) {
            net.at(i).execute(s, net_args.at(i));
        }
    }
    //[Execute model]
    s.wait();
}
int main(int argc, char **argv) {
    // Input parameters to the convolution.
    int times = 1; // 100
    int batch = 2; // 3
    int channel = 8; // 3
    int height = 222; // 227
    int width = 222; // 227
    int no_of_filter = 8; // 96
    int kernel_h = 9; // 11
    int kernel_w = 9; // 11
    int pad_h = 4;
    int pad_w = 4;
    int stride_h = 1; // 4
    int stride_w = 1; // 4
    int out_height = (height + 2 * pad_h - kernel_h) / stride_h + 1;
    int out_width = (width + 2 * pad_w - kernel_w) / stride_w + 1;
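    // Output spatial size follows out = (in + 2 * pad - kernel) / stride + 1.
    // With the defaults above: (222 + 2 * 4 - 9) / 1 + 1 = 222, so this
    // convolution preserves the 222x222 spatial shape.
    assert(out_height == 222 && out_width == 222);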
    using tag = memory::format_tag;
    using dt = memory::data_type;
    memory::dims conv1_src_tz = {batch, channel, height, width};
    memory::dims conv1_weights_tz = {no_of_filter, channel, kernel_h, kernel_w};
    memory::dims conv1_bias_tz = {no_of_filter};
    memory::dims conv1_dst_tz = {batch, no_of_filter, out_height, out_width};
    engine::kind engine_kind = engine::kind::cpu;
    engine eng(engine_kind, 0);
    stream s(eng);
    // Memory allocation: user src/weights use plain nhwc/hwio layouts; the
    // destination is pre-allocated in the blocked aBcd8b layout.
    auto user_src_memory = memory({{conv1_src_tz}, dt::f32, tag::nhwc}, eng);
    auto user_weights_memory = memory({{conv1_weights_tz}, dt::f32, tag::hwio}, eng);
    auto conv1_user_bias_memory = memory({{conv1_bias_tz}, dt::f32, tag::x}, eng);
    auto conv1_dst_memory = memory({{conv1_dst_tz}, dt::f32, tag::aBcd8b}, eng);
    // Data initialization: init_data() is not part of this gist, so the
    // original calls are left disabled.
    //init_data(user_src_memory);
    //init_data(user_weights_memory);
    //init_data(conv1_user_bias_memory);
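    // A minimal stand-in for the missing init_data(), assuming a CPU engine
    // so the buffer is directly addressable; it fills an f32 memory object
    // with a constant. (Hypothetical helper, not part of the original gist.)
    auto fill = [](memory &m, float v) {
        float *p = static_cast<float *>(m.get_data_handle());
        size_t n = m.get_desc().get_size() / sizeof(float);
        for (size_t i = 0; i < n; ++i)
            p[i] = v;
    };
    fill(user_src_memory, 1.0f);
    fill(user_weights_memory, 0.5f);
    fill(conv1_user_bias_memory, 0.0f);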
    auto begin = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch())
            .count();
    convolution_param(eng, user_src_memory, batch, channel, height, width,
            user_weights_memory, no_of_filter, kernel_h, kernel_w, pad_h,
            pad_w, stride_h, stride_w, conv1_user_bias_memory,
            conv1_dst_memory, out_height, out_width);
    auto end = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch())
            .count();
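    // The gist measures begin/end but never reports them; print the elapsed
    // wall-clock time (this includes primitive creation, the queued reorder,
    // and execution, not just the convolution kernel).
    printf("convolution_param took %lld ms\n", (long long)(end - begin));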
    // Reorder the blocked result back to plain nhwc before reading it.
    auto conv1_dst_memory_new = memory({{conv1_dst_tz}, dt::f32, tag::nhwc}, eng);
    reorder(conv1_dst_memory, conv1_dst_memory_new)
            .execute(s, conv1_dst_memory, conv1_dst_memory_new);
    s.wait(); // make sure the reorder has finished before touching the buffer
    float *dataHandle = (float *)conv1_dst_memory_new.get_data_handle();
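    // Touch the output so the result is observable; the index printed here is
    // illustrative, no expected value is specified by the original code.
    printf("dst[0] = %f\n", dataHandle[0]);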
    return 0;
}