@emfomenk, last active June 27, 2019 14:45
simple inner product example (related to https://github.com/intel/mkl-dnn/issues/499)
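The code below is written against the mkl-dnn v1.0 C++ API (memory::format_tag, stream-based execute). It can typically be built with something like g++ -std=c++11 ip_example.cpp -lmkldnn, where the file name and the exact include/library paths are placeholders that depend on where mkl-dnn is installed.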
#include <iostream>
#include <vector>
#include <mkldnn.hpp>
using namespace std;
using namespace mkldnn;
using tag = memory::format_tag;
using dt = memory::data_type;
int main(int argc, char *argv[]) {
    engine eng(engine::kind::cpu, 0);
    stream s(eng);

    int n = 2, ic = 2, h = 1, w = 3;
    int oc = 5;

    // input tensor
    vector<float> v_in0 {
        // num0
         0.090806,  0.899200,  0.654030,
        -0.255590,  0.082069,  0.598238,
        // num1
        -0.640039, -0.816867, -0.146911,
         0.436301, -0.978258,  0.222948
    };
    // weights
    vector<float> v_in1 {
         0.090806,  0.899200,  0.654030, -0.255590,  0.082069,  0.598238,
        -0.640039, -0.816867, -0.146911,  0.436301, -0.978258,  0.222948,
         0.625901, -0.546993,  0.629550,  0.759302, -0.318601, -0.329514,
        -0.518760,  0.777055,  0.590312, -0.744923, -0.701665,  0.673724,
         0.885093, -0.066971, -0.483985,  0.398048, -0.311398,  0.602747
    };
    // bias
    vector<float> v_in2 {0.090806, 0.899200, 0.654030, -0.255590, 0.082069};

    vector<float> v_out(n * oc);
    // 1. Define the user's memory objects based on the given arrays.
    //    For these we know the formats.
    auto user_input_desc = memory::desc({n, ic, h, w}, dt::f32, tag::nchw);
    auto user_weight_desc = memory::desc({oc, ic, h, w}, dt::f32, tag::oihw);
    auto user_bias_desc = memory::desc({oc}, dt::f32, tag::x);
    auto user_output_desc = memory::desc({n, oc}, dt::f32, tag::nc);

    auto user_input_mem = memory(user_input_desc, eng, v_in0.data());
    auto user_weight_mem = memory(user_weight_desc, eng, v_in1.data());
    auto user_bias_mem = memory(user_bias_desc, eng, v_in2.data());
    auto user_output_mem = memory(user_output_desc, eng, v_out.data());
    // 2. Now, to create an inner product, define memory descriptors of the
    //    same shapes, but leave the memory format unspecified (any). This
    //    lets Inner Product pick the most appropriate memory format itself.
    auto any_input_desc = memory::desc({n, ic, h, w}, dt::f32, tag::any);
    auto any_weight_desc = memory::desc({oc, ic, h, w}, dt::f32, tag::any);
    auto any_output_desc = memory::desc({n, oc}, dt::f32, tag::any);

    auto ip_desc = inner_product_forward::desc(prop_kind::forward_inference,
            any_input_desc, any_weight_desc, user_bias_desc, any_output_desc);
    auto ip_pd = inner_product_forward::primitive_desc(ip_desc, eng);
    // 3. Once created, query the primitive descriptor for the formats it
    //    wants for src, weights, and dst. If they match what the user has,
    //    we are good to go. If not, we need to reorder the data from the
    //    user's format to the Inner Product's one.

    // 3.1. Check whether we need a user_src -> ip_src reorder
    auto ip_input_desc = ip_pd.src_desc();
    auto ip_input_mem = user_input_mem;
    if (ip_input_desc != user_input_desc) {
        ip_input_mem = memory(ip_input_desc, eng);
        reorder(user_input_mem, ip_input_mem).execute(s,
                user_input_mem, ip_input_mem);
    }
    // 3.2. Check whether we need a user_weights -> ip_weights reorder
    auto ip_weight_desc = ip_pd.weights_desc();
    auto ip_weight_mem = user_weight_mem;
    if (ip_weight_desc != user_weight_desc) {
        ip_weight_mem = memory(ip_weight_desc, eng);
        reorder(user_weight_mem, ip_weight_mem).execute(s,
                user_weight_mem, ip_weight_mem);
    }
    // 3.3. Check if user_dst != ip_dst. If they are different, we need to
    //      prepare a space for ip_dst, and emit the reorder ip_dst -> user_dst
    //      later, when Inner Product computations are done.
    auto ip_output_desc = ip_pd.dst_desc();
    auto ip_output_mem = user_output_mem;
    if (ip_output_desc != user_output_desc)
        ip_output_mem = memory(ip_output_desc, eng);
    // 4. Execute Inner Product
    auto ip = inner_product_forward(ip_pd);
    ip.execute(s, { {MKLDNN_ARG_SRC, ip_input_mem},
                    {MKLDNN_ARG_WEIGHTS, ip_weight_mem},
                    {MKLDNN_ARG_BIAS, user_bias_mem},
                    {MKLDNN_ARG_DST, ip_output_mem} });
    // 5. If user_dst != ip_dst, emit the reorder ip_dst -> user_dst.
    if (ip_output_desc != user_output_desc)
        reorder(ip_output_mem, user_output_mem).execute(s,
                ip_output_mem, user_output_mem);
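
    // Wait until all submitted primitives have finished before reading
    // v_out on the host
    s.wait();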
    // Print the result, one row of oc values per input image
    int index = 0;
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < oc; ++j)
            cout << v_out[index++] << ", ";
        cout << endl;
    }

    return 0;
}
/*
output in v0.95:
1.76532, -0.0479534, 0.213409, 1.31797, 0.0189727,
-0.856347, 3.19476, 1.17726, -0.133433, 0.254062,
output in v1.0:
<same>
*/
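For reference, the numbers above can be reproduced without mkldnn at all: with h = 1 the operation is just dst[i][j] = bias[j] + the dot product of the six flattened src values of image i with the six weights of output channel j. Below is a minimal standalone sketch of that check (the file name naive_check.cpp is only a placeholder, not part of the gist).

// naive_check.cpp -- standalone reference computation of the same inner
// product with plain loops:
//     dst[i][j] = bias[j] + sum_k src[i][k] * weights[j][k]
// where k runs over the flattened ic * h * w = 6 elements per image / filter.
#include <iostream>
#include <vector>

int main() {
    const int n = 2, oc = 5, k = 6; // k = ic * h * w = 2 * 1 * 3

    std::vector<float> src {  // same data as v_in0 above (nchw, flattened)
         0.090806f,  0.899200f,  0.654030f, -0.255590f,  0.082069f,  0.598238f,
        -0.640039f, -0.816867f, -0.146911f,  0.436301f, -0.978258f,  0.222948f
    };
    std::vector<float> wei {  // same data as v_in1 above (oihw, flattened)
         0.090806f,  0.899200f,  0.654030f, -0.255590f,  0.082069f,  0.598238f,
        -0.640039f, -0.816867f, -0.146911f,  0.436301f, -0.978258f,  0.222948f,
         0.625901f, -0.546993f,  0.629550f,  0.759302f, -0.318601f, -0.329514f,
        -0.518760f,  0.777055f,  0.590312f, -0.744923f, -0.701665f,  0.673724f,
         0.885093f, -0.066971f, -0.483985f,  0.398048f, -0.311398f,  0.602747f
    };
    std::vector<float> bia {  // same data as v_in2 above
        0.090806f, 0.899200f, 0.654030f, -0.255590f, 0.082069f
    };

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < oc; ++j) {
            float acc = bia[j];
            for (int l = 0; l < k; ++l)
                acc += src[i * k + l] * wei[j * k + l];
            std::cout << acc << ", ";
        }
        std::cout << std::endl;
    }
    return 0;
}

Compiling and running this sketch should print the same two rows as the mkldnn version above, which makes it a quick way to sanity-check the reorder logic.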