@emfomenk, last active June 27, 2019 14:45
simple inner product example (related to https://github.com/intel/mkl-dnn/issues/499)
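The code below is written against the mkl-dnn v1.0 C++ API (memory::format_tag, stream-based execute). It can typically be built with something like g++ -std=c++11 ip_example.cpp -lmkldnn, where the file name and the exact include/library paths are placeholders that depend on where mkl-dnn is installed.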
#include <iostream>
#include <vector>
#include <mkldnn.hpp>
using namespace std;
using namespace mkldnn;
using tag = memory::format_tag;
using dt = memory::data_type;
int main(int argc, char *argv[]) {
    engine eng(engine::kind::cpu, 0);
    stream s(eng);

    int n = 2, ic = 2, h = 1, w = 3;
    int oc = 5;

    // input tensor
    vector<float> v_in0 {
        // num0
         0.090806,  0.899200,  0.654030,
        -0.255590,  0.082069,  0.598238,
        // num1
        -0.640039, -0.816867, -0.146911,
         0.436301, -0.978258,  0.222948
    };
    // weights
    vector<float> v_in1 {
         0.090806,  0.899200,  0.654030, -0.255590,  0.082069,  0.598238,
        -0.640039, -0.816867, -0.146911,  0.436301, -0.978258,  0.222948,
         0.625901, -0.546993,  0.629550,  0.759302, -0.318601, -0.329514,
        -0.518760,  0.777055,  0.590312, -0.744923, -0.701665,  0.673724,
         0.885093, -0.066971, -0.483985,  0.398048, -0.311398,  0.602747
    };
    // bias
    vector<float> v_in2 {0.090806, 0.899200, 0.654030, -0.255590, 0.082069};

    vector<float> v_out(n * oc);
    // 1. Define the user's memory objects based on the given arrays.
    //    For these we know the formats.
    auto user_input_desc = memory::desc({n, ic, h, w}, dt::f32, tag::nchw);
    auto user_weight_desc = memory::desc({oc, ic, h, w}, dt::f32, tag::oihw);
    auto user_bias_desc = memory::desc({oc}, dt::f32, tag::x);
    auto user_output_desc = memory::desc({n, oc}, dt::f32, tag::nc);

    auto user_input_mem = memory(user_input_desc, eng, v_in0.data());
    auto user_weight_mem = memory(user_weight_desc, eng, v_in1.data());
    auto user_bias_mem = memory(user_bias_desc, eng, v_in2.data());
    auto user_output_mem = memory(user_output_desc, eng, v_out.data());
    // 2. Now, to create an inner product, define memory descriptors of the
    //    same shapes, but leave the memory format unspecified (any). This
    //    lets Inner Product pick the most appropriate memory format itself.
    auto any_input_desc = memory::desc({n, ic, h, w}, dt::f32, tag::any);
    auto any_weight_desc = memory::desc({oc, ic, h, w}, dt::f32, tag::any);
    auto any_output_desc = memory::desc({n, oc}, dt::f32, tag::any);

    auto ip_desc = inner_product_forward::desc(prop_kind::forward_inference,
            any_input_desc, any_weight_desc, user_bias_desc, any_output_desc);
    auto ip_pd = inner_product_forward::primitive_desc(ip_desc, eng);
    // 3. Once created, query the primitive descriptor for the formats it
    //    wants for src, weights, and dst. If they match what the user has,
    //    we are good to go. If not, we need to reorder the data from the
    //    user's format to the Inner Product's one.

    // 3.1. Check whether we need a user_src -> ip_src reorder
    auto ip_input_desc = ip_pd.src_desc();
    auto ip_input_mem = user_input_mem;
    if (ip_input_desc != user_input_desc) {
        ip_input_mem = memory(ip_input_desc, eng);
        reorder(user_input_mem, ip_input_mem).execute(s,
                user_input_mem, ip_input_mem);
    }
    // 3.2. Check whether we need a user_weights -> ip_weights reorder
    auto ip_weight_desc = ip_pd.weights_desc();
    auto ip_weight_mem = user_weight_mem;
    if (ip_weight_desc != user_weight_desc) {
        ip_weight_mem = memory(ip_weight_desc, eng);
        reorder(user_weight_mem, ip_weight_mem).execute(s,
                user_weight_mem, ip_weight_mem);
    }
    // 3.3. Check if user_dst != ip_dst. If they are different, we need to
    //      prepare a space for ip_dst, and emit the reorder ip_dst -> user_dst
    //      later, when Inner Product computations are done.
    auto ip_output_desc = ip_pd.dst_desc();
    auto ip_output_mem = user_output_mem;
    if (ip_output_desc != user_output_desc)
        ip_output_mem = memory(ip_output_desc, eng);
    // 4. Execute Inner Product
    auto ip = inner_product_forward(ip_pd);
    ip.execute(s, { {MKLDNN_ARG_SRC, ip_input_mem},
                    {MKLDNN_ARG_WEIGHTS, ip_weight_mem},
                    {MKLDNN_ARG_BIAS, user_bias_mem},
                    {MKLDNN_ARG_DST, ip_output_mem} });
    // 5. If user_dst != ip_dst, emit the reorder ip_dst -> user_dst.
    if (ip_output_desc != user_output_desc)
        reorder(ip_output_mem, user_output_mem).execute(s,
                ip_output_mem, user_output_mem);
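
    // Wait until all submitted primitives have finished before reading
    // v_out on the host
    s.wait();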
    // Print the result, one row of oc values per input image
    int index = 0;
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < oc; ++j)
            cout << v_out[index++] << ", ";
        cout << endl;
    }

    return 0;
}
/*
output in v0.95:
1.76532, -0.0479534, 0.213409, 1.31797, 0.0189727,
-0.856347, 3.19476, 1.17726, -0.133433, 0.254062,
output in v1.0:
<same>
*/
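For reference, the numbers above can be reproduced without mkldnn at all: with h = 1 the operation is just dst[i][j] = bias[j] + the dot product of the six flattened src values of image i with the six weights of output channel j. Below is a minimal standalone sketch of that check (the file name naive_check.cpp is only a placeholder, not part of the gist).

// naive_check.cpp -- standalone reference computation of the same inner
// product with plain loops:
//     dst[i][j] = bias[j] + sum_k src[i][k] * weights[j][k]
// where k runs over the flattened ic * h * w = 6 elements per image / filter.
#include <iostream>
#include <vector>

int main() {
    const int n = 2, oc = 5, k = 6; // k = ic * h * w = 2 * 1 * 3

    std::vector<float> src {  // same data as v_in0 above (nchw, flattened)
         0.090806f,  0.899200f,  0.654030f, -0.255590f,  0.082069f,  0.598238f,
        -0.640039f, -0.816867f, -0.146911f,  0.436301f, -0.978258f,  0.222948f
    };
    std::vector<float> wei {  // same data as v_in1 above (oihw, flattened)
         0.090806f,  0.899200f,  0.654030f, -0.255590f,  0.082069f,  0.598238f,
        -0.640039f, -0.816867f, -0.146911f,  0.436301f, -0.978258f,  0.222948f,
         0.625901f, -0.546993f,  0.629550f,  0.759302f, -0.318601f, -0.329514f,
        -0.518760f,  0.777055f,  0.590312f, -0.744923f, -0.701665f,  0.673724f,
         0.885093f, -0.066971f, -0.483985f,  0.398048f, -0.311398f,  0.602747f
    };
    std::vector<float> bia {  // same data as v_in2 above
        0.090806f, 0.899200f, 0.654030f, -0.255590f, 0.082069f
    };

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < oc; ++j) {
            float acc = bia[j];
            for (int l = 0; l < k; ++l)
                acc += src[i * k + l] * wei[j * k + l];
            std::cout << acc << ", ";
        }
        std::cout << std::endl;
    }
    return 0;
}

Compiling and running this sketch should print the same two rows as the mkldnn version above, which makes it a quick way to sanity-check the reorder logic.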