fix for mkl-dnn/#608: common: return the original behavior of sub-memory
From abca9ca2329c5eeaa92a05b3d8d09a1629e2e62e Mon Sep 17 00:00:00 2001
From: "Fomenko, Evarist M" <evarist.m.fomenko@intel.com>
Date: Fri, 29 Nov 2019 09:13:55 +0000
Subject: [PATCH] common: return the original behavior of sub-memory

The unwanted change came with the recent MatMul enabling, namely with the
support for run-time dimensions. To retrieve the run-time sizes and
strides, primitives started using the memory descriptors from the memory
arguments instead of the memory descriptors kept in the primitive
descriptor.
``` cpp
// Before:
auto src_mdw = memory_desc_wrapper(pd()->src_md());
// After:
auto src_mdw = ctx.memory_mdw(DNNL_ARG_SRC);
```
These two approaches are **almost** interchangeable when the original
memory descriptor is fully defined (i.e. has no run-time dimensions); the
exception is the sub-memory case.

The sub-memory workflow (a successor of views from Intel MKL-DNN v0.x)
assumes that the sub-memory descriptor is used at primitive descriptor
creation time only, while at execution time the original input memory is
passed as is. Example:
``` cpp
memory src({{10, 10}, ...}, ...); // original src memory
memory::desc submemory_src_md
        = src.get_desc().submemory_desc({5, 5}, {1, 1});
primitive_desc pd(submemory_src_md, ...); // use the sub-memory descriptor
primitive p(pd);
p.execute(stream, {
        {DNNL_ARG_SRC, src}, // use the original src memory,
                             // not a sub-memory-based one!
        ...});
```
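For illustration only, here is a minimal self-contained sketch of the mismatch; the `submemory_mismatch_sketch` helper is hypothetical and simply reuses the 10x10 / 5x5 shapes from the example above. The descriptor the primitive descriptor is created with describes the 5x5 view, while the memory object passed at execution time still describes the full 10x10 buffer, so taking the descriptor from the argument silently drops the view:
``` cpp
// Sketch only: hypothetical helper built on the dnnl v1.x C++ API, reusing
// the 10x10 src / 5x5 sub-memory example above.
#include "dnnl.hpp"
#include <cassert>

void submemory_mismatch_sketch(dnnl::engine &eng) {
    using namespace dnnl;

    memory::desc src_md({10, 10}, memory::data_type::f32, memory::format_tag::ab);
    memory src(src_md, eng); // the original 10x10 src memory

    // The 5x5 view at offset {1, 1} is what the primitive descriptor sees.
    memory::desc submemory_src_md = src_md.submemory_desc({5, 5}, {1, 1});

    // At execution time the argument still carries the full 10x10 descriptor,
    // so a primitive that takes the descriptor from the argument (the "After"
    // code path above) would operate on the full buffer, not on the 5x5 view.
    assert(src.get_desc().data.dims[0] == 10 && src.get_desc().data.dims[1] == 10);
    assert(submemory_src_md.data.dims[0] == 5 && submemory_src_md.data.dims[1] == 5);
}
```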
To restore the original sub-memory behavior, the recently added function
`ctx.memory_mdw()` is extended with a new optional parameter
`md_from_primitive_desc` that points to the memory descriptor stored in
the primitive descriptor and is used whenever it is fully defined (i.e.
whenever there is no intention to use run-time sizes and/or strides):
``` cpp
memory_desc_wrapper memory_mdw(int arg,
        const memory_desc_t *md_from_primitive_desc = nullptr) const;
```
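For reference, a typical call site (taken from the hunks below) passes the descriptor kept in the primitive descriptor alongside the argument index; when that descriptor is fully defined it is returned, otherwise the run-time descriptor of the memory argument is used:
``` cpp
// Inside a primitive's execute(), as in the matmul/reorder hunks below:
const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
```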
In general, the interaction between run-time defined memory descriptors
and sub-memory seems to be broken. This should probably be revisited
within the DNNL v2.0 time frame. All future extensions of the run-time
memory descriptor support should be made with caution and with this
particular issue in mind.

However, for now this closes #608 ...
---
src/common/primitive_exec_types.cpp | 6 +-
src/common/primitive_exec_types.hpp | 15 +++-
src/cpu/matmul/gemm_f32_matmul.cpp | 6 +-
src/cpu/matmul/gemm_x8s8s32x_matmul.cpp | 6 +-
src/cpu/matmul/ref_matmul.cpp | 8 +--
src/cpu/simple_reorder.hpp | 8 +--
src/ocl/ref_matmul.cpp | 8 +--
tests/gtests/api/test_submemory.cpp | 94 +++++++++++++++++++++++++
8 files changed, 131 insertions(+), 20 deletions(-)
create mode 100644 tests/gtests/api/test_submemory.cpp
diff --git a/src/common/primitive_exec_types.cpp b/src/common/primitive_exec_types.cpp
index ae0bf8241..76294d2d5 100644
--- a/src/common/primitive_exec_types.cpp
+++ b/src/common/primitive_exec_types.cpp
@@ -83,7 +83,11 @@ memory_t *exec_ctx_t::memory(int arg) const {
return ma.mem;
}
-memory_desc_wrapper exec_ctx_t::memory_mdw(int arg) const {
+memory_desc_wrapper exec_ctx_t::memory_mdw(
+ int arg, const memory_desc_t *md_from_primitive_desc) const {
+ memory_desc_wrapper mdw_from_primitive_desc(md_from_primitive_desc);
+ if (!mdw_from_primitive_desc.has_runtime_dims_or_strides())
+ return mdw_from_primitive_desc;
if (args_.count(arg) != 1) return memory_desc_wrapper(&glob_zero_md);
return memory_desc_wrapper(args_.at(arg).mem->md());
}
diff --git a/src/common/primitive_exec_types.hpp b/src/common/primitive_exec_types.hpp
index 8133d9258..83012509a 100644
--- a/src/common/primitive_exec_types.hpp
+++ b/src/common/primitive_exec_types.hpp
@@ -64,7 +64,20 @@ struct exec_ctx_t {
memory_t *output(int arg) const;
memory_t *memory(int arg) const;
- memory_desc_wrapper memory_mdw(int arg) const;
+ // Returns memory descriptor wrapper for the corresponding memory argument.
+ //
+ // To support the sub-memory flow (when the primitive descriptor was created
+ // with a sub-memory, but the primitive is executed on the original memory),
+ // it is recommended to pass the memory descriptor from the primitive
+ // descriptor. If this memory descriptor is fully defined (i.e. there is
+ // no reason to use the memory descriptor from the input memory), it is
+ // the one that will be returned.
+ //
+ // XXX: revisit this behavior in DNNL 2.0. It would be more consistent to
+ // take the memory descriptor from the incoming argument. This will
+ // require a sub-memory object, though...
+ memory_desc_wrapper memory_mdw(int arg,
+ const memory_desc_t *md_from_primitive_desc = nullptr) const;
void set_scratchpad_grantor(
const memory_tracking::grantor_t &scratchpad_grantor);
diff --git a/src/cpu/matmul/gemm_f32_matmul.cpp b/src/cpu/matmul/gemm_f32_matmul.cpp
index 6dc193673..5d8f6ae3e 100644
--- a/src/cpu/matmul/gemm_f32_matmul.cpp
+++ b/src/cpu/matmul/gemm_f32_matmul.cpp
@@ -97,9 +97,9 @@ status_t gemm_f32_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
DEFINE_SCALES_BUFFER(scales);
- const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST);
+ const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
const auto &dst_bd = dst_d.blocking_desc();
diff --git a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
index 73eddebe0..04dc8ab65 100644
--- a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
+++ b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
@@ -119,9 +119,9 @@ status_t gemm_x8s8s32x_matmul_t<src_type, weights_type, dst_type>::execute_ref(
}
const float dst_zero_point_f32 = (float)dst_zero_point;
- const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST);
+ const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
acc_data_t *acc = pd()->dst_is_acc_
? (acc_data_t *)dst
diff --git a/src/cpu/matmul/ref_matmul.cpp b/src/cpu/matmul/ref_matmul.cpp
index a277f0f4c..db796d074 100644
--- a/src/cpu/matmul/ref_matmul.cpp
+++ b/src/cpu/matmul/ref_matmul.cpp
@@ -44,10 +44,10 @@ status_t ref_matmul_t<src_type, weights_type, dst_type, acc_type>::execute_ref(
DEFINE_ZERO_POINT_VALUE(weights_zero_point, DNNL_ARG_WEIGHTS);
DEFINE_ZERO_POINT_VALUE(dst_zero_point, DNNL_ARG_DST);
- const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST);
- const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS);
+ const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
+ const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS, pd()->weights_md(1));
const bool batched = pd()->batched();
const bool non_default_attrs = !pd()->attr()->has_default_values();
diff --git a/src/cpu/simple_reorder.hpp b/src/cpu/simple_reorder.hpp
index 63fdb4bcf..87d69f7ad 100644
--- a/src/cpu/simple_reorder.hpp
+++ b/src/cpu/simple_reorder.hpp
@@ -73,8 +73,8 @@ struct conv_s8s8 {};
auto output = CTX_OUT_MEM(data_t<type_o> *, DNNL_ARG_TO); \
const auto &scratchpad = ctx.get_scratchpad_grantor(); \
MAYBE_UNUSED(scratchpad); \
- const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM); \
- const auto output_d = ctx.memory_mdw(DNNL_ARG_TO); \
+ const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM, pd->src_md()); \
+ const auto output_d = ctx.memory_mdw(DNNL_ARG_TO, pd->dst_md()); \
const float alpha = pd->alpha(); \
MAYBE_UNUSED(alpha); \
const float beta = pd->beta(); \
@@ -1242,8 +1242,8 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
DEFINE_ZERO_POINT_VALUE(i0, DNNL_ARG_FROM);
DEFINE_ZERO_POINT_VALUE(o0, DNNL_ARG_TO);
- const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM);
- const auto output_d = ctx.memory_mdw(DNNL_ARG_TO);
+ const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM, pd()->src_md());
+ const auto output_d = ctx.memory_mdw(DNNL_ARG_TO, pd()->dst_md());
const size_t nelems = input_d.nelems();
diff --git a/src/ocl/ref_matmul.cpp b/src/ocl/ref_matmul.cpp
index b53ba627e..dd6edc205 100644
--- a/src/ocl/ref_matmul.cpp
+++ b/src/ocl/ref_matmul.cpp
@@ -44,10 +44,10 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
? &CTX_IN_STORAGE(DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST)
: c0_mem_storage_.get();
- const auto a_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto b_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto c_d = ctx.memory_mdw(DNNL_ARG_DST);
- const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS);
+ const auto a_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto b_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto c_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
+ const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS, pd()->weights_md(1));
const bool is_batched = pd()->batched();
dim_t a_stride_mb, a_stride_m, a_stride_k;
diff --git a/tests/gtests/api/test_submemory.cpp b/tests/gtests/api/test_submemory.cpp
new file mode 100644
index 000000000..2c1a48bc6
--- /dev/null
+++ b/tests/gtests/api/test_submemory.cpp
@@ -0,0 +1,94 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "dnnl_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "dnnl.h"
+
+#include <algorithm>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+namespace dnnl {
+
+class submemory_test_cpp : public ::testing::TestWithParam<dnnl_engine_kind_t> {
+};
+
+TEST_P(submemory_test_cpp, SubmemoryMemoryInteraction) {
+ auto engine_kind = static_cast<engine::kind>(GetParam());
+
+ SKIP_IF(engine::get_count(engine_kind) == 0,
+ "Engine kind is not supported");
+
+ engine eng(engine_kind, 0);
+
+ const memory::dim dst_offset = 1;
+ const memory::dim copy_size = 1;
+
+ float src_buf[1] = {35};
+ memory src({{1}, memory::data_type::f32, memory::format_tag::a}, eng);
+ {
+ auto mapped_src_ptr = map_memory<float>(src);
+ std::copy(src_buf, src_buf + sizeof(src_buf) / sizeof(src_buf[0]),
+ static_cast<float *>(mapped_src_ptr));
+ }
+
+ float dst_buf[2] = {1, 0};
+ memory dst({{2}, memory::data_type::f32, memory::format_tag::a}, eng);
+ {
+ auto mapped_dst_ptr = map_memory<float>(dst);
+ std::copy(dst_buf, dst_buf + sizeof(dst_buf) / sizeof(dst_buf[0]),
+ static_cast<float *>(mapped_dst_ptr));
+ }
+
+ memory::desc dst_submemory
+ = dst.get_desc().submemory_desc({copy_size}, {dst_offset});
+
+ reorder::primitive_desc reorder_pd(
+ eng, src.get_desc(), eng, dst_submemory, primitive_attr());
+ reorder reorder_prim(reorder_pd);
+
+ stream strm(eng);
+ reorder_prim.execute(strm, src, dst);
+ strm.wait();
+
+ dst_buf[dst_offset] = src_buf[0];
+ {
+ auto mapped_dst_ptr = map_memory<float>(dst);
+ for (size_t i = 0; i < sizeof(dst_buf) / sizeof(dst_buf[0]); ++i)
+ ASSERT_EQ(mapped_dst_ptr[i], dst_buf[i]) << "at position " << i;
+ }
+}
+
+namespace {
+struct PrintToStringParamName {
+ template <class ParamType>
+ std::string operator()(
+ const ::testing::TestParamInfo<ParamType> &info) const {
+ return to_string(info.param);
+ }
+};
+
+auto all_engine_kinds = ::testing::Values(dnnl_cpu, dnnl_gpu);
+
+} // namespace
+
+INSTANTIATE_TEST_SUITE_P(AllEngineKinds, submemory_test_cpp, all_engine_kinds,
+ PrintToStringParamName());
+
+} // namespace dnnl
--
2.22.0