Created November 29, 2019 09:36
fix for mkl-dnn/#608: common: return the original behavior of sub-memory
From abca9ca2329c5eeaa92a05b3d8d09a1629e2e62e Mon Sep 17 00:00:00 2001
From: "Fomenko, Evarist M" <evarist.m.fomenko@intel.com>
Date: Fri, 29 Nov 2019 09:13:55 +0000
Subject: [PATCH] common: return the original behavior of sub-memory
The unwanted change appeared with the recent MatMul enabling, namely with
the run-time dimensions support. To retrieve the run-time sizes and
strides, the primitives started using the memory descriptors from the
memory arguments instead of the memory descriptors kept in the primitive
descriptor.
``` cpp
// Before:
auto src_mdw = memory_desc_wrapper(pd()->src_md());
// After:
auto src_mdw = ctx.memory_mdw(DNNL_ARG_SRC);
```
These two approaches are **almost** interchangeable when the original
memory descriptor is fully defined (i.e. has no run-time dimensions),
except for the sub-memory case.
The sub-memory workflow (a successor of views from Intel MKL-DNN v0.x)
assumes that the sub-memory descriptor is used at primitive descriptor
creation time only, while the input memory is passed as is at execution
time. Example:
``` cpp
memory src({{10, 10}, ...}, ...); // original src memory
memory::desc submemory_src_md
        = src.get_desc().submemory_desc({5, 5}, {1, 1});
primitive_desc pd(submemory_src_md, ...); // use sub-memory
primitive p(pd);
p.execute(stream, {
        {DNNL_ARG_SRC, src}, // use the original src memory,
                             // not a sub-memory based one!
        ...});
```
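To make the difference concrete, here is a hedged sketch (not part of the patch, assumed internals only) of what a primitive would observe inside its execution function for the example above, depending on where the memory descriptor wrapper is taken from:
``` cpp
// Illustration only (assumed internals, not part of the patch).
// Descriptor kept in the primitive descriptor: the 5x5 sub-memory,
// carrying the offset and the strides of the original 10x10 buffer.
auto from_pd = memory_desc_wrapper(pd()->src_md()); // dims: {5, 5}

// Descriptor taken from the execution argument: the full original
// memory that the user passed in at execution time.
auto from_arg = ctx.memory_mdw(DNNL_ARG_SRC); // dims: {10, 10}
```
Before the MatMul-related change the primitives relied on the former, so the sub-memory sizes, strides, and offset were honored; after it they silently switched to the latter.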
To restore the original sub-memory behavior, the recently added function
`ctx.memory_mdw()` is extended with a new optional parameter
`md_from_primitive_desc` that points to the memory descriptor stored in
the primitive descriptor; that descriptor is used whenever it is fully
defined (i.e. there is no intention to use run-time sizes and/or strides):
``` cpp
memory_desc_wrapper memory_mdw(int arg,
        const memory_desc_t *md_from_primitive_desc = nullptr) const;
```
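Primitive implementations that need run-time dimensions then pass both pieces of information; the call below mirrors the MatMul and reorder hunks later in this patch:
``` cpp
// If pd()->src_md() is fully defined (the sub-memory case included),
// it is the descriptor that gets returned; otherwise the descriptor of
// the actual DNNL_ARG_SRC argument is used, which is how run-time
// sizes and strides are picked up.
const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
```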
In general, the interaction between run-time defined memory descriptors
and sub-memories seems to be broken. This should probably be revisited
within the DNNL v2 time frame. All future extensions of the run-time
memory descriptor support should be done with caution and with this
particular issue in mind.
For now, however, this closes #608 ...
---
 src/common/primitive_exec_types.cpp     |  6 +-
 src/common/primitive_exec_types.hpp     | 15 +++-
 src/cpu/matmul/gemm_f32_matmul.cpp      |  6 +-
 src/cpu/matmul/gemm_x8s8s32x_matmul.cpp |  6 +-
 src/cpu/matmul/ref_matmul.cpp           |  8 +--
 src/cpu/simple_reorder.hpp              |  8 +--
 src/ocl/ref_matmul.cpp                  |  8 +--
 tests/gtests/api/test_submemory.cpp     | 94 +++++++++++++++++++++++++
 8 files changed, 131 insertions(+), 20 deletions(-)
 create mode 100644 tests/gtests/api/test_submemory.cpp
diff --git a/src/common/primitive_exec_types.cpp b/src/common/primitive_exec_types.cpp
index ae0bf8241..76294d2d5 100644
--- a/src/common/primitive_exec_types.cpp
+++ b/src/common/primitive_exec_types.cpp
@@ -83,7 +83,11 @@ memory_t *exec_ctx_t::memory(int arg) const {
 return ma.mem;
 }
-memory_desc_wrapper exec_ctx_t::memory_mdw(int arg) const {
+memory_desc_wrapper exec_ctx_t::memory_mdw(
+ int arg, const memory_desc_t *md_from_primitive_desc) const {
+ memory_desc_wrapper mdw_from_primitive_desc(md_from_primitive_desc);
+ if (!mdw_from_primitive_desc.has_runtime_dims_or_strides())
+ return mdw_from_primitive_desc;
 if (args_.count(arg) != 1) return memory_desc_wrapper(&glob_zero_md);
 return memory_desc_wrapper(args_.at(arg).mem->md());
 }
diff --git a/src/common/primitive_exec_types.hpp b/src/common/primitive_exec_types.hpp
index 8133d9258..83012509a 100644
--- a/src/common/primitive_exec_types.hpp
+++ b/src/common/primitive_exec_types.hpp
@@ -64,7 +64,20 @@ struct exec_ctx_t {
 memory_t *output(int arg) const;
 memory_t *memory(int arg) const;
- memory_desc_wrapper memory_mdw(int arg) const;
+ // Returns a memory descriptor wrapper for the corresponding memory argument.
+ //
+ // To support the sub-memory flow (when the primitive descriptor was created
+ // with a sub-memory, but the primitive is executed on the original memory),
+ // it is recommended to pass the memory descriptor from the primitive
+ // descriptor. If this memory descriptor is fully defined (i.e. there is no
+ // reason to use the memory descriptor from the input memory), it is exactly
+ // the one that will be returned.
+ //
+ // XXX: revisit this behavior in DNNL 2.0. It would be more consistent to
+ // take the memory descriptor from the incoming argument. This would
+ // require a sub-memory object, though...
+ memory_desc_wrapper memory_mdw(int arg,
+ const memory_desc_t *md_from_primitive_desc = nullptr) const;
 void set_scratchpad_grantor(
 const memory_tracking::grantor_t &scratchpad_grantor);
diff --git a/src/cpu/matmul/gemm_f32_matmul.cpp b/src/cpu/matmul/gemm_f32_matmul.cpp
index 6dc193673..5d8f6ae3e 100644
--- a/src/cpu/matmul/gemm_f32_matmul.cpp
+++ b/src/cpu/matmul/gemm_f32_matmul.cpp
@@ -97,9 +97,9 @@ status_t gemm_f32_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
 DEFINE_SCALES_BUFFER(scales);
- const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST);
+ const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
 const auto &dst_bd = dst_d.blocking_desc();
diff --git a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
index 73eddebe0..04dc8ab65 100644
--- a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
+++ b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
@@ -119,9 +119,9 @@ status_t gemm_x8s8s32x_matmul_t<src_type, weights_type, dst_type>::execute_ref(
 }
 const float dst_zero_point_f32 = (float)dst_zero_point;
- const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST);
+ const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
 acc_data_t *acc = pd()->dst_is_acc_
 ? (acc_data_t *)dst
diff --git a/src/cpu/matmul/ref_matmul.cpp b/src/cpu/matmul/ref_matmul.cpp
index a277f0f4c..db796d074 100644
--- a/src/cpu/matmul/ref_matmul.cpp
+++ b/src/cpu/matmul/ref_matmul.cpp
@@ -44,10 +44,10 @@ status_t ref_matmul_t<src_type, weights_type, dst_type, acc_type>::execute_ref(
 DEFINE_ZERO_POINT_VALUE(weights_zero_point, DNNL_ARG_WEIGHTS);
 DEFINE_ZERO_POINT_VALUE(dst_zero_point, DNNL_ARG_DST);
- const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST);
- const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS);
+ const auto src_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto weights_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto dst_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
+ const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS, pd()->weights_md(1));
 const bool batched = pd()->batched();
 const bool non_default_attrs = !pd()->attr()->has_default_values();
diff --git a/src/cpu/simple_reorder.hpp b/src/cpu/simple_reorder.hpp
index 63fdb4bcf..87d69f7ad 100644
--- a/src/cpu/simple_reorder.hpp
+++ b/src/cpu/simple_reorder.hpp
@@ -73,8 +73,8 @@ struct conv_s8s8 {};
 auto output = CTX_OUT_MEM(data_t<type_o> *, DNNL_ARG_TO); \
 const auto &scratchpad = ctx.get_scratchpad_grantor(); \
 MAYBE_UNUSED(scratchpad); \
- const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM); \
- const auto output_d = ctx.memory_mdw(DNNL_ARG_TO); \
+ const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM, pd->src_md()); \
+ const auto output_d = ctx.memory_mdw(DNNL_ARG_TO, pd->dst_md()); \
 const float alpha = pd->alpha(); \
 MAYBE_UNUSED(alpha); \
 const float beta = pd->beta(); \
@@ -1242,8 +1242,8 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 DEFINE_ZERO_POINT_VALUE(i0, DNNL_ARG_FROM);
 DEFINE_ZERO_POINT_VALUE(o0, DNNL_ARG_TO);
- const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM);
- const auto output_d = ctx.memory_mdw(DNNL_ARG_TO);
+ const auto input_d = ctx.memory_mdw(DNNL_ARG_FROM, pd()->src_md());
+ const auto output_d = ctx.memory_mdw(DNNL_ARG_TO, pd()->dst_md());
 const size_t nelems = input_d.nelems();
diff --git a/src/ocl/ref_matmul.cpp b/src/ocl/ref_matmul.cpp
index b53ba627e..dd6edc205 100644
--- a/src/ocl/ref_matmul.cpp
+++ b/src/ocl/ref_matmul.cpp
@@ -44,10 +44,10 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
 ? &CTX_IN_STORAGE(DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST)
 : c0_mem_storage_.get();
- const auto a_d = ctx.memory_mdw(DNNL_ARG_SRC);
- const auto b_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS);
- const auto c_d = ctx.memory_mdw(DNNL_ARG_DST);
- const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS);
+ const auto a_d = ctx.memory_mdw(DNNL_ARG_SRC, pd()->src_md());
+ const auto b_d = ctx.memory_mdw(DNNL_ARG_WEIGHTS, pd()->weights_md());
+ const auto c_d = ctx.memory_mdw(DNNL_ARG_DST, pd()->dst_md());
+ const auto bia_d = ctx.memory_mdw(DNNL_ARG_BIAS, pd()->weights_md(1));
 const bool is_batched = pd()->batched();
 dim_t a_stride_mb, a_stride_m, a_stride_k;
diff --git a/tests/gtests/api/test_submemory.cpp b/tests/gtests/api/test_submemory.cpp
new file mode 100644
index 000000000..2c1a48bc6
--- /dev/null
+++ b/tests/gtests/api/test_submemory.cpp
@@ -0,0 +1,94 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "dnnl_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "dnnl.h"
+
+#include <algorithm>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+namespace dnnl {
+
+class submemory_test_cpp : public ::testing::TestWithParam<dnnl_engine_kind_t> {
+};
+
+TEST_P(submemory_test_cpp, SubmemoryMemoryInteraction) {
+ auto engine_kind = static_cast<engine::kind>(GetParam());
+
+ SKIP_IF(engine::get_count(engine_kind) == 0,
+ "Engine kind is not supported");
+
+ engine eng(engine_kind, 0);
+
+ const memory::dim dst_offset = 1;
+ const memory::dim copy_size = 1;
+
+ float src_buf[1] = {35};
+ memory src({{1}, memory::data_type::f32, memory::format_tag::a}, eng);
+ {
+ auto mapped_src_ptr = map_memory<float>(src);
+ std::copy(src_buf, src_buf + sizeof(src_buf) / sizeof(src_buf[0]),
+ static_cast<float *>(mapped_src_ptr));
+ }
+
+ float dst_buf[2] = {1, 0};
+ memory dst({{2}, memory::data_type::f32, memory::format_tag::a}, eng);
+ {
+ auto mapped_dst_ptr = map_memory<float>(dst);
+ std::copy(dst_buf, dst_buf + sizeof(dst_buf) / sizeof(dst_buf[0]),
+ static_cast<float *>(mapped_dst_ptr));
+ }
+
+ memory::desc dst_submemory
+ = dst.get_desc().submemory_desc({copy_size}, {dst_offset});
+
+ reorder::primitive_desc reorder_pd(
+ eng, src.get_desc(), eng, dst_submemory, primitive_attr());
+ reorder reorder_prim(reorder_pd);
+
+ stream strm(eng);
+ reorder_prim.execute(strm, src, dst);
+ strm.wait();
+
+ dst_buf[dst_offset] = src_buf[0];
+ {
+ auto mapped_dst_ptr = map_memory<float>(dst);
+ for (size_t i = 0; i < sizeof(dst_buf) / sizeof(dst_buf[0]); ++i)
+ ASSERT_EQ(mapped_dst_ptr[i], dst_buf[i]) << "at position " << i;
+ }
+}
+
+namespace {
+struct PrintToStringParamName {
+ template <class ParamType>
+ std::string operator()(
+ const ::testing::TestParamInfo<ParamType> &info) const {
+ return to_string(info.param);
+ }
+};
+
+auto all_engine_kinds = ::testing::Values(dnnl_cpu, dnnl_gpu);
+
+} // namespace
+
+INSTANTIATE_TEST_SUITE_P(AllEngineKinds, submemory_test_cpp, all_engine_kinds,
+ PrintToStringParamName());
+
+} // namespace dnnl
--
2.22.0