Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save rsdubtso/baf6e359f98baceedb490de46d8c545c to your computer and use it in GitHub Desktop.
From 52157cef4c98294b96dfb14ed476a59e647dba57 Mon Sep 17 00:00:00 2001
From: Roman Dubtsov <roman.s.dubtsov@intel.com>
Date: Tue, 29 May 2018 10:24:14 -0700
Subject: tentative: cpu: jit: add vzeroupper to postamble()
---
src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp | 3 +++
src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp | 2 ++
src/cpu/jit_generator.hpp | 14 +++++++++++++-
src/cpu/jit_uni_1x1_conv_utils.hpp | 1 +
4 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp b/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp
index b6d09ed0..c7842a3b 100644
--- a/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp
+++ b/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp
@@ -351,6 +351,7 @@ void _jit_avx512_common_conv_winograd_data_kernel_f32::gemm_loop_generate(
/* Postamble */
pop(reg_EVEX_max_8b_offt);
+ uni_vzeroupper();
ret();
}
@@ -774,6 +775,7 @@ void jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::transpose_ker_gener
}
}
}
+ uni_vzeroupper();
ret();
}
void jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::gemm_loop_generate(
@@ -949,6 +951,7 @@ void jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::gemm_loop_generate(
/* Postamble */
pop(reg_dimK_block_loop_cnt);
pop(reg_EVEX_max_8b_offt);
+ uni_vzeroupper();
ret();
}
diff --git a/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp b/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp
index 56ade5a2..d562e38b 100644
--- a/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp
+++ b/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp
@@ -371,6 +371,7 @@ void _jit_avx512_core_conv_winograd_data_kernel_f32::gemm_loop_generate()
/* Postamble */
pop(reg_EVEX_max_8b_offt);
+ uni_vzeroupper();
ret();
}
@@ -2235,6 +2236,7 @@ void jit_avx512_core_conv_winograd_bwd_weights_kernel_f32::gemm_loop_generate(
/* Postamble */
pop(reg_dimK_block_loop_cnt);
pop(reg_EVEX_max_8b_offt);
+ uni_vzeroupper();
ret();
}
diff --git a/src/cpu/jit_generator.hpp b/src/cpu/jit_generator.hpp
index 4c730e15..471cfabe 100644
--- a/src/cpu/jit_generator.hpp
+++ b/src/cpu/jit_generator.hpp
@@ -63,6 +63,7 @@ namespace cpu {
typedef enum {
@@ -78,11 +79,14 @@ template <> struct cpu_isa_traits<sse42> {
static constexpr int vlen = 16;
static constexpr int n_vregs = 16;
};
-template <> struct cpu_isa_traits<avx2> {
+template <> struct cpu_isa_traits<avx> {
static constexpr int vlen_shift = 5;
static constexpr int vlen = 32;
static constexpr int n_vregs = 16;
};
+template <> struct cpu_isa_traits<avx2>:
+ public cpu_isa_traits<avx> {};
+
template <> struct cpu_isa_traits<avx512_common> {
static constexpr int vlen_shift = 6;
static constexpr int vlen = 64;
@@ -175,6 +179,8 @@ static inline bool mayiuse(const cpu_isa_t cpu_isa) {
switch (cpu_isa) {
case sse42:
return cpu.has(Cpu::tSSE42);
+ case avx:
+ return cpu.has(Cpu::tAVX);
case avx2:
return cpu.has(Cpu::tAVX2);
case avx512_common:
@@ -341,6 +347,11 @@ public:
prefetcht2(a);
}
+ void uni_vzeroupper() {
+ if (mayiuse(avx) && !mayiuse(avx512_mic))
+ vzeroupper();
+ }
+
void postamble() {
for (size_t i = 0; i < num_abi_save_gpr_regs; ++i)
pop(Xbyak::Reg64(abi_save_gpr_regs[num_abi_save_gpr_regs - 1 - i]));
@@ -349,6 +360,7 @@ public:
movdqu(Xbyak::Xmm(xmm_to_preserve_start + i), ptr[rsp + i * xmm_len]);
add(rsp, xmm_to_preserve * xmm_len);
}
+ uni_vzeroupper();
ret();
}
diff --git a/src/cpu/jit_uni_1x1_conv_utils.hpp b/src/cpu/jit_uni_1x1_conv_utils.hpp
index 4ab823ae..d05ed911 100644
--- a/src/cpu/jit_uni_1x1_conv_utils.hpp
+++ b/src/cpu/jit_uni_1x1_conv_utils.hpp
@@ -227,6 +227,7 @@ struct rtus_driver_t: public jit_generator {
pop(rdi);
#endif
+ uni_vzeroupper();
ret();
this->ker_ = reinterpret_cast<decltype(ker_)>(const_cast<uint8_t*>(
this->getCode()));
--
2.16.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment