Last active
June 2, 2018 00:49
-
-
Save rsdubtso/baf6e359f98baceedb490de46d8c545c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 52157cef4c98294b96dfb14ed476a59e647dba57 Mon Sep 17 00:00:00 2001 | |
From: Roman Dubtsov <roman.s.dubtsov@intel.com> | |
Date: Tue, 29 May 2018 10:24:14 -0700 | |
Subject: tentative: cpu: jit: add vzeroupper to postamble() and manual epilogues | |
--- | |
src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp | 3 +++ | |
src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp | 2 ++ | |
src/cpu/jit_generator.hpp | 14 +++++++++++++- | |
src/cpu/jit_uni_1x1_conv_utils.hpp | 1 + | |
4 files changed, 19 insertions(+), 1 deletion(-) | |
diff --git a/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp b/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp | |
index b6d09ed0..c7842a3b 100644 | |
--- a/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp | |
+++ b/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp | |
@@ -351,6 +351,7 @@ void _jit_avx512_common_conv_winograd_data_kernel_f32::gemm_loop_generate( | |
/* Postamble */ | |
pop(reg_EVEX_max_8b_offt); | |
+ uni_vzeroupper(); | |
ret(); | |
} | |
@@ -774,6 +775,7 @@ void jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::transpose_ker_gener | |
} | |
} | |
} | |
+ uni_vzeroupper(); | |
ret(); | |
} | |
void jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::gemm_loop_generate( | |
@@ -949,6 +951,7 @@ void jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::gemm_loop_generate( | |
/* Postamble */ | |
pop(reg_dimK_block_loop_cnt); | |
pop(reg_EVEX_max_8b_offt); | |
+ uni_vzeroupper(); | |
ret(); | |
} | |
diff --git a/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp b/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp | |
index 56ade5a2..d562e38b 100644 | |
--- a/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp | |
+++ b/src/cpu/jit_avx512_core_conv_winograd_kernel_f32.cpp | |
@@ -371,6 +371,7 @@ void _jit_avx512_core_conv_winograd_data_kernel_f32::gemm_loop_generate() | |
/* Postamble */ | |
pop(reg_EVEX_max_8b_offt); | |
+ uni_vzeroupper(); | |
ret(); | |
} | |
@@ -2235,6 +2236,7 @@ void jit_avx512_core_conv_winograd_bwd_weights_kernel_f32::gemm_loop_generate( | |
/* Postamble */ | |
pop(reg_dimK_block_loop_cnt); | |
pop(reg_EVEX_max_8b_offt); | |
+ uni_vzeroupper(); | |
ret(); | |
} | |
diff --git a/src/cpu/jit_generator.hpp b/src/cpu/jit_generator.hpp | |
index 4c730e15..471cfabe 100644 | |
--- a/src/cpu/jit_generator.hpp | |
+++ b/src/cpu/jit_generator.hpp | |
@@ -63,6 +63,7 @@ namespace cpu { | |
typedef enum { | |
@@ -78,11 +79,14 @@ template <> struct cpu_isa_traits<sse42> { | |
static constexpr int vlen = 16; | |
static constexpr int n_vregs = 16; | |
}; | |
-template <> struct cpu_isa_traits<avx2> { | |
+template <> struct cpu_isa_traits<avx> { | |
static constexpr int vlen_shift = 5; | |
static constexpr int vlen = 32; | |
static constexpr int n_vregs = 16; | |
}; | |
+template <> struct cpu_isa_traits<avx2>: | |
+ public cpu_isa_traits<avx> {}; | |
+ | |
template <> struct cpu_isa_traits<avx512_common> { | |
static constexpr int vlen_shift = 6; | |
static constexpr int vlen = 64; | |
@@ -175,6 +179,8 @@ static inline bool mayiuse(const cpu_isa_t cpu_isa) { | |
switch (cpu_isa) { | |
case sse42: | |
return cpu.has(Cpu::tSSE42); | |
+ case avx: | |
+ return cpu.has(Cpu::tAVX); | |
case avx2: | |
return cpu.has(Cpu::tAVX2); | |
case avx512_common: | |
@@ -341,6 +347,11 @@ public: | |
prefetcht2(a); | |
} | |
+ void uni_vzeroupper() { | |
+ if (mayiuse(avx) && !mayiuse(avx512_mic)) | |
+ vzeroupper(); | |
+ } | |
+ | |
void postamble() { | |
for (size_t i = 0; i < num_abi_save_gpr_regs; ++i) | |
pop(Xbyak::Reg64(abi_save_gpr_regs[num_abi_save_gpr_regs - 1 - i])); | |
@@ -349,6 +360,7 @@ public: | |
movdqu(Xbyak::Xmm(xmm_to_preserve_start + i), ptr[rsp + i * xmm_len]); | |
add(rsp, xmm_to_preserve * xmm_len); | |
} | |
+ uni_vzeroupper(); | |
ret(); | |
} | |
diff --git a/src/cpu/jit_uni_1x1_conv_utils.hpp b/src/cpu/jit_uni_1x1_conv_utils.hpp | |
index 4ab823ae..d05ed911 100644 | |
--- a/src/cpu/jit_uni_1x1_conv_utils.hpp | |
+++ b/src/cpu/jit_uni_1x1_conv_utils.hpp | |
@@ -227,6 +227,7 @@ struct rtus_driver_t: public jit_generator { | |
pop(rdi); | |
#endif | |
+ uni_vzeroupper(); | |
ret(); | |
this->ker_ = reinterpret_cast<decltype(ker_)>(const_cast<uint8_t*>( | |
this->getCode())); | |
-- | |
2.16.2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment