Last active
September 12, 2021 15:43
-
-
Save ohga/9aaff079043e2ad6a9c29d0a3e30224e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/source/Makefile b/source/Makefile | |
index 222d35e..692a469 100644 | |
--- a/source/Makefile | |
+++ b/source/Makefile | |
@@ -155,6 +155,10 @@ tournament: | |
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -DFOR_TOURNAMENT -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
tournament-sse42: | |
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_SSE42 -msse4.2 -DFOR_TOURNAMENT -march=corei7' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
+tournament-simple-neon: # re-invokes make for $(TARGET): ARMv7-A/NEON compiler flags, -DFOR_TOURNAMENT, but the -DNO_SSE code path | |
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
+tournament-vain-neon: # re-invokes make for $(TARGET): ARMv7-A/NEON compiler flags, -DFOR_TOURNAMENT, and the -DUSE_NEON code path | |
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
avx2: | |
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
@@ -173,6 +177,12 @@ sse2: | |
nosse: | |
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -m32 -march=pentium3' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
+simple-neon: # re-invokes make for $(TARGET): ARMv7-A/NEON compiler flags with the -DNO_SSE code path | |
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
+ | |
+vain-neon: # re-invokes make for $(TARGET): ARMv7-A/NEON compiler flags with the -DUSE_NEON code path | |
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
+ | |
# ARMなどのCPU | |
other: | |
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET) | |
diff --git a/source/bitboard.h b/source/bitboard.h | |
index a9af50a..dc79d6d 100644 | |
--- a/source/bitboard.h | |
+++ b/source/bitboard.h | |
@@ -33,6 +33,12 @@ struct alignas(16) Bitboard | |
// Aperyを始めとするmagic bitboard派によって考案された。 | |
__m128i m; | |
}; | |
+#elif defined (USE_NEON) | |
+ union | |
+ { | |
+ u64 p[2]; // the 128 bits viewed as two 64-bit words | |
+ int16x8_t m; // the same 128 bits viewed as one NEON vector of eight 16-bit lanes | |
+ }; | |
#else // no SSE | |
u64 p[2]; | |
#endif | |
@@ -43,6 +49,9 @@ struct alignas(16) Bitboard | |
Bitboard& operator = (const Bitboard& rhs) { _mm_store_si128(&this->m, rhs.m); return *this; } | |
Bitboard(const Bitboard& bb) { _mm_store_si128(&this->m, bb.m); } | |
+#elif defined (USE_NEON) | |
+ Bitboard& operator = (const Bitboard& rhs) { vst1q_s16(reinterpret_cast<int16_t*>(&m), rhs.m); return *this; } // NEON: copy all 128 bits with one vector store | |
+ Bitboard(const Bitboard& bb) { vst1q_s16(reinterpret_cast<int16_t*>(&m), bb.m); } // NEON: copy-construct with one 128-bit vector store | |
#endif | |
// --- ctor | |
@@ -125,6 +134,19 @@ struct alignas(16) Bitboard | |
// 右シフト(縦型Bitboardでは右1回シフトで1段上の升に移動する) | |
Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ m = _mm_srli_epi64(m, shift); return *this; } | |
+#elif defined (USE_NEON) | |
+ Bitboard& operator |= (const Bitboard& b1) { this->m = vorrq_s16(m, b1.m); return *this; } // bitwise OR of all 128 bits; the _s16 form matches m's int16x8_t type | |
+ Bitboard& operator &= (const Bitboard& b1) { this->m = vandq_s16(m, b1.m); return *this; } // bitwise AND of all 128 bits; the _s16 form matches m's int16x8_t type | |
+ Bitboard& operator ^= (const Bitboard& b1) { this->m = veorq_s16(m, b1.m); return *this; } // bitwise XOR of all 128 bits; the _s16 form matches m's int16x8_t type | |
+ Bitboard& operator += (const Bitboard& b1) { this->m = vreinterpretq_s16_s64(vaddq_s64(vreinterpretq_s64_s16(m), vreinterpretq_s64_s16(b1.m))); return *this; } // add per 64-bit word (p[0], p[1]); 16-bit lanes would drop carries between lanes | |
+ Bitboard& operator -= (const Bitboard& b1) { this->m = vreinterpretq_s16_s64(vsubq_s64(vreinterpretq_s64_s16(m), vreinterpretq_s64_s16(b1.m))); return *this; } // subtract per 64-bit word (p[0], p[1]); 16-bit lanes would drop borrows between lanes | |
+ | |
+ // TODO: the NEON shift intrinsics look unreliable here, so they are not used. | |
+ // Besides, the compiler rejects them: argument to '__builtin_neon_vshlq_n_v' must be a constant integer | |
+ //Bitboard& operator <<= (int shift) { ASSERT_LV3(shift == 1); m = vshlq_n_s16(m, 1); return *this; } | |
+ //Bitboard& operator >>= (int shift) { ASSERT_LV3(shift == 1); m = vshrq_n_s16(m, 1); return *this; } | |
+ Bitboard& operator <<= (int shift) { /*ASSERT_LV3(shift == 1);*/ for (auto& word : p) word <<= shift; return *this; } // shifts each 64-bit word left independently | |
+ Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ for (auto& word : p) word >>= shift; return *this; } // shifts each 64-bit word right independently | |
#else | |
Bitboard& operator |= (const Bitboard& b1) { this->p[0] |= b1.p[0]; this->p[1] |= b1.p[1]; return *this; } | |
Bitboard& operator &= (const Bitboard& b1) { this->p[0] &= b1.p[0]; this->p[1] &= b1.p[1]; return *this; } | |
@@ -166,6 +188,8 @@ inline Bitboard::Bitboard(u64 p0, u64 p1) : | |
#if defined(USE_SSE2) | |
// この命令、引数の順に注意。 | |
m( _mm_set_epi64x(p1,p0)) | |
+#elif defined (USE_NEON) // TODO | |
+ p { p0 , p1 } | |
#else | |
p { p0 , p1 } | |
#endif | |
@@ -176,6 +200,8 @@ inline void Bitboard::set(u64 p0, u64 p1) | |
{ | |
#if defined(USE_SSE2) | |
m = _mm_set_epi64x(p1,p0); | |
+#elif defined (USE_NEON) // TODO | |
+ p[0] = p0; p[1] = p1; | |
#else | |
p[0] = p0; p[1] = p1; | |
#endif | |
diff --git a/source/eval/evalsum.h b/source/eval/evalsum.h | |
index 58c82b9..8afcfcb 100644 | |
--- a/source/eval/evalsum.h | |
+++ b/source/eval/evalsum.h | |
@@ -91,6 +91,16 @@ namespace Eval { | |
_mm_store_si128(&m[1], rhs.m[1]); | |
return *this; | |
} | |
+#elif defined(USE_NEON) | |
+ EvalSum(const EvalSum& es) { // NEON copy constructor: duplicates both 128-bit halves of m[] | |
+ for (int half = 0; half < 2; ++half) | |
+ vst1q_s16(reinterpret_cast<int16_t*>(&m[half]), es.m[half]); | |
+ } | |
+ EvalSum& operator = (const EvalSum& rhs) { // NEON copy assignment: stores both 128-bit halves of rhs.m[] | |
+ for (int half = 0; half < 2; ++half) | |
+ vst1q_s16(reinterpret_cast<int16_t*>(&m[half]), rhs.m[half]); | |
+ return *this; | |
+ } | |
#endif | |
EvalSum() {} | |
@@ -119,6 +129,9 @@ namespace Eval { | |
#elif defined(USE_SSE2) | |
m[0] = _mm_add_epi32(m[0], rhs.m[0]); | |
m[1] = _mm_add_epi32(m[1], rhs.m[1]); | |
+#elif defined(USE_NEON) | |
+ m[0] = vreinterpretq_s16_s32(vaddq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(rhs.m[0]))); // add in 32-bit lanes like the SSE2 branch; 16-bit lanes lose carries | |
+ m[1] = vreinterpretq_s16_s32(vaddq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(rhs.m[1]))); | |
#else | |
p[0][0] += rhs.p[0][0]; | |
p[0][1] += rhs.p[0][1]; | |
@@ -136,6 +149,9 @@ namespace Eval { | |
#elif defined(USE_SSE2) | |
m[0] = _mm_sub_epi32(m[0], rhs.m[0]); | |
m[1] = _mm_sub_epi32(m[1], rhs.m[1]); | |
+#elif defined(USE_NEON) | |
+ m[0] = vreinterpretq_s16_s32(vsubq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(rhs.m[0]))); // subtract in 32-bit lanes like the SSE2 branch; 16-bit lanes lose borrows | |
+ m[1] = vreinterpretq_s16_s32(vsubq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(rhs.m[1]))); | |
#else | |
p[0][0] -= rhs.p[0][0]; | |
p[0][1] -= rhs.p[0][1]; | |
@@ -179,6 +195,8 @@ namespace Eval { | |
__m128i m[2]; | |
#elif defined(USE_SSE2) | |
__m128i m[2]; | |
+#elif defined(USE_NEON) | |
+ int16x8_t m[2]; | |
#endif | |
}; | |
}; | |
diff --git a/source/eval/evaluate_io.cpp b/source/eval/evaluate_io.cpp | |
index 4993d5d..b083347 100644 | |
--- a/source/eval/evaluate_io.cpp | |
+++ b/source/eval/evaluate_io.cpp | |
@@ -79,7 +79,7 @@ namespace EvalIO | |
return out_.file_or_memory.ptr; | |
}) != 0) | |
{ | |
-#if defined(EVAL_LEARN) | |
+#if ! defined(FOR_TOURNAMENT) | |
if (Options["SkipLoadingEval"]) | |
{ | |
std::cout << "info string read file error , file = " << in_.file_or_memory.filename << " , but SkipLoadingEval == true , so ignore this." << std::endl; | |
diff --git a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp | |
index 5db5650..edb7b54 100644 | |
--- a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp | |
+++ b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp | |
@@ -285,6 +285,8 @@ namespace Eval | |
#if defined(USE_SSE2) | |
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア | |
sum.m[0] = _mm_setzero_si128(); | |
+#elif defined (USE_NEON) | |
+ sum.m[0] = vmovq_n_s16(0); | |
#else | |
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0; | |
#endif | |
@@ -375,6 +377,8 @@ namespace Eval | |
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア | |
#if defined(USE_SSE2) | |
sum.m[0] = _mm_setzero_si128(); | |
+#elif defined (USE_NEON) | |
+ sum.m[0] = vmovq_n_s16(0); | |
#else | |
sum.p[0] = { 0, 0 }; | |
sum.p[1] = { 0, 0 }; | |
@@ -1027,6 +1031,8 @@ namespace Eval | |
#if defined(USE_SSE2) | |
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア | |
sum.m[0] = _mm_setzero_si128(); | |
+#elif defined (USE_NEON) | |
+ sum.m[0] = vmovq_n_s16(0); | |
#else | |
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0; | |
#endif | |
diff --git a/source/eval/kppt/evaluate_kppt.cpp b/source/eval/kppt/evaluate_kppt.cpp | |
index 9e5bf73..679e19b 100644 | |
--- a/source/eval/kppt/evaluate_kppt.cpp | |
+++ b/source/eval/kppt/evaluate_kppt.cpp | |
@@ -345,6 +345,8 @@ namespace Eval | |
#if defined(USE_SSE2) | |
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア | |
sum.m[0] = _mm_setzero_si128(); | |
+#elif defined (USE_NEON) | |
+ sum.m[0] = vmovq_n_s16(0); | |
#else | |
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0; | |
#endif | |
@@ -446,6 +448,8 @@ namespace Eval | |
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア | |
#if defined(USE_SSE2) | |
sum.m[0] = _mm_setzero_si128(); | |
+#elif defined (USE_NEON) | |
+ sum.m[0] = vmovq_n_s16(0); | |
#else | |
sum.p[0] = { 0, 0 }; | |
sum.p[1] = { 0, 0 }; | |
@@ -1091,6 +1095,8 @@ namespace Eval | |
#if defined(USE_SSE2) | |
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア | |
sum.m[0] = _mm_setzero_si128(); | |
+#elif defined (USE_NEON) | |
+ sum.m[0] = vmovq_n_s16(0); | |
#else | |
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0; | |
#endif | |
diff --git a/source/extra/bitop.h b/source/extra/bitop.h | |
index 83231c2..4e04181 100644 | |
--- a/source/extra/bitop.h | |
+++ b/source/extra/bitop.h | |
@@ -24,6 +24,38 @@ | |
#include <smmintrin.h> | |
#elif defined (USE_SSE2) | |
#include <emmintrin.h> | |
+#elif defined (USE_NEON) | |
+#include <arm_neon.h> | |
+// https://raw.githubusercontent.com/otim/SSE-to-NEON/master/sse_to_neon.hpp | |
+#include <stdlib.h> | |
+ | |
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign | |
+ may not be visible. */ | |
+#ifndef __cplusplus | |
+extern int posix_memalign (void **, size_t, size_t); | |
+#else | |
+extern "C" int posix_memalign (void **, size_t, size_t) throw (); | |
+#endif | |
+ | |
+/* Returns size bytes aligned to alignment (via malloc or posix_memalign), or NULL on failure. */ | |
+static __inline void * | |
+_mm_malloc (size_t size, size_t alignment) | |
+{ | |
+ void *block; | |
+ if (alignment == 1) | |
+ return malloc (size); /* no alignment constraint: plain malloc */ | |
+ /* Alignments of 2 (or 4 where pointers are 8 bytes) are raised to the pointer size before posix_memalign. */ | |
+ const int too_small = alignment == 2 || (alignment == 4 && sizeof (void *) == 8); | |
+ if (too_small) | |
+ alignment = sizeof (void *); | |
+ return posix_memalign (&block, alignment, size) == 0 ? block : NULL; | |
+} | |
+ | |
+static __inline void | |
+_mm_free (void * ptr) /* passes ptr straight to free() */ | |
+{ | |
+ free (ptr); | |
+} | |
#else | |
#if defined (__GNUC__) | |
#include <mm_malloc.h> // for _mm_alloc() | |
@@ -158,7 +190,16 @@ FORCE_INLINE int MSB32(uint32_t v) { ASSERT_LV3(v != 0); unsigned long index; _B | |
FORCE_INLINE int MSB64(uint64_t v) { ASSERT_LV3(v != 0); return uint32_t(v >> 32) ? 32 + MSB32(uint32_t(v >> 32)) : MSB32(uint32_t(v)); } | |
#endif | |
-#elif defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) | |
+// use built-in functions. | |
+#elif defined(__GNUC__) | |
+ | |
+// __has_builtin is missing from GCC before 10, which still provides the builtins used below, | |
+// so the availability check only runs where the operator exists instead of rejecting such compilers. | |
+# if defined(__has_builtin) | |
+# if ! __has_builtin(__builtin_clzll) | |
+# error built-in functions required. (__builtin_clzll) | |
+# endif | |
+# endif | |
FORCE_INLINE int LSB32(const u32 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); } | |
FORCE_INLINE int LSB64(const u64 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); } | |
diff --git a/source/extra/config.h b/source/extra/config.h | |
index 3672f49..a147c3b 100644 | |
--- a/source/extra/config.h | |
+++ b/source/extra/config.h | |
@@ -211,8 +211,8 @@ | |
#ifdef YANEURAOU_2017_EARLY_ENGINE | |
#define ENGINE_NAME "YaneuraOu 2017 Early" | |
-#define EVAL_KPPT | |
-//#define EVAL_KPP_KKPT | |
+//#define EVAL_KPPT | |
+#define EVAL_KPP_KKPT | |
//#define EVAL_KPPP_KKPT 18 | |
//#define EVAL_KPPP_KKPT 36 | |
//#define EVAL_NABLA | |
@@ -524,6 +524,8 @@ const bool Is64Bit = false; | |
#define TARGET_CPU "SSE4.1" | |
#elif defined(USE_SSE2) | |
#define TARGET_CPU "SSE2" | |
+#elif defined (USE_NEON) | |
+#define TARGET_CPU "NEON" | |
#else | |
#define TARGET_CPU "noSSE" | |
#endif | |
diff --git a/source/usi.cpp b/source/usi.cpp | |
index 47526b2..74deaa2 100644 | |
--- a/source/usi.cpp | |
+++ b/source/usi.cpp | |
@@ -389,7 +389,7 @@ namespace USI | |
o["EngineNuma"] << Option(-1, -1, 99999); | |
#endif | |
-#if defined(EVAL_LEARN) | |
+#if ! defined(FOR_TOURNAMENT) | |
// isreadyタイミングで評価関数を読み込まれると、新しい評価関数の変換のために | |
// test evalconvertコマンドを叩きたいのに、その新しい評価関数がないがために | |
// このコマンドの実行前に異常終了してしまう。 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment