Skip to content

Instantly share code, notes, and snippets.

@ohga
Last active September 12, 2021 15:43
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ohga/9aaff079043e2ad6a9c29d0a3e30224e to your computer and use it in GitHub Desktop.
Save ohga/9aaff079043e2ad6a9c29d0a3e30224e to your computer and use it in GitHub Desktop.
diff --git a/source/Makefile b/source/Makefile
index 222d35e..692a469 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -155,6 +155,10 @@ tournament:
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -DFOR_TOURNAMENT -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
tournament-sse42:
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_SSE42 -msse4.2 -DFOR_TOURNAMENT -march=corei7' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+tournament-simple-neon: # ARMv7-A hard-float tournament build; defines NO_SSE, so the USE_NEON code paths stay disabled
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+tournament-vain-neon: # ARMv7-A hard-float tournament build; defines USE_NEON to enable the NEON intrinsic code paths
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
avx2:
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
@@ -173,6 +177,12 @@ sse2:
nosse:
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -m32 -march=pentium3' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+simple-neon: # ARMv7-A hard-float build; defines NO_SSE, so the USE_NEON code paths stay disabled
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+
+vain-neon: # ARMv7-A hard-float build; defines USE_NEON to enable the NEON intrinsic code paths
+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+
# ARMなどのCPU
other:
$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
diff --git a/source/bitboard.h b/source/bitboard.h
index a9af50a..dc79d6d 100644
--- a/source/bitboard.h
+++ b/source/bitboard.h
@@ -33,6 +33,12 @@ struct alignas(16) Bitboard
// Aperyを始めとするmagic bitboard派によって考案された。
__m128i m;
};
+#elif defined (USE_NEON)
+ union // NEON build: the same 128 bits viewed either as two u64 words or as one vector register
+ {
+ u64 p[2]; // scalar view
+ int16x8_t m; // 128-bit NEON vector view
+ };
#else // no SSE
u64 p[2];
#endif
@@ -43,6 +49,9 @@ struct alignas(16) Bitboard
Bitboard& operator = (const Bitboard& rhs) { _mm_store_si128(&this->m, rhs.m); return *this; }
Bitboard(const Bitboard& bb) { _mm_store_si128(&this->m, bb.m); }
+#elif defined (USE_NEON) // copy the whole 128-bit board with a single NEON store, mirroring the SSE2 branch
+ Bitboard& operator = (const Bitboard& rhs) { vst1q_s16(reinterpret_cast<int16_t*>(&m), rhs.m); return *this; }
+ Bitboard(const Bitboard& bb) { vst1q_s16(reinterpret_cast<int16_t*>(&m), bb.m); }
#endif
// --- ctor
@@ -125,6 +134,19 @@ struct alignas(16) Bitboard
// 右シフト(縦型Bitboardでは右1回シフトで1段上の升に移動する)
Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ m = _mm_srli_epi64(m, shift); return *this; }
+#elif defined (USE_NEON) // m is int16x8_t storage; each operator picks the intrinsic for the lane width it needs
+ Bitboard& operator |= (const Bitboard& b1) { this->m = vorrq_s16(m, b1.m); return *this; }
+ Bitboard& operator &= (const Bitboard& b1) { this->m = vandq_s16(m, b1.m); return *this; }
+ Bitboard& operator ^= (const Bitboard& b1) { this->m = veorq_s16(m, b1.m); return *this; }
+ Bitboard& operator += (const Bitboard& b1) { this->m = vreinterpretq_s16_u64(vaddq_u64(vreinterpretq_u64_s16(m), vreinterpretq_u64_s16(b1.m))); return *this; } // per-u64 add: 16-bit lanes would drop carries inside p[0]/p[1]
+ Bitboard& operator -= (const Bitboard& b1) { this->m = vreinterpretq_s16_u64(vsubq_u64(vreinterpretq_u64_s16(m), vreinterpretq_u64_s16(b1.m))); return *this; } // per-u64 subtract: 16-bit lanes would drop borrows inside p[0]/p[1]
+
+ // TODO: the NEON immediate shifts looked unreliable here, so they are not used.
+ // Besides: "argument to '__builtin_neon_vshlq_n_v' must be a constant integer" - the _n_ forms need a compile-time shift count.
+ //Bitboard& operator <<= (int shift) { ASSERT_LV3(shift == 1); m = vshlq_n_s16(m, 1); return *this; }
+ //Bitboard& operator >>= (int shift) { ASSERT_LV3(shift == 1); m = vshrq_n_s16(m, 1); return *this; }
+ Bitboard& operator <<= (int shift) { /*ASSERT_LV3(shift == 1);*/ this->p[0] <<= shift; this->p[1] <<= shift; return *this; }
+ Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ this->p[0] >>= shift; this->p[1] >>= shift; return *this; }
#else
Bitboard& operator |= (const Bitboard& b1) { this->p[0] |= b1.p[0]; this->p[1] |= b1.p[1]; return *this; }
Bitboard& operator &= (const Bitboard& b1) { this->p[0] &= b1.p[0]; this->p[1] &= b1.p[1]; return *this; }
@@ -166,6 +188,8 @@ inline Bitboard::Bitboard(u64 p0, u64 p1) :
#if defined(USE_SSE2)
// この命令、引数の順に注意。
m( _mm_set_epi64x(p1,p0))
+#elif defined (USE_NEON) // TODO: initialises through the scalar union view for now; no NEON-specific construction yet
+ p { p0 , p1 }
#else
p { p0 , p1 }
#endif
@@ -176,6 +200,8 @@ inline void Bitboard::set(u64 p0, u64 p1)
{
#if defined(USE_SSE2)
m = _mm_set_epi64x(p1,p0);
+#elif defined (USE_NEON) // TODO: writes through the scalar union view for now; no NEON-specific store yet
+ p[0] = p0; p[1] = p1;
#else
p[0] = p0; p[1] = p1;
#endif
diff --git a/source/eval/evalsum.h b/source/eval/evalsum.h
index 58c82b9..8afcfcb 100644
--- a/source/eval/evalsum.h
+++ b/source/eval/evalsum.h
@@ -91,6 +91,16 @@ namespace Eval {
_mm_store_si128(&m[1], rhs.m[1]);
return *this;
}
+#elif defined(USE_NEON) // copy both 128-bit halves with NEON stores, mirroring the SSE2 branch
+ EvalSum(const EvalSum& es) {
+ for (int half = 0; half < 2; ++half)
+ vst1q_s16(reinterpret_cast<int16_t*>(&m[half]), es.m[half]);
+ }
+ EvalSum& operator = (const EvalSum& rhs) {
+ for (int half = 0; half < 2; ++half)
+ vst1q_s16(reinterpret_cast<int16_t*>(&m[half]), rhs.m[half]);
+ return *this;
+ }
#endif
EvalSum() {}
@@ -119,6 +129,9 @@ namespace Eval {
#elif defined(USE_SSE2)
m[0] = _mm_add_epi32(m[0], rhs.m[0]);
m[1] = _mm_add_epi32(m[1], rhs.m[1]);
+#elif defined(USE_NEON) // add in 32-bit lanes like _mm_add_epi32 above; 16-bit lanes would drop carries out of the low half of each sum
+ m[0] = vreinterpretq_s16_s32(vaddq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(rhs.m[0])));
+ m[1] = vreinterpretq_s16_s32(vaddq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(rhs.m[1])));
#else
p[0][0] += rhs.p[0][0];
p[0][1] += rhs.p[0][1];
@@ -136,6 +149,9 @@ namespace Eval {
#elif defined(USE_SSE2)
m[0] = _mm_sub_epi32(m[0], rhs.m[0]);
m[1] = _mm_sub_epi32(m[1], rhs.m[1]);
+#elif defined(USE_NEON) // subtract in 32-bit lanes like _mm_sub_epi32 above; 16-bit lanes would drop borrows into the high half of each sum
+ m[0] = vreinterpretq_s16_s32(vsubq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(rhs.m[0])));
+ m[1] = vreinterpretq_s16_s32(vsubq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(rhs.m[1])));
#else
p[0][0] -= rhs.p[0][0];
p[0][1] -= rhs.p[0][1];
@@ -179,6 +195,8 @@ namespace Eval {
__m128i m[2];
#elif defined(USE_SSE2)
__m128i m[2];
+#elif defined(USE_NEON)
+ int16x8_t m[2]; // two 128-bit NEON vectors, the counterpart of __m128i m[2] in the SSE2 branch; NOTE(review): the SSE2 arithmetic on this data uses 32-bit lanes
#endif
};
};
diff --git a/source/eval/evaluate_io.cpp b/source/eval/evaluate_io.cpp
index 4993d5d..b083347 100644
--- a/source/eval/evaluate_io.cpp
+++ b/source/eval/evaluate_io.cpp
@@ -79,7 +79,7 @@ namespace EvalIO
return out_.file_or_memory.ptr;
}) != 0)
{
-#if defined(EVAL_LEARN)
+#if ! defined(FOR_TOURNAMENT)
if (Options["SkipLoadingEval"])
{
std::cout << "info string read file error , file = " << in_.file_or_memory.filename << " , but SkipLoadingEval == true , so ignore this." << std::endl;
diff --git a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
index 5db5650..edb7b54 100644
--- a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
+++ b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
@@ -285,6 +285,8 @@ namespace Eval
#if defined(USE_SSE2)
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // a single 128-bit write of zeros, same as _mm_setzero_si128 in the SSE2 branch
+ sum.m[0] = vdupq_n_s16(0);
#else
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
#endif
@@ -375,6 +377,8 @@ namespace Eval
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
#if defined(USE_SSE2)
sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // a single 128-bit write of zeros, same as _mm_setzero_si128 in the SSE2 branch
+ sum.m[0] = vdupq_n_s16(0);
#else
sum.p[0] = { 0, 0 };
sum.p[1] = { 0, 0 };
@@ -1027,6 +1031,8 @@ namespace Eval
#if defined(USE_SSE2)
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // a single 128-bit write of zeros, same as _mm_setzero_si128 in the SSE2 branch
+ sum.m[0] = vdupq_n_s16(0);
#else
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
#endif
diff --git a/source/eval/kppt/evaluate_kppt.cpp b/source/eval/kppt/evaluate_kppt.cpp
index 9e5bf73..679e19b 100644
--- a/source/eval/kppt/evaluate_kppt.cpp
+++ b/source/eval/kppt/evaluate_kppt.cpp
@@ -345,6 +345,8 @@ namespace Eval
#if defined(USE_SSE2)
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // a single 128-bit write of zeros, same as _mm_setzero_si128 in the SSE2 branch
+ sum.m[0] = vdupq_n_s16(0);
#else
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
#endif
@@ -446,6 +448,8 @@ namespace Eval
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
#if defined(USE_SSE2)
sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // a single 128-bit write of zeros, same as _mm_setzero_si128 in the SSE2 branch
+ sum.m[0] = vdupq_n_s16(0);
#else
sum.p[0] = { 0, 0 };
sum.p[1] = { 0, 0 };
@@ -1091,6 +1095,8 @@ namespace Eval
#if defined(USE_SSE2)
// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // a single 128-bit write of zeros, same as _mm_setzero_si128 in the SSE2 branch
+ sum.m[0] = vdupq_n_s16(0);
#else
sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
#endif
diff --git a/source/extra/bitop.h b/source/extra/bitop.h
index 83231c2..4e04181 100644
--- a/source/extra/bitop.h
+++ b/source/extra/bitop.h
@@ -24,6 +24,38 @@
#include <smmintrin.h>
#elif defined (USE_SSE2)
#include <emmintrin.h>
+#elif defined (USE_NEON)
+#include <arm_neon.h>
+// https://raw.githubusercontent.com/otim/SSE-to-NEON/master/sse_to_neon.hpp
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+ may not be visible. */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment)
+{
+ void *block; /* receives the aligned allocation from posix_memalign */
+ if (alignment == 1) /* no alignment constraint: plain malloc is enough */
+ return malloc (size);
+ /* Alignments of 2 (and of 4 where pointers are 8 bytes wide) are
+ raised to sizeof (void *) before calling posix_memalign. */
+ if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
+ alignment = sizeof (void *);
+ /* Any allocation failure is reported to the caller as NULL. */
+ return posix_memalign (&block, alignment, size) == 0 ? block : NULL;
+}
+
+static __inline void
+_mm_free (void * ptr) /* counterpart of _mm_malloc: releases a block it returned */
+{
+ free (ptr); /* _mm_malloc allocates with malloc or posix_memalign; this hands the block to free */
+}
#else
#if defined (__GNUC__)
#include <mm_malloc.h> // for _mm_alloc()
@@ -158,7 +190,16 @@ FORCE_INLINE int MSB32(uint32_t v) { ASSERT_LV3(v != 0); unsigned long index; _B
FORCE_INLINE int MSB64(uint64_t v) { ASSERT_LV3(v != 0); return uint32_t(v >> 32) ? 32 + MSB32(uint32_t(v >> 32)) : MSB32(uint32_t(v)); }
#endif
-#elif defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
+// use built-in functions.
+#elif defined(__GNUC__)
+
+// GCC older than 10 has no __has_builtin but still provides __builtin_ctzll/__builtin_clzll,
+// so the probe only runs when the compiler can actually answer it.
+# if defined(__has_builtin)
+#  if ! __has_builtin(__builtin_clzll) || ! __has_builtin(__builtin_ctzll)
+#   error built-in functions required. (__builtin_clzll / __builtin_ctzll)
+#  endif
+# endif
FORCE_INLINE int LSB32(const u32 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); }
FORCE_INLINE int LSB64(const u64 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); }
diff --git a/source/extra/config.h b/source/extra/config.h
index 3672f49..a147c3b 100644
--- a/source/extra/config.h
+++ b/source/extra/config.h
@@ -211,8 +211,8 @@
#ifdef YANEURAOU_2017_EARLY_ENGINE
#define ENGINE_NAME "YaneuraOu 2017 Early"
-#define EVAL_KPPT
-//#define EVAL_KPP_KKPT
+//#define EVAL_KPPT
+#define EVAL_KPP_KKPT
//#define EVAL_KPPP_KKPT 18
//#define EVAL_KPPP_KKPT 36
//#define EVAL_NABLA
@@ -524,6 +524,8 @@ const bool Is64Bit = false;
#define TARGET_CPU "SSE4.1"
#elif defined(USE_SSE2)
#define TARGET_CPU "SSE2"
+#elif defined (USE_NEON)
+#define TARGET_CPU "NEON"
#else
#define TARGET_CPU "noSSE"
#endif
diff --git a/source/usi.cpp b/source/usi.cpp
index 47526b2..74deaa2 100644
--- a/source/usi.cpp
+++ b/source/usi.cpp
@@ -389,7 +389,7 @@ namespace USI
o["EngineNuma"] << Option(-1, -1, 99999);
#endif
-#if defined(EVAL_LEARN)
+#if ! defined(FOR_TOURNAMENT)
// isreadyタイミングで評価関数を読み込まれると、新しい評価関数の変換のために
// test evalconvertコマンドを叩きたいのに、その新しい評価関数がないがために
// このコマンドの実行前に異常終了してしまう。
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment