ohga/yaneuraou_raspbian.patch

## yaneuraou_raspbian.patch
diff --git a/source/Makefile b/source/Makefile
index 222d35e..692a469 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -155,6 +155,10 @@ tournament:
 	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -DFOR_TOURNAMENT -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
 tournament-sse42:
 	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_SSE42 -msse4.2 -DFOR_TOURNAMENT -march=corei7' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+tournament-simple-neon: # ARMv7-A hard-float build with -mfpu=neon and -DFOR_TOURNAMENT; -DNO_SSE leaves USE_NEON undefined
+	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+tournament-vain-neon: # same flags as tournament-simple-neon, except -DUSE_NEON replaces -DNO_SSE
+	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)

 avx2:
 	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
@@ -173,6 +177,12 @@ sse2:
 nosse:
 	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -m32 -march=pentium3' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)

+simple-neon: # tournament-simple-neon flags without -DFOR_TOURNAMENT
+	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+
+vain-neon: # tournament-vain-neon flags without -DFOR_TOURNAMENT
+	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
+
 # ARMなどのCPU
 other:
 	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
diff --git a/source/bitboard.h b/source/bitboard.h
index a9af50a..dc79d6d 100644
--- a/source/bitboard.h
+++ b/source/bitboard.h
@@ -33,6 +33,12 @@ struct alignas(16) Bitboard
 		// Aperyを始めとするmagic bitboard派によって考案された。
 		__m128i m;
 	};
+#elif defined (USE_NEON) // NEON: the two u64 words and one 128-bit vector share the same storage
+	union
+	{
+		u64 p[2];
+		int16x8_t m;
+	};
 #else // no SSE
 	u64 p[2];
 #endif
@@ -43,6 +49,9 @@ struct alignas(16) Bitboard
 	Bitboard& operator = (const Bitboard& rhs) { _mm_store_si128(&this->m, rhs.m); return *this; }

 	Bitboard(const Bitboard& bb) { _mm_store_si128(&this->m, bb.m); }
+#elif defined (USE_NEON) // copy all 128 bits with a single vector store
+	Bitboard& operator = (const Bitboard& rhs) { vst1q_s16((short*)&this->m, rhs.m); return *this; }
+	Bitboard(const Bitboard& bb) { vst1q_s16((short*)&this->m, bb.m); }
 #endif

 	// --- ctor
@@ -125,6 +134,19 @@ struct alignas(16) Bitboard
 	// 右シフト(縦型Bitboardでは右1回シフトで1段上の升に移動する)
 	Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ m = _mm_srli_epi64(m, shift); return *this; }

+#elif defined (USE_NEON) // bitwise ops use the _s16 variants so the operand type matches int16x8_t m
+	Bitboard& operator |= (const Bitboard& b1) { this->m = vorrq_s16(m, b1.m); return *this; }
+	Bitboard& operator &= (const Bitboard& b1) { this->m = vandq_s16(m, b1.m); return *this; }
+	Bitboard& operator ^= (const Bitboard& b1) { this->m = veorq_s16(m, b1.m); return *this; }
+	Bitboard& operator += (const Bitboard& b1) { this->m = vreinterpretq_s16_u64(vaddq_u64(vreinterpretq_u64_s16(m), vreinterpretq_u64_s16(b1.m))); return *this; } // 64-bit lanes: carries must cross the 16-bit boundaries inside p[0] / p[1]
+	Bitboard& operator -= (const Bitboard& b1) { this->m = vreinterpretq_s16_u64(vsubq_u64(vreinterpretq_u64_s16(m), vreinterpretq_u64_s16(b1.m))); return *this; } // 64-bit lanes: borrows must cross the 16-bit boundaries inside p[0] / p[1]
+
+	// TODO: the NEON shift intrinsics look unreliable here, so they are not used (vshlq_n_s16 / vshrq_n_s16 also shift each 16-bit lane separately).
+	// Besides, a run-time count is rejected: argument to '__builtin_neon_vshlq_n_v' must be a constant integer
+	//Bitboard& operator <<= (int shift) { ASSERT_LV3(shift == 1); m = vshlq_n_s16(m, 1); return *this; }
+	//Bitboard& operator >>= (int shift) { ASSERT_LV3(shift == 1); m = vshrq_n_s16(m, 1); return *this; }
+	Bitboard& operator <<= (int shift) { /*ASSERT_LV3(shift == 1);*/ this->p[0] <<= shift; this->p[1] <<= shift; return *this; }
+	Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ this->p[0] >>= shift; this->p[1] >>= shift; return *this; }
 #else
 	Bitboard& operator |= (const Bitboard& b1) { this->p[0] |= b1.p[0]; this->p[1] |= b1.p[1]; return *this; }
 	Bitboard& operator &= (const Bitboard& b1) { this->p[0] &= b1.p[0]; this->p[1] &= b1.p[1]; return *this; }
@@ -166,6 +188,8 @@ inline Bitboard::Bitboard(u64 p0, u64 p1) :
 #if defined(USE_SSE2)
 	// この命令、引数の順に注意。
 	m( _mm_set_epi64x(p1,p0))
+#elif defined (USE_NEON) // TODO
+	p { p0 , p1 }
 #else
 	p { p0 , p1 }
 #endif
@@ -176,6 +200,8 @@ inline void Bitboard::set(u64 p0, u64 p1)
 {
 #if defined(USE_SSE2)
 	m = _mm_set_epi64x(p1,p0);
+#elif defined (USE_NEON) // TODO
+	p[0] = p0; p[1] = p1;
 #else
 	p[0] = p0; p[1] = p1;
 #endif
diff --git a/source/eval/evalsum.h b/source/eval/evalsum.h
index 58c82b9..8afcfcb 100644
--- a/source/eval/evalsum.h
+++ b/source/eval/evalsum.h
@@ -91,6 +91,16 @@ namespace Eval {
 		  _mm_store_si128(&m[1], rhs.m[1]);
 		  return *this;
 		}
+#elif defined(USE_NEON) // NEON: copy both 128-bit halves with vector stores, mirroring the SSE2 branch
+		EvalSum(const EvalSum& es) {
+		  vst1q_s16((short*)&m[0], es.m[0]);
+		  vst1q_s16((short*)&m[1], es.m[1]);
+		}
+		EvalSum& operator = (const EvalSum& rhs) {
+		  vst1q_s16((short*)&m[0], rhs.m[0]);
+		  vst1q_s16((short*)&m[1], rhs.m[1]);
+		  return *this;
+		}
 #endif

 		EvalSum() {}
@@ -119,6 +129,9 @@ namespace Eval {
 #elif defined(USE_SSE2)
 			m[0] = _mm_add_epi32(m[0], rhs.m[0]);
 			m[1] = _mm_add_epi32(m[1], rhs.m[1]);
+#elif defined(USE_NEON) // add as 4 x int32 lanes like _mm_add_epi32; a 16-bit lane add would drop the carry into the upper half
+			m[0] = vreinterpretq_s16_s32(vaddq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(rhs.m[0])));
+			m[1] = vreinterpretq_s16_s32(vaddq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(rhs.m[1])));
 #else
 			p[0][0] += rhs.p[0][0];
 			p[0][1] += rhs.p[0][1];
@@ -136,6 +149,9 @@ namespace Eval {
 #elif defined(USE_SSE2)
 			m[0] = _mm_sub_epi32(m[0], rhs.m[0]);
 			m[1] = _mm_sub_epi32(m[1], rhs.m[1]);
+#elif defined(USE_NEON) // subtract as 4 x int32 lanes like _mm_sub_epi32; a 16-bit lane subtract would drop the borrow
+			m[0] = vreinterpretq_s16_s32(vsubq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(rhs.m[0])));
+			m[1] = vreinterpretq_s16_s32(vsubq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(rhs.m[1])));
 #else
 			p[0][0] -= rhs.p[0][0];
 			p[0][1] -= rhs.p[0][1];
@@ -179,6 +195,8 @@ namespace Eval {
 			__m128i m[2];
 #elif defined(USE_SSE2)
 	      __m128i m[2];
+#elif defined(USE_NEON) // two 128-bit vectors; the arithmetic operators reinterpret them as 32-bit lanes
+	      int16x8_t m[2];
 #endif
 			};
 		};
diff --git a/source/eval/evaluate_io.cpp b/source/eval/evaluate_io.cpp
index 4993d5d..b083347 100644
--- a/source/eval/evaluate_io.cpp
+++ b/source/eval/evaluate_io.cpp
@@ -79,7 +79,7 @@ namespace EvalIO
 						return out_.file_or_memory.ptr;
 					}) != 0)
 					{
-#if defined(EVAL_LEARN)
+#if ! defined(FOR_TOURNAMENT) // check SkipLoadingEval in every build without FOR_TOURNAMENT, not only under EVAL_LEARN
 						if (Options["SkipLoadingEval"])
 						{
 							std::cout << "info string read file error , file = " << in_.file_or_memory.filename << " , but SkipLoadingEval == true , so ignore this." << std::endl;
diff --git a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
index 5db5650..edb7b54 100644
--- a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
+++ b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
@@ -285,6 +285,8 @@ namespace Eval
 #if defined(USE_SSE2)
 		// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
 		sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // NEON counterpart of the SSE2 zero-clear above
+		sum.m[0] = vmovq_n_s16(0); // writes 0 into every 16-bit lane of sum.m[0]
 #else
 		sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
 #endif
@@ -375,6 +377,8 @@ namespace Eval
 		// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
 #if defined(USE_SSE2)
 		sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // NEON counterpart of the SSE2 zero-clear above
+		sum.m[0] = vmovq_n_s16(0); // writes 0 into every 16-bit lane of sum.m[0]
 #else
 		sum.p[0] = { 0, 0 };
 		sum.p[1] = { 0, 0 };
@@ -1027,6 +1031,8 @@ namespace Eval
 #if defined(USE_SSE2)
 		// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
 		sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // NEON counterpart of the SSE2 zero-clear above
+		sum.m[0] = vmovq_n_s16(0); // writes 0 into every 16-bit lane of sum.m[0]
 #else
 		sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
 #endif
diff --git a/source/eval/kppt/evaluate_kppt.cpp b/source/eval/kppt/evaluate_kppt.cpp
index 9e5bf73..679e19b 100644
--- a/source/eval/kppt/evaluate_kppt.cpp
+++ b/source/eval/kppt/evaluate_kppt.cpp
@@ -345,6 +345,8 @@ namespace Eval
 #if defined(USE_SSE2)
 		// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
 		sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // NEON counterpart of the SSE2 zero-clear above
+		sum.m[0] = vmovq_n_s16(0); // writes 0 into every 16-bit lane of sum.m[0]
 #else
 		sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
 #endif
@@ -446,6 +448,8 @@ namespace Eval
 		// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
 #if defined(USE_SSE2)
 		sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // NEON counterpart of the SSE2 zero-clear above
+		sum.m[0] = vmovq_n_s16(0); // writes 0 into every 16-bit lane of sum.m[0]
 #else
 		sum.p[0] = { 0, 0 };
 		sum.p[1] = { 0, 0 };
@@ -1091,6 +1095,8 @@ namespace Eval
 #if defined(USE_SSE2)
 		// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
 		sum.m[0] = _mm_setzero_si128();
+#elif defined (USE_NEON) // NEON counterpart of the SSE2 zero-clear above
+		sum.m[0] = vmovq_n_s16(0); // writes 0 into every 16-bit lane of sum.m[0]
 #else
 		sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
 #endif
diff --git a/source/extra/bitop.h b/source/extra/bitop.h
index 83231c2..4e04181 100644
--- a/source/extra/bitop.h
+++ b/source/extra/bitop.h
@@ -24,6 +24,38 @@
 #include <smmintrin.h>
 #elif defined (USE_SSE2)
 #include <emmintrin.h>
+#elif defined (USE_NEON) // NEON build: pull in the intrinsics and define _mm_malloc / _mm_free locally
+#include <arm_neon.h>
+// https://raw.githubusercontent.com/otim/SSE-to-NEON/master/sse_to_neon.hpp
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+ may not be visible.  */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment) // returns size bytes aligned to alignment, or NULL when posix_memalign fails
+{
+    void *ptr;
+    if (alignment == 1)
+        return malloc (size);
+    if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4)) // posix_memalign requires a multiple of sizeof (void *)
+        alignment = sizeof (void *);
+    if (posix_memalign (&ptr, alignment, size) == 0)
+        return ptr;
+    else
+        return NULL;
+}
+
+static __inline void
+_mm_free (void * ptr) // releases memory obtained from _mm_malloc above
+{
+    free (ptr);
+}
 #else
 #if defined (__GNUC__)
 #include <mm_malloc.h> // for _mm_alloc()
@@ -158,7 +190,16 @@ FORCE_INLINE int MSB32(uint32_t v) { ASSERT_LV3(v != 0); unsigned long index; _B
 FORCE_INLINE int MSB64(uint64_t v) { ASSERT_LV3(v != 0); return uint32_t(v >> 32) ? 32 + MSB32(uint32_t(v >> 32)) : MSB32(uint32_t(v)); }
 #endif

-#elif defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
+// use built-in functions.
+#elif defined(__GNUC__)
+
+// __has_builtin only exists in Clang and GCC >= 10; older GCC still ships
+// __builtin_clzll / __builtin_ctzll, so probe only when the operator exists.
+# if defined(__has_builtin)
+#  if ! __has_builtin(__builtin_clzll)
+#   error built-in functions required. (__builtin_clzll)
+#  endif
+# endif

 FORCE_INLINE int LSB32(const u32 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); }
 FORCE_INLINE int LSB64(const u64 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); }
diff --git a/source/extra/config.h b/source/extra/config.h
index 3672f49..a147c3b 100644
--- a/source/extra/config.h
+++ b/source/extra/config.h
@@ -211,8 +211,8 @@

 #ifdef YANEURAOU_2017_EARLY_ENGINE
 #define ENGINE_NAME "YaneuraOu 2017 Early"
-#define EVAL_KPPT
-//#define EVAL_KPP_KKPT
+//#define EVAL_KPPT
+#define EVAL_KPP_KKPT // this patch switches the 2017 Early engine from EVAL_KPPT to EVAL_KPP_KKPT
 //#define EVAL_KPPP_KKPT 18
 //#define EVAL_KPPP_KKPT 36
 //#define EVAL_NABLA
@@ -524,6 +524,8 @@ const bool Is64Bit = false;
 #define TARGET_CPU "SSE4.1"
 #elif defined(USE_SSE2)
 #define TARGET_CPU "SSE2"
+#elif defined (USE_NEON) // builds compiled with -DUSE_NEON report "NEON" as their target CPU
+#define TARGET_CPU "NEON"
 #else
 #define TARGET_CPU "noSSE"
 #endif
diff --git a/source/usi.cpp b/source/usi.cpp
index 47526b2..74deaa2 100644
--- a/source/usi.cpp
+++ b/source/usi.cpp
@@ -389,7 +389,7 @@ namespace USI
 		o["EngineNuma"] << Option(-1, -1, 99999);
 #endif

-#if defined(EVAL_LEARN)
+#if ! defined(FOR_TOURNAMENT) // enable this section in every build without FOR_TOURNAMENT, not only under EVAL_LEARN
 		// isreadyタイミングで評価関数を読み込まれると、新しい評価関数の変換のために
 		// test evalconvertコマンドを叩きたいのに、その新しい評価関数がないがために
 		// このコマンドの実行前に異常終了してしまう。
	diff --git a/source/Makefile b/source/Makefile
	index 222d35e..692a469 100644
	--- a/source/Makefile
	+++ b/source/Makefile
	@@ -155,6 +155,10 @@ tournament:
	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -DFOR_TOURNAMENT -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	tournament-sse42:
	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_SSE42 -msse4.2 -DFOR_TOURNAMENT -march=corei7' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	+tournament-simple-neon:
	+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	+tournament-vain-neon:
	+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -DFOR_TOURNAMENT -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)

	avx2:
	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_AVX2 -mbmi2 -mavx2 -march=corei7-avx' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	@@ -173,6 +177,12 @@ sse2:
	nosse:
	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -m32 -march=pentium3' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)

	+simple-neon:
	+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	+
	+vain-neon:
	+ $(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DUSE_NEON -mfloat-abi=hard -mfpu=neon -march=armv7-a -mtune=cortex-a8 -ffast-math -mabi=aapcs-linux -fforce-addr -fomit-frame-pointer' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	+
	# ARMなどのCPU
	other:
	$(MAKE) CFLAGS='$(CFLAGS) -DNDEBUG -DUSE_MAKEFILE -D$(YANEURAOU_EDITION) -DNO_SSE' LDFLAGS='$(LDFLAGS) $(LTOFLAGS)' $(TARGET)
	diff --git a/source/bitboard.h b/source/bitboard.h
	index a9af50a..dc79d6d 100644
	--- a/source/bitboard.h
	+++ b/source/bitboard.h
	@@ -33,6 +33,12 @@ struct alignas(16) Bitboard
	// Aperyを始めとするmagic bitboard派によって考案された。
	__m128i m;
	};
	+#elif defined (USE_NEON)
	+ union
	+ {
	+ u64 p[2];
	+ int16x8_t m;
	+ };
	#else // no SSE
	u64 p[2];
	#endif
	@@ -43,6 +49,9 @@ struct alignas(16) Bitboard
	Bitboard& operator = (const Bitboard& rhs) { _mm_store_si128(&this->m, rhs.m); return *this; }

	Bitboard(const Bitboard& bb) { _mm_store_si128(&this->m, bb.m); }
	+#elif defined (USE_NEON)
	+ Bitboard& operator = (const Bitboard& rhs) { vst1q_s16((short*)&this->m, rhs.m); return *this; }
	+ Bitboard(const Bitboard& bb) { vst1q_s16((short*)&this->m, bb.m); }
	#endif

	// --- ctor
	@@ -125,6 +134,19 @@ struct alignas(16) Bitboard
	// 右シフト(縦型Bitboardでは右1回シフトで1段上の升に移動する)
	Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ m = _mm_srli_epi64(m, shift); return *this; }

	+#elif defined (USE_NEON)
	+ Bitboard& operator |= (const Bitboard& b1) { this->m = vorrq_u16( m, b1.m); return *this; }
	+ Bitboard& operator &= (const Bitboard& b1) { this->m = vandq_u16(m, b1.m); return *this; }
	+ Bitboard& operator ^= (const Bitboard& b1) { this->m = veorq_u16(m, b1.m); return *this; }
	+ Bitboard& operator += (const Bitboard& b1) { this->m = vaddq_s16(m, b1.m); return *this; }
	+ Bitboard& operator -= (const Bitboard& b1) { this->m = vsubq_s16(m, b1.m); return *this; }
	+
	+ // TODO: shift はなんかあやしいので使わない。
	+ // つか、 argument to '__builtin_neon_vshlq_n_v' must be a constant integer
	+ //Bitboard& operator <<= (int shift) { ASSERT_LV3(shift == 1); m = vshlq_n_s16(m, 1); return *this; }
	+ //Bitboard& operator >>= (int shift) { ASSERT_LV3(shift == 1); m = vshrq_n_s16(m, 1); return *this; }
	+ Bitboard& operator <<= (int shift) { /*ASSERT_LV3(shift == 1);*/ this->p[0] <<= shift; this->p[1] <<= shift; return *this; }
	+ Bitboard& operator >>= (int shift) { /*ASSERT_LV3(shift == 1);*/ this->p[0] >>= shift; this->p[1] >>= shift; return *this; }
	#else
	Bitboard& operator |= (const Bitboard& b1) { this->p[0] |= b1.p[0]; this->p[1] |= b1.p[1]; return *this; }
	Bitboard& operator &= (const Bitboard& b1) { this->p[0] &= b1.p[0]; this->p[1] &= b1.p[1]; return *this; }
	@@ -166,6 +188,8 @@ inline Bitboard::Bitboard(u64 p0, u64 p1) :
	#if defined(USE_SSE2)
	// この命令、引数の順に注意。
	m( _mm_set_epi64x(p1,p0))
	+#elif defined (USE_NEON) // TODO
	+ p { p0 , p1 }
	#else
	p { p0 , p1 }
	#endif
	@@ -176,6 +200,8 @@ inline void Bitboard::set(u64 p0, u64 p1)
	{
	#if defined(USE_SSE2)
	m = _mm_set_epi64x(p1,p0);
	+#elif defined (USE_NEON) // TODO
	+ p[0] = p0; p[1] = p1;
	#else
	p[0] = p0; p[1] = p1;
	#endif
	diff --git a/source/eval/evalsum.h b/source/eval/evalsum.h
	index 58c82b9..8afcfcb 100644
	--- a/source/eval/evalsum.h
	+++ b/source/eval/evalsum.h
	@@ -91,6 +91,16 @@ namespace Eval {
	_mm_store_si128(&m[1], rhs.m[1]);
	return *this;
	}
	+#elif defined(USE_NEON)
	+ EvalSum(const EvalSum& es) {
	+ vst1q_s16((short*)&m[0], es.m[0]);
	+ vst1q_s16((short*)&m[1], es.m[1]);
	+ }
	+ EvalSum& operator = (const EvalSum& rhs) {
	+ vst1q_s16((short*)&m[0], rhs.m[0]);
	+ vst1q_s16((short*)&m[1], rhs.m[1]);
	+ return *this;
	+ }
	#endif

	EvalSum() {}
	@@ -119,6 +129,9 @@ namespace Eval {
	#elif defined(USE_SSE2)
	m[0] = _mm_add_epi32(m[0], rhs.m[0]);
	m[1] = _mm_add_epi32(m[1], rhs.m[1]);
	+#elif defined(USE_NEON)
	+ m[0] = vaddq_s16(m[0], rhs.m[0]);
	+ m[1] = vaddq_s16(m[1], rhs.m[1]);
	#else
	p[0][0] += rhs.p[0][0];
	p[0][1] += rhs.p[0][1];
	@@ -136,6 +149,9 @@ namespace Eval {
	#elif defined(USE_SSE2)
	m[0] = _mm_sub_epi32(m[0], rhs.m[0]);
	m[1] = _mm_sub_epi32(m[1], rhs.m[1]);
	+#elif defined(USE_NEON)
	+ m[0] = vsubq_s16(m[0], rhs.m[0]);
	+ m[1] = vsubq_s16(m[1], rhs.m[1]);
	#else
	p[0][0] -= rhs.p[0][0];
	p[0][1] -= rhs.p[0][1];
	@@ -179,6 +195,8 @@ namespace Eval {
	__m128i m[2];
	#elif defined(USE_SSE2)
	__m128i m[2];
	+#elif defined(USE_NEON)
	+ int16x8_t m[2];
	#endif
	};
	};
	diff --git a/source/eval/evaluate_io.cpp b/source/eval/evaluate_io.cpp
	index 4993d5d..b083347 100644
	--- a/source/eval/evaluate_io.cpp
	+++ b/source/eval/evaluate_io.cpp
	@@ -79,7 +79,7 @@ namespace EvalIO
	return out_.file_or_memory.ptr;
	}) != 0)
	{
	-#if defined(EVAL_LEARN)
	+#if ! defined(FOR_TOURNAMENT)
	if (Options["SkipLoadingEval"])
	{
	std::cout << "info string read file error , file = " << in_.file_or_memory.filename << " , but SkipLoadingEval == true , so ignore this." << std::endl;
	diff --git a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
	index 5db5650..edb7b54 100644
	--- a/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
	+++ b/source/eval/kpp_kkpt/evaluate_kpp_kkpt.cpp
	@@ -285,6 +285,8 @@ namespace Eval
	#if defined(USE_SSE2)
	// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
	sum.m[0] = _mm_setzero_si128();
	+#elif defined (USE_NEON)
	+ sum.m[0] = vmovq_n_s16(0);
	#else
	sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
	#endif
	@@ -375,6 +377,8 @@ namespace Eval
	// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
	#if defined(USE_SSE2)
	sum.m[0] = _mm_setzero_si128();
	+#elif defined (USE_NEON)
	+ sum.m[0] = vmovq_n_s16(0);
	#else
	sum.p[0] = { 0, 0 };
	sum.p[1] = { 0, 0 };
	@@ -1027,6 +1031,8 @@ namespace Eval
	#if defined(USE_SSE2)
	// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
	sum.m[0] = _mm_setzero_si128();
	+#elif defined (USE_NEON)
	+ sum.m[0] = vmovq_n_s16(0);
	#else
	sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
	#endif
	diff --git a/source/eval/kppt/evaluate_kppt.cpp b/source/eval/kppt/evaluate_kppt.cpp
	index 9e5bf73..679e19b 100644
	--- a/source/eval/kppt/evaluate_kppt.cpp
	+++ b/source/eval/kppt/evaluate_kppt.cpp
	@@ -345,6 +345,8 @@ namespace Eval
	#if defined(USE_SSE2)
	// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
	sum.m[0] = _mm_setzero_si128();
	+#elif defined (USE_NEON)
	+ sum.m[0] = vmovq_n_s16(0);
	#else
	sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
	#endif
	@@ -446,6 +448,8 @@ namespace Eval
	// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
	#if defined(USE_SSE2)
	sum.m[0] = _mm_setzero_si128();
	+#elif defined (USE_NEON)
	+ sum.m[0] = vmovq_n_s16(0);
	#else
	sum.p[0] = { 0, 0 };
	sum.p[1] = { 0, 0 };
	@@ -1091,6 +1095,8 @@ namespace Eval
	#if defined(USE_SSE2)
	// sum.p[0](BKPP)とsum.p[1](WKPP)をゼロクリア
	sum.m[0] = _mm_setzero_si128();
	+#elif defined (USE_NEON)
	+ sum.m[0] = vmovq_n_s16(0);
	#else
	sum.p[0][0] = sum.p[0][1] = sum.p[1][0] = sum.p[1][1] = 0;
	#endif
	diff --git a/source/extra/bitop.h b/source/extra/bitop.h
	index 83231c2..4e04181 100644
	--- a/source/extra/bitop.h
	+++ b/source/extra/bitop.h
	@@ -24,6 +24,38 @@
	#include <smmintrin.h>
	#elif defined (USE_SSE2)
	#include <emmintrin.h>
	+#elif defined (USE_NEON)
	+#include <arm_neon.h>
	+// https://raw.githubusercontent.com/otim/SSE-to-NEON/master/sse_to_neon.hpp
	+#include <stdlib.h>
	+
	+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
	+ may not be visible. */
	+#ifndef __cplusplus
	+extern int posix_memalign (void **, size_t, size_t);
	+#else
	+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
	+#endif
	+
	+static __inline void *
	+_mm_malloc (size_t size, size_t alignment)
	+{
	+ void *ptr;
	+ if (alignment == 1)
	+ return malloc (size);
	+ if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
	+ alignment = sizeof (void *);
	+ if (posix_memalign (&ptr, alignment, size) == 0)
	+ return ptr;
	+ else
	+ return NULL;
	+}
	+
	+static __inline void
	+_mm_free (void * ptr)
	+{
	+ free (ptr);
	+}
	#else
	#if defined (__GNUC__)
	#include <mm_malloc.h> // for _mm_alloc()
	@@ -158,7 +190,16 @@ FORCE_INLINE int MSB32(uint32_t v) { ASSERT_LV3(v != 0); unsigned long index; _B
	FORCE_INLINE int MSB64(uint64_t v) { ASSERT_LV3(v != 0); return uint32_t(v >> 32) ? 32 + MSB32(uint32_t(v >> 32)) : MSB32(uint32_t(v)); }
	#endif

	-#elif defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
	+// use built-in functions.
	+#elif defined(__GNUC__)
	+
	+# ifndef __has_builtin
	+# error built-in functions required. (__has_builtin)
	+# endif
	+
	+# if ! __has_builtin(__builtin_clzll)
	+# error built-in functions required. (__builtin_clzll)
	+# endif

	FORCE_INLINE int LSB32(const u32 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); }
	FORCE_INLINE int LSB64(const u64 v) { ASSERT_LV3(v != 0); return __builtin_ctzll(v); }
	diff --git a/source/extra/config.h b/source/extra/config.h
	index 3672f49..a147c3b 100644
	--- a/source/extra/config.h
	+++ b/source/extra/config.h
	@@ -211,8 +211,8 @@

	#ifdef YANEURAOU_2017_EARLY_ENGINE
	#define ENGINE_NAME "YaneuraOu 2017 Early"
	-#define EVAL_KPPT
	-//#define EVAL_KPP_KKPT
	+//#define EVAL_KPPT
	+#define EVAL_KPP_KKPT
	//#define EVAL_KPPP_KKPT 18
	//#define EVAL_KPPP_KKPT 36
	//#define EVAL_NABLA
	@@ -524,6 +524,8 @@ const bool Is64Bit = false;
	#define TARGET_CPU "SSE4.1"
	#elif defined(USE_SSE2)
	#define TARGET_CPU "SSE2"
	+#elif defined (USE_NEON)
	+#define TARGET_CPU "NEON"
	#else
	#define TARGET_CPU "noSSE"
	#endif
	diff --git a/source/usi.cpp b/source/usi.cpp
	index 47526b2..74deaa2 100644
	--- a/source/usi.cpp
	+++ b/source/usi.cpp
	@@ -389,7 +389,7 @@ namespace USI
	o["EngineNuma"] << Option(-1, -1, 99999);
	#endif

	-#if defined(EVAL_LEARN)
	+#if ! defined(FOR_TOURNAMENT)
	// isreadyタイミングで評価関数を読み込まれると、新しい評価関数の変換のために
	// test evalconvertコマンドを叩きたいのに、その新しい評価関数がないがために
	// このコマンドの実行前に異常終了してしまう。