Last active
July 5, 2022 01:19
-
-
Save sugyan/03c1e73a253b4591dc9f29f9609ad93e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test)] | |
extern crate test; | |
const NUM_ITER: usize = 10; | |
#[allow(dead_code)] | |
#[inline(always)] | |
fn pop_lsb(n: &mut u64) -> u32 { | |
let ret = n.trailing_zeros(); | |
*n = *n & n.wrapping_sub(1); | |
ret | |
} | |
#[allow(dead_code)] | |
#[inline(always)] | |
fn pop_lsbi(n: &mut i64) -> u32 { | |
let ret = n.trailing_zeros(); | |
*n = *n & n.wrapping_sub(1); | |
ret | |
} | |
mod nosimd_u128_pop { | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub [u64; 2]); | |
impl Bitboard { | |
pub fn pop(&mut self) -> Option<u32> { | |
let repr = self.to_u128(); | |
if repr == 0 { | |
return None; | |
} | |
let pos = repr.trailing_zeros(); | |
let square = if pos < 63 { pos + 1 } else { pos }; | |
let newrepr = repr & repr.wrapping_sub(1); | |
*self = Self::from_u128(newrepr); | |
Some(square) | |
} | |
pub fn to_u128(self) -> u128 { | |
(self.0[1] as u128) << 64 | self.0[0] as u128 | |
} | |
pub fn from_u128(a: u128) -> Self { | |
let v0 = a as u64; | |
let v1 = (a >> 64) as u64; | |
Self([v0, v1]) | |
} | |
} | |
impl Iterator for Bitboard { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.pop() | |
} | |
} | |
} | |
mod nosimd_u64_pop { | |
use super::pop_lsb; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub [u64; 2]); | |
impl Bitboard { | |
pub fn pop(&mut self) -> Option<u32> { | |
if self.0[0] != 0 { | |
return Some(pop_lsb(&mut self.0[0]) + 1); | |
} | |
if self.0[1] != 0 { | |
return Some(pop_lsb(&mut self.0[1]) + 64); | |
} | |
None | |
} | |
} | |
impl Iterator for Bitboard { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.pop() | |
} | |
} | |
} | |
mod nosimd_u64_iterator { | |
use super::pop_lsb; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub [u64; 2]); | |
pub struct BitboardIterator([u64; 2]); | |
impl Iterator for BitboardIterator { | |
type Item = u32; | |
#[inline(always)] | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.0[0] != 0 { | |
return Some(pop_lsb(&mut self.0[0]) + 1); | |
} | |
if self.0[1] != 0 { | |
return Some(pop_lsb(&mut self.0[1]) + 64); | |
} | |
None | |
} | |
} | |
impl IntoIterator for Bitboard { | |
type Item = u32; | |
type IntoIter = BitboardIterator; | |
fn into_iter(self) -> Self::IntoIter { | |
BitboardIterator(self.0) | |
} | |
} | |
} | |
#[cfg(target_arch = "aarch64")] | |
mod aarch64_pop_1 { | |
use super::pop_lsb; | |
use std::arch::aarch64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub aarch64::uint64x2_t); | |
impl Bitboard { | |
pub fn pop(&mut self) -> Option<u32> { | |
let mut m0 = unsafe { aarch64::vgetq_lane_u64::<0>(self.0) }; | |
if m0 != 0 { | |
let sq = Some(pop_lsb(&mut m0) + 1); | |
self.0 = unsafe { aarch64::vsetq_lane_u64::<0>(m0, self.0) }; | |
sq | |
} else { | |
let mut m1 = unsafe { aarch64::vgetq_lane_u64::<1>(self.0) }; | |
if m1 != 0 { | |
let sq = Some(pop_lsb(&mut m1) + 64); | |
self.0 = unsafe { aarch64::vsetq_lane_u64::<1>(m1, self.0) }; | |
sq | |
} else { | |
None | |
} | |
} | |
} | |
} | |
impl Iterator for Bitboard { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.pop() | |
} | |
} | |
} | |
#[cfg(target_arch = "aarch64")] | |
mod aarch64_pop_2 { | |
use super::pop_lsb; | |
use std::arch::aarch64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub aarch64::uint64x2_t); | |
impl Bitboard { | |
pub fn pop(&mut self) -> Option<u32> { | |
let mut m = unsafe { | |
let u = std::mem::MaybeUninit::<[u64; 2]>::uninit(); | |
aarch64::vst1q_u64(u.as_ptr() as *mut _, self.0); | |
u.assume_init() | |
}; | |
if m[0] != 0 { | |
let sq = Some(pop_lsb(&mut m[0]) + 1); | |
self.0 = unsafe { aarch64::vld1q_u64(m.as_ptr()) }; | |
sq | |
} else if m[1] != 0 { | |
let sq = Some(pop_lsb(&mut m[1]) + 64); | |
self.0 = unsafe { aarch64::vld1q_u64(m.as_ptr()) }; | |
sq | |
} else { | |
None | |
} | |
} | |
} | |
impl Iterator for Bitboard { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.pop() | |
} | |
} | |
} | |
#[cfg(target_arch = "aarch64")] | |
mod aarch64_iterator { | |
use super::pop_lsb; | |
use std::arch::aarch64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub aarch64::uint64x2_t); | |
pub struct BitboardIterator([u64; 2]); | |
impl Iterator for BitboardIterator { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.0[0] != 0 { | |
return Some(pop_lsb(&mut self.0[0]) + 1); | |
} | |
if self.0[1] != 0 { | |
return Some(pop_lsb(&mut self.0[1]) + 64); | |
} | |
None | |
} | |
} | |
impl IntoIterator for Bitboard { | |
type Item = u32; | |
type IntoIter = BitboardIterator; | |
fn into_iter(self) -> Self::IntoIter { | |
unsafe { | |
let u = std::mem::MaybeUninit::<[u64; 2]>::uninit(); | |
aarch64::vst1q_u64(u.as_ptr() as *mut _, self.0); | |
BitboardIterator(u.assume_init()) | |
} | |
} | |
} | |
} | |
#[cfg(target_arch = "aarch64")] | |
mod aarch64_iterator_inline { | |
use super::pop_lsb; | |
use std::arch::aarch64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub aarch64::uint64x2_t); | |
pub struct BitboardIterator([u64; 2]); | |
impl Iterator for BitboardIterator { | |
type Item = u32; | |
#[inline(always)] | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.0[0] != 0 { | |
return Some(pop_lsb(&mut self.0[0]) + 1); | |
} | |
if self.0[1] != 0 { | |
return Some(pop_lsb(&mut self.0[1]) + 64); | |
} | |
None | |
} | |
} | |
impl IntoIterator for Bitboard { | |
type Item = u32; | |
type IntoIter = BitboardIterator; | |
fn into_iter(self) -> Self::IntoIter { | |
unsafe { | |
let u = std::mem::MaybeUninit::<[u64; 2]>::uninit(); | |
aarch64::vst1q_u64(u.as_ptr() as *mut _, self.0); | |
BitboardIterator(u.assume_init()) | |
} | |
} | |
} | |
} | |
#[cfg(target_arch = "x86_64")] | |
mod x86_64_pop_1 { | |
use super::pop_lsbi; | |
use std::arch::x86_64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub x86_64::__m128i); | |
impl Bitboard { | |
pub fn pop(&mut self) -> Option<u32> { | |
let mut m0 = unsafe { x86_64::_mm_extract_epi64::<0>(self.0) }; | |
if m0 != 0 { | |
let sq = Some(pop_lsbi(&mut m0) + 1); | |
self.0 = unsafe { x86_64::_mm_insert_epi64::<0>(self.0, m0) }; | |
sq | |
} else { | |
let mut m1 = unsafe { x86_64::_mm_extract_epi64::<1>(self.0) }; | |
if m1 != 0 { | |
let sq = Some(pop_lsbi(&mut m1) + 64); | |
self.0 = unsafe { x86_64::_mm_insert_epi64::<1>(self.0, m1) }; | |
sq | |
} else { | |
None | |
} | |
} | |
} | |
} | |
impl Iterator for Bitboard { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.pop() | |
} | |
} | |
} | |
#[cfg(target_arch = "x86_64")] | |
mod x86_64_pop_2 { | |
use super::pop_lsbi; | |
use std::arch::x86_64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub x86_64::__m128i); | |
impl Bitboard { | |
pub fn pop(&mut self) -> Option<u32> { | |
let mut m = unsafe { | |
let u = std::mem::MaybeUninit::<[i64; 2]>::uninit(); | |
x86_64::_mm_store_si128(u.as_ptr() as *mut _, self.0); | |
u.assume_init() | |
}; | |
if m[0] != 0 { | |
let sq = Some(pop_lsbi(&mut m[0]) + 1); | |
self.0 = unsafe { x86_64::_mm_load_si128(m.as_ptr() as *const _) }; | |
sq | |
} else if m[1] != 0 { | |
let sq = Some(pop_lsbi(&mut m[1]) + 64); | |
self.0 = unsafe { x86_64::_mm_load_si128(m.as_ptr() as *const _) }; | |
sq | |
} else { | |
None | |
} | |
} | |
} | |
impl Iterator for Bitboard { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.pop() | |
} | |
} | |
} | |
#[cfg(target_arch = "x86_64")] | |
mod x86_64_iterator { | |
use super::pop_lsbi; | |
use std::arch::x86_64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub x86_64::__m128i); | |
pub struct BitboardIterator([i64; 2]); | |
impl Iterator for BitboardIterator { | |
type Item = u32; | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.0[0] != 0 { | |
return Some(pop_lsbi(&mut self.0[0]) + 1); | |
} | |
if self.0[1] != 0 { | |
return Some(pop_lsbi(&mut self.0[1]) + 64); | |
} | |
None | |
} | |
} | |
impl IntoIterator for Bitboard { | |
type Item = u32; | |
type IntoIter = BitboardIterator; | |
fn into_iter(self) -> Self::IntoIter { | |
unsafe { | |
let u = std::mem::MaybeUninit::<[i64; 2]>::uninit(); | |
x86_64::_mm_store_si128(u.as_ptr() as *mut _, self.0); | |
BitboardIterator(u.assume_init()) | |
} | |
} | |
} | |
} | |
#[cfg(target_arch = "x86_64")] | |
mod x86_64_iterator_inline { | |
use super::pop_lsbi; | |
use std::arch::x86_64; | |
#[derive(Clone, Copy)] | |
pub struct Bitboard(pub x86_64::__m128i); | |
pub struct BitboardIterator([i64; 2]); | |
impl Iterator for BitboardIterator { | |
type Item = u32; | |
#[inline(always)] | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.0[0] != 0 { | |
return Some(pop_lsbi(&mut self.0[0]) + 1); | |
} | |
if self.0[1] != 0 { | |
return Some(pop_lsbi(&mut self.0[1]) + 64); | |
} | |
None | |
} | |
} | |
impl IntoIterator for Bitboard { | |
type Item = u32; | |
type IntoIter = BitboardIterator; | |
fn into_iter(self) -> Self::IntoIter { | |
unsafe { | |
let u = std::mem::MaybeUninit::<[i64; 2]>::uninit(); | |
x86_64::_mm_store_si128(u.as_ptr() as *mut _, self.0); | |
BitboardIterator(u.assume_init()) | |
} | |
} | |
} | |
} | |
#[cfg(test)] | |
mod bitboard { | |
use super::*; | |
use test::Bencher; | |
#[bench] | |
fn bench_nosimd_u128_pop(b: &mut Bencher) { | |
let bb = nosimd_u128_pop::Bitboard([0x7fff_ffff_ffff_ffff, 0x0003_ffff]); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[bench] | |
fn bench_nosimd_u64_pop(b: &mut Bencher) { | |
let bb = nosimd_u64_pop::Bitboard([0x7fff_ffff_ffff_ffff, 0x0003_ffff]); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[bench] | |
fn bench_nosimd_u64_iterator(b: &mut Bencher) { | |
let bb = nosimd_u64_iterator::Bitboard([0x7fff_ffff_ffff_ffff, 0x0003_ffff]); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "aarch64")] | |
#[bench] | |
fn bench_aarch64_pop_1(b: &mut Bencher) { | |
let bb = aarch64_pop_1::Bitboard(unsafe { | |
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr()) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "aarch64")] | |
#[bench] | |
fn bench_aarch64_pop_2(b: &mut Bencher) { | |
let bb = aarch64_pop_2::Bitboard(unsafe { | |
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr()) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "aarch64")] | |
#[bench] | |
fn bench_aarch64_iterator(b: &mut Bencher) { | |
let bb = aarch64_iterator::Bitboard(unsafe { | |
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr()) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "aarch64")] | |
#[bench] | |
fn bench_aarch64_iterator_inline(b: &mut Bencher) { | |
let bb = aarch64_iterator_inline::Bitboard(unsafe { | |
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr()) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "x86_64")] | |
#[bench] | |
fn bench_x86_64_pop_1(b: &mut Bencher) { | |
let bb = x86_64_pop_1::Bitboard(unsafe { | |
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "x86_64")] | |
#[bench] | |
fn bench_x86_64_pop_2(b: &mut Bencher) { | |
let bb = x86_64_pop_2::Bitboard(unsafe { | |
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "x86_64")] | |
#[bench] | |
fn bench_x86_64_iterator(b: &mut Bencher) { | |
let bb = x86_64_iterator::Bitboard(unsafe { | |
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
#[cfg(target_arch = "x86_64")] | |
#[bench] | |
fn bench_x86_64_iterator_inline(b: &mut Bencher) { | |
let bb = x86_64_iterator_inline::Bitboard(unsafe { | |
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff) | |
}); | |
b.iter(|| { | |
for _ in 0..NUM_ITER { | |
let mut i = 0; | |
for pos in bb { | |
assert_eq!(pos, i + 1); | |
i += 1; | |
} | |
assert_eq!(81, i); | |
} | |
}); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
benchmark with MacBook Pro (14-inch, 2021) Apple M1 Pro:
benchmark with MacBook Pro (13-inch, 2017, Four Thunderbolt 3 Ports) 3.3 GHz Dual-Core Intel Core i5: