Skip to content

Instantly share code, notes, and snippets.

@sugyan
Last active July 5, 2022 01:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sugyan/03c1e73a253b4591dc9f29f9609ad93e to your computer and use it in GitHub Desktop.
Save sugyan/03c1e73a253b4591dc9f29f9609ad93e to your computer and use it in GitHub Desktop.
#![feature(test)]
extern crate test;
const NUM_ITER: usize = 10;
#[allow(dead_code)]
#[inline(always)]
fn pop_lsb(n: &mut u64) -> u32 {
let ret = n.trailing_zeros();
*n = *n & n.wrapping_sub(1);
ret
}
#[allow(dead_code)]
#[inline(always)]
fn pop_lsbi(n: &mut i64) -> u32 {
let ret = n.trailing_zeros();
*n = *n & n.wrapping_sub(1);
ret
}
mod nosimd_u128_pop {
#[derive(Clone, Copy)]
pub struct Bitboard(pub [u64; 2]);
impl Bitboard {
pub fn pop(&mut self) -> Option<u32> {
let repr = self.to_u128();
if repr == 0 {
return None;
}
let pos = repr.trailing_zeros();
let square = if pos < 63 { pos + 1 } else { pos };
let newrepr = repr & repr.wrapping_sub(1);
*self = Self::from_u128(newrepr);
Some(square)
}
pub fn to_u128(self) -> u128 {
(self.0[1] as u128) << 64 | self.0[0] as u128
}
pub fn from_u128(a: u128) -> Self {
let v0 = a as u64;
let v1 = (a >> 64) as u64;
Self([v0, v1])
}
}
impl Iterator for Bitboard {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.pop()
}
}
}
mod nosimd_u64_pop {
use super::pop_lsb;
#[derive(Clone, Copy)]
pub struct Bitboard(pub [u64; 2]);
impl Bitboard {
pub fn pop(&mut self) -> Option<u32> {
if self.0[0] != 0 {
return Some(pop_lsb(&mut self.0[0]) + 1);
}
if self.0[1] != 0 {
return Some(pop_lsb(&mut self.0[1]) + 64);
}
None
}
}
impl Iterator for Bitboard {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.pop()
}
}
}
mod nosimd_u64_iterator {
use super::pop_lsb;
#[derive(Clone, Copy)]
pub struct Bitboard(pub [u64; 2]);
pub struct BitboardIterator([u64; 2]);
impl Iterator for BitboardIterator {
type Item = u32;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
if self.0[0] != 0 {
return Some(pop_lsb(&mut self.0[0]) + 1);
}
if self.0[1] != 0 {
return Some(pop_lsb(&mut self.0[1]) + 64);
}
None
}
}
impl IntoIterator for Bitboard {
type Item = u32;
type IntoIter = BitboardIterator;
fn into_iter(self) -> Self::IntoIter {
BitboardIterator(self.0)
}
}
}
#[cfg(target_arch = "aarch64")]
mod aarch64_pop_1 {
use super::pop_lsb;
use std::arch::aarch64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub aarch64::uint64x2_t);
impl Bitboard {
pub fn pop(&mut self) -> Option<u32> {
let mut m0 = unsafe { aarch64::vgetq_lane_u64::<0>(self.0) };
if m0 != 0 {
let sq = Some(pop_lsb(&mut m0) + 1);
self.0 = unsafe { aarch64::vsetq_lane_u64::<0>(m0, self.0) };
sq
} else {
let mut m1 = unsafe { aarch64::vgetq_lane_u64::<1>(self.0) };
if m1 != 0 {
let sq = Some(pop_lsb(&mut m1) + 64);
self.0 = unsafe { aarch64::vsetq_lane_u64::<1>(m1, self.0) };
sq
} else {
None
}
}
}
}
impl Iterator for Bitboard {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.pop()
}
}
}
#[cfg(target_arch = "aarch64")]
mod aarch64_pop_2 {
use super::pop_lsb;
use std::arch::aarch64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub aarch64::uint64x2_t);
impl Bitboard {
pub fn pop(&mut self) -> Option<u32> {
let mut m = unsafe {
let u = std::mem::MaybeUninit::<[u64; 2]>::uninit();
aarch64::vst1q_u64(u.as_ptr() as *mut _, self.0);
u.assume_init()
};
if m[0] != 0 {
let sq = Some(pop_lsb(&mut m[0]) + 1);
self.0 = unsafe { aarch64::vld1q_u64(m.as_ptr()) };
sq
} else if m[1] != 0 {
let sq = Some(pop_lsb(&mut m[1]) + 64);
self.0 = unsafe { aarch64::vld1q_u64(m.as_ptr()) };
sq
} else {
None
}
}
}
impl Iterator for Bitboard {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.pop()
}
}
}
#[cfg(target_arch = "aarch64")]
mod aarch64_iterator {
use super::pop_lsb;
use std::arch::aarch64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub aarch64::uint64x2_t);
pub struct BitboardIterator([u64; 2]);
impl Iterator for BitboardIterator {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
if self.0[0] != 0 {
return Some(pop_lsb(&mut self.0[0]) + 1);
}
if self.0[1] != 0 {
return Some(pop_lsb(&mut self.0[1]) + 64);
}
None
}
}
impl IntoIterator for Bitboard {
type Item = u32;
type IntoIter = BitboardIterator;
fn into_iter(self) -> Self::IntoIter {
unsafe {
let u = std::mem::MaybeUninit::<[u64; 2]>::uninit();
aarch64::vst1q_u64(u.as_ptr() as *mut _, self.0);
BitboardIterator(u.assume_init())
}
}
}
}
#[cfg(target_arch = "aarch64")]
mod aarch64_iterator_inline {
use super::pop_lsb;
use std::arch::aarch64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub aarch64::uint64x2_t);
pub struct BitboardIterator([u64; 2]);
impl Iterator for BitboardIterator {
type Item = u32;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
if self.0[0] != 0 {
return Some(pop_lsb(&mut self.0[0]) + 1);
}
if self.0[1] != 0 {
return Some(pop_lsb(&mut self.0[1]) + 64);
}
None
}
}
impl IntoIterator for Bitboard {
type Item = u32;
type IntoIter = BitboardIterator;
fn into_iter(self) -> Self::IntoIter {
unsafe {
let u = std::mem::MaybeUninit::<[u64; 2]>::uninit();
aarch64::vst1q_u64(u.as_ptr() as *mut _, self.0);
BitboardIterator(u.assume_init())
}
}
}
}
#[cfg(target_arch = "x86_64")]
mod x86_64_pop_1 {
use super::pop_lsbi;
use std::arch::x86_64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub x86_64::__m128i);
impl Bitboard {
pub fn pop(&mut self) -> Option<u32> {
let mut m0 = unsafe { x86_64::_mm_extract_epi64::<0>(self.0) };
if m0 != 0 {
let sq = Some(pop_lsbi(&mut m0) + 1);
self.0 = unsafe { x86_64::_mm_insert_epi64::<0>(self.0, m0) };
sq
} else {
let mut m1 = unsafe { x86_64::_mm_extract_epi64::<1>(self.0) };
if m1 != 0 {
let sq = Some(pop_lsbi(&mut m1) + 64);
self.0 = unsafe { x86_64::_mm_insert_epi64::<1>(self.0, m1) };
sq
} else {
None
}
}
}
}
impl Iterator for Bitboard {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.pop()
}
}
}
#[cfg(target_arch = "x86_64")]
mod x86_64_pop_2 {
use super::pop_lsbi;
use std::arch::x86_64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub x86_64::__m128i);
impl Bitboard {
pub fn pop(&mut self) -> Option<u32> {
let mut m = unsafe {
let u = std::mem::MaybeUninit::<[i64; 2]>::uninit();
x86_64::_mm_store_si128(u.as_ptr() as *mut _, self.0);
u.assume_init()
};
if m[0] != 0 {
let sq = Some(pop_lsbi(&mut m[0]) + 1);
self.0 = unsafe { x86_64::_mm_load_si128(m.as_ptr() as *const _) };
sq
} else if m[1] != 0 {
let sq = Some(pop_lsbi(&mut m[1]) + 64);
self.0 = unsafe { x86_64::_mm_load_si128(m.as_ptr() as *const _) };
sq
} else {
None
}
}
}
impl Iterator for Bitboard {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.pop()
}
}
}
#[cfg(target_arch = "x86_64")]
mod x86_64_iterator {
use super::pop_lsbi;
use std::arch::x86_64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub x86_64::__m128i);
pub struct BitboardIterator([i64; 2]);
impl Iterator for BitboardIterator {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
if self.0[0] != 0 {
return Some(pop_lsbi(&mut self.0[0]) + 1);
}
if self.0[1] != 0 {
return Some(pop_lsbi(&mut self.0[1]) + 64);
}
None
}
}
impl IntoIterator for Bitboard {
type Item = u32;
type IntoIter = BitboardIterator;
fn into_iter(self) -> Self::IntoIter {
unsafe {
let u = std::mem::MaybeUninit::<[i64; 2]>::uninit();
x86_64::_mm_store_si128(u.as_ptr() as *mut _, self.0);
BitboardIterator(u.assume_init())
}
}
}
}
#[cfg(target_arch = "x86_64")]
mod x86_64_iterator_inline {
use super::pop_lsbi;
use std::arch::x86_64;
#[derive(Clone, Copy)]
pub struct Bitboard(pub x86_64::__m128i);
pub struct BitboardIterator([i64; 2]);
impl Iterator for BitboardIterator {
type Item = u32;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
if self.0[0] != 0 {
return Some(pop_lsbi(&mut self.0[0]) + 1);
}
if self.0[1] != 0 {
return Some(pop_lsbi(&mut self.0[1]) + 64);
}
None
}
}
impl IntoIterator for Bitboard {
type Item = u32;
type IntoIter = BitboardIterator;
fn into_iter(self) -> Self::IntoIter {
unsafe {
let u = std::mem::MaybeUninit::<[i64; 2]>::uninit();
x86_64::_mm_store_si128(u.as_ptr() as *mut _, self.0);
BitboardIterator(u.assume_init())
}
}
}
}
#[cfg(test)]
mod bitboard {
use super::*;
use test::Bencher;
#[bench]
fn bench_nosimd_u128_pop(b: &mut Bencher) {
let bb = nosimd_u128_pop::Bitboard([0x7fff_ffff_ffff_ffff, 0x0003_ffff]);
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[bench]
fn bench_nosimd_u64_pop(b: &mut Bencher) {
let bb = nosimd_u64_pop::Bitboard([0x7fff_ffff_ffff_ffff, 0x0003_ffff]);
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[bench]
fn bench_nosimd_u64_iterator(b: &mut Bencher) {
let bb = nosimd_u64_iterator::Bitboard([0x7fff_ffff_ffff_ffff, 0x0003_ffff]);
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "aarch64")]
#[bench]
fn bench_aarch64_pop_1(b: &mut Bencher) {
let bb = aarch64_pop_1::Bitboard(unsafe {
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr())
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "aarch64")]
#[bench]
fn bench_aarch64_pop_2(b: &mut Bencher) {
let bb = aarch64_pop_2::Bitboard(unsafe {
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr())
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "aarch64")]
#[bench]
fn bench_aarch64_iterator(b: &mut Bencher) {
let bb = aarch64_iterator::Bitboard(unsafe {
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr())
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "aarch64")]
#[bench]
fn bench_aarch64_iterator_inline(b: &mut Bencher) {
let bb = aarch64_iterator_inline::Bitboard(unsafe {
std::arch::aarch64::vld1q_u64([0x7fff_ffff_ffff_ffff, 0x0003_ffff].as_ptr())
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "x86_64")]
#[bench]
fn bench_x86_64_pop_1(b: &mut Bencher) {
let bb = x86_64_pop_1::Bitboard(unsafe {
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff)
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "x86_64")]
#[bench]
fn bench_x86_64_pop_2(b: &mut Bencher) {
let bb = x86_64_pop_2::Bitboard(unsafe {
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff)
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "x86_64")]
#[bench]
fn bench_x86_64_iterator(b: &mut Bencher) {
let bb = x86_64_iterator::Bitboard(unsafe {
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff)
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
#[cfg(target_arch = "x86_64")]
#[bench]
fn bench_x86_64_iterator_inline(b: &mut Bencher) {
let bb = x86_64_iterator_inline::Bitboard(unsafe {
std::arch::x86_64::_mm_set_epi64x(0x0003_ffff, 0x7fff_ffff_ffff_ffff)
});
b.iter(|| {
for _ in 0..NUM_ITER {
let mut i = 0;
for pos in bb {
assert_eq!(pos, i + 1);
i += 1;
}
assert_eq!(81, i);
}
});
}
}
@sugyan
Copy link
Author

sugyan commented Jun 30, 2022

benchmark with MacBook Pro (14-inch, 2021) Apple M1 Pro:

running 7 tests
test bitboard::bench_aarch64_iterator        ... bench:       2,985 ns/iter (+/- 16)
test bitboard::bench_aarch64_iterator_inline ... bench:         688 ns/iter (+/- 7)
test bitboard::bench_aarch64_pop_1           ... bench:       2,953 ns/iter (+/- 53)
test bitboard::bench_aarch64_pop_2           ... bench:       3,053 ns/iter (+/- 37)
test bitboard::bench_nosimd_u128_pop         ... bench:       1,031 ns/iter (+/- 19)
test bitboard::bench_nosimd_u64_iterator     ... bench:         649 ns/iter (+/- 38)
test bitboard::bench_nosimd_u64_pop          ... bench:         666 ns/iter (+/- 16)

benchmark with MacBook Pro (13-inch, 2017, Four Thunderbolt 3 Ports) 3.3 GHz Dual-Core Intel Core i5:

running 7 tests
test bitboard::bench_nosimd_u128_pop        ... bench:       1,422 ns/iter (+/- 30)
test bitboard::bench_nosimd_u64_iterator    ... bench:         632 ns/iter (+/- 5)
test bitboard::bench_nosimd_u64_pop         ... bench:       1,140 ns/iter (+/- 542)
test bitboard::bench_x86_64_iterator        ... bench:       1,591 ns/iter (+/- 44)
test bitboard::bench_x86_64_iterator_inline ... bench:         636 ns/iter (+/- 13)
test bitboard::bench_x86_64_pop_1           ... bench:       1,576 ns/iter (+/- 7)
test bitboard::bench_x86_64_pop_2           ... bench:       1,583 ns/iter (+/- 8)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment