Skip to content

@jcranmer /deasm.cpp
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Improved n-queens code
#include <immintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <stdio.h>
typedef unsigned short vector_t __attribute__((vector_size(16)));
int nqueens(int depth) {
unsigned short poss = (1 << depth) - 1;
int sum = 0;
vector_t attacked = {0};
vector_t mask = attacked;
mask[0] = poss;
static unsigned long long storage[32];
unsigned long sp = 0;
while (true) {
if (poss == 0) {
if (sp == 0) break;
attacked = *(vector_t*)(&storage[--sp]);
poss = storage[--sp];
continue;
}
unsigned short bit = poss & -poss;
poss = poss - bit;
if (sp / 2 == (depth - 1)) {
sum++;
continue;
}
storage[sp++] = poss;
*(vector_t*)(&storage[sp++]) = attacked;
// Compute the diffs to the registers
attacked = attacked | bit;
attacked = (vector_t)_mm_blend_epi16((__m128i)attacked,
(__m128i)(attacked << 1), 4);
attacked = (vector_t)_mm_blend_epi16((__m128i)attacked,
(__m128i)(attacked >> 1), 2);
vector_t temp = (vector_t)_mm_srli_si128((__m128i)attacked, 4) |
(vector_t)_mm_srli_si128((__m128i)attacked, 2) | attacked;
poss = _mm_andnot_si128((__m128i)temp, (__m128i)mask)[0];
}
return sum;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.