Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Improved n-queens code

View deasm.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
#include <immintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <stdio.h>
 
// GCC/Clang vector-extension type: a 16-byte vector of unsigned short
// elements (eight lanes when unsigned short is 16 bits wide). Element-wise
// operators such as |, << and >> apply to every lane at once.
typedef unsigned short vector_t __attribute__((vector_size(16)));
 
/**
 * Count the solutions of the N-queens problem on a depth x depth board.
 *
 * Iterative bitmask backtracking, one queen per row. The set of attacked
 * columns for the current row is kept in three 16-bit lanes of an SSE
 * register so that all three are updated together:
 *   lane 0 - columns already occupied,
 *   lane 1 - diagonal threats, shifted right by one for every row descended,
 *   lane 2 - diagonal threats, shifted left by one for every row descended.
 * The remaining lanes are carried along but never influence the result.
 *
 * @param depth  board size; must be in the range 1..16 because each column
 *               is one bit of a 16-bit lane.
 * @return number of distinct placements, or 0 when depth is out of range.
 *
 * The target attribute lets the SSE4.1 blend intrinsic be used even when the
 * translation unit is not compiled with -msse4.1.
 */
__attribute__((target("sse4.1")))
int nqueens(int depth) {
    enum { MAX_BOARD = 16 };

    // Reject sizes that would make the shift below undefined, truncate the
    // 16-bit column mask, or overrun the per-row stacks.
    if (depth < 1 || depth > MAX_BOARD) {
        return 0;
    }

    const unsigned short full = (unsigned short)((1u << depth) - 1u);
    // Only lane 0 of the mask is populated; the other lanes stay zero.
    const __m128i mask = _mm_cvtsi32_si128(full);

    // One saved frame per row that has been descended through. Keeping the
    // vectors in their own __m128i array guarantees 16-byte alignment and
    // avoids type-punning through an integer buffer.
    unsigned short saved_poss[MAX_BOARD];
    __m128i saved_attacked[MAX_BOARD];

    unsigned short poss = full;          // free columns left to try in this row
    __m128i attacked = _mm_setzero_si128();
    int row = 0;                         // current row == number of saved frames
    int sum = 0;

    for (;;) {
        if (poss == 0) {
            // Row exhausted: backtrack, or finish when already at the top.
            if (row == 0) {
                break;
            }
            --row;
            attacked = saved_attacked[row];
            poss = saved_poss[row];
            continue;
        }

        // Take the lowest free column and remove it from the candidates.
        const unsigned short bit = (unsigned short)(poss & -poss);
        poss = (unsigned short)(poss - bit);

        if (row == depth - 1) {
            // A queen fits in the last row: one complete solution.
            sum++;
            continue;
        }

        // Remember what is left to try in this row, then descend.
        saved_poss[row] = poss;
        saved_attacked[row] = attacked;
        row++;

        // Record the new queen in every lane, then advance the two diagonal
        // lanes by one row: lane 2 (blend mask 4) takes the left-shifted
        // value, lane 1 (blend mask 2) takes the right-shifted value.
        attacked = _mm_or_si128(attacked, _mm_set1_epi16((short)bit));
        attacked = _mm_blend_epi16(attacked, _mm_slli_epi16(attacked, 1), 4);
        attacked = _mm_blend_epi16(attacked, _mm_srli_epi16(attacked, 1), 2);

        // Byte shifts bring lane 2 and lane 1 down onto lane 0, so lane 0 of
        // the OR holds every threatened column for the next row.
        const __m128i threats = _mm_or_si128(
            _mm_or_si128(_mm_srli_si128(attacked, 4),
                         _mm_srli_si128(attacked, 2)),
            attacked);
        poss = (unsigned short)_mm_cvtsi128_si32(
            _mm_andnot_si128(threats, mask));
    }
    return sum;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.