Skip to content

Instantly share code, notes, and snippets.

@kaja47
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaja47/4cb4404222c7b890cfc5 to your computer and use it in GitHub Desktop.
Save kaja47/4cb4404222c7b890cfc5 to your computer and use it in GitHub Desktop.
nearest neighbor
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <stdint.h>
#include <immintrin.h>
#include "sys/param.h"
float euclidean_distance_sq(float *vecs, int veclen, int a, int b) {
float d = 0;
for (int i = 0; i < veclen; i++) {
float t = vecs[veclen * a + i] - vecs[veclen * b + i];
d += t * t;
}
return d;
}
double euclidean_distance_sq_vec(double *vecs, int veclen, int a, int b) {
__m256d d = _mm256_set1_pd(0.0);
for (int i = 0; i < veclen; i+=4) {
__m256d aa = _mm256_load_pd(vecs + veclen * a + i);
__m256d bb = _mm256_load_pd(vecs + veclen * b + i);
__m256d t = _mm256_sub_pd(aa, bb);
d = _mm256_add_pd(d, _mm256_mul_pd(t, t));
}
__m256d sum = _mm256_hadd_pd(d, d);
return ((double*)&sum)[0] + ((double*)&sum)[2];
}
float euclidean_distance_sq_vec_fl(float *vecs, int veclen, int a, int b) {
__m256 d = _mm256_set1_ps(0.0);
for (int i = 0; i < veclen; i+=8) {
__m256 aa = _mm256_load_ps(vecs + veclen * a + i);
__m256 bb = _mm256_load_ps(vecs + veclen * b + i);
__m256 t = _mm256_sub_ps(aa, bb);
d = _mm256_add_ps(d, _mm256_mul_ps(t, t));
}
d = _mm256_hadd_ps(d, d);
d = _mm256_hadd_ps(d, d);
return ((float*)&d)[0] + ((float*)&d)[4];
}
float euclidean_distance_sq_ssse(float *vecs, int veclen, int a, int b) {
__m128 d = _mm_set1_ps(0.0f);
for (int i = 0; i < veclen; i+=4) {
__m128 aa = _mm_load_ps(vecs + veclen * a + i);
__m128 bb = _mm_load_ps(vecs + veclen * b + i);
__m128 t = _mm_sub_ps(aa, bb);
d = _mm_add_ps(d, _mm_mul_ps(t, t));
}
__m128 sum = _mm_hadd_ps(d, d);
return ((float*)&sum)[0] + ((float*)&sum)[1];
}
int nn(float *vecs, int veccnt, int veclen, int idx) {
int nearest = -1;
float dist = INFINITY;
for (int i = 0; i < veccnt; i++) {
float d = euclidean_distance_sq(vecs, veclen, idx, i);
if (i != idx && d < dist) {
nearest = i;
dist = d;
}
}
return nearest;
}
struct idxdist {
int idx;
float dist;
};
int main(int argc, char *argv[]) {
int veccnt = atoi(argv[1]);
int veclen = atoi(argv[2]);
if (veclen % 8 != 0) {
printf("veclen not divisible by 8\n");
return 1;
}
printf("count: %d, vector length: %d\n", veccnt, veclen);
float *vecs;
posix_memalign((void**)&vecs, 32, sizeof(float) * veccnt * veclen);
struct idxdist *idxdists = malloc(sizeof(struct idxdist) * veccnt);
srand(47);
for (int i = 0; i < veccnt * veclen; i++) {
vecs[i] = ((float) rand() / (float) RAND_MAX);
}
for (int i = 0; i < veccnt; i++) {
idxdists[i].idx = -1;
idxdists[i].dist = INFINITY;
}
/*
for (int i = 0; i < veccnt; i++) {
idxdists[i].idx = nn(vecs, veccnt, veclen, i);
}
*/
int tile = 64;
#pragma omp parallel for
for (int ti = 0; ti < veccnt; ti += tile) {
for (int tj = 0; tj < veccnt; tj += tile) {
int maxi = MIN(ti+tile, veccnt);
for (int i = ti; i < maxi; i++) { // current vec idx
int maxj = MIN(tj+tile, veccnt);
for (int j = tj; j < maxj; j++) {
float d = euclidean_distance_sq_vec_fl(vecs, veclen, i, j);
if (j != i && d < idxdists[i].dist) {
idxdists[i].idx = j;
idxdists[i].dist = d;
}
}
}
}
}
for (int i = 0; i < MIN(32, veccnt); i++) {
printf("%d ", idxdists[i].idx);
}
printf("\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment