Last active
August 29, 2015 14:26
-
-
Save kaja47/4cb4404222c7b890cfc5 to your computer and use it in GitHub Desktop.
nearest neighbor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <time.h> | |
#include <math.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <immintrin.h> | |
#include "sys/param.h" | |
float euclidean_distance_sq(float *vecs, int veclen, int a, int b) { | |
float d = 0; | |
for (int i = 0; i < veclen; i++) { | |
float t = vecs[veclen * a + i] - vecs[veclen * b + i]; | |
d += t * t; | |
} | |
return d; | |
} | |
double euclidean_distance_sq_vec(double *vecs, int veclen, int a, int b) { | |
__m256d d = _mm256_set1_pd(0.0); | |
for (int i = 0; i < veclen; i+=4) { | |
__m256d aa = _mm256_load_pd(vecs + veclen * a + i); | |
__m256d bb = _mm256_load_pd(vecs + veclen * b + i); | |
__m256d t = _mm256_sub_pd(aa, bb); | |
d = _mm256_add_pd(d, _mm256_mul_pd(t, t)); | |
} | |
__m256d sum = _mm256_hadd_pd(d, d); | |
return ((double*)&sum)[0] + ((double*)&sum)[2]; | |
} | |
float euclidean_distance_sq_vec_fl(float *vecs, int veclen, int a, int b) { | |
__m256 d = _mm256_set1_ps(0.0); | |
for (int i = 0; i < veclen; i+=8) { | |
__m256 aa = _mm256_load_ps(vecs + veclen * a + i); | |
__m256 bb = _mm256_load_ps(vecs + veclen * b + i); | |
__m256 t = _mm256_sub_ps(aa, bb); | |
d = _mm256_add_ps(d, _mm256_mul_ps(t, t)); | |
} | |
d = _mm256_hadd_ps(d, d); | |
d = _mm256_hadd_ps(d, d); | |
return ((float*)&d)[0] + ((float*)&d)[4]; | |
} | |
float euclidean_distance_sq_ssse(float *vecs, int veclen, int a, int b) { | |
__m128 d = _mm_set1_ps(0.0f); | |
for (int i = 0; i < veclen; i+=4) { | |
__m128 aa = _mm_load_ps(vecs + veclen * a + i); | |
__m128 bb = _mm_load_ps(vecs + veclen * b + i); | |
__m128 t = _mm_sub_ps(aa, bb); | |
d = _mm_add_ps(d, _mm_mul_ps(t, t)); | |
} | |
__m128 sum = _mm_hadd_ps(d, d); | |
return ((float*)&sum)[0] + ((float*)&sum)[1]; | |
} | |
int nn(float *vecs, int veccnt, int veclen, int idx) { | |
int nearest = -1; | |
float dist = INFINITY; | |
for (int i = 0; i < veccnt; i++) { | |
float d = euclidean_distance_sq(vecs, veclen, idx, i); | |
if (i != idx && d < dist) { | |
nearest = i; | |
dist = d; | |
} | |
} | |
return nearest; | |
} | |
struct idxdist { | |
int idx; | |
float dist; | |
}; | |
int main(int argc, char *argv[]) { | |
int veccnt = atoi(argv[1]); | |
int veclen = atoi(argv[2]); | |
if (veclen % 8 != 0) { | |
printf("veclen not divisible by 8\n"); | |
return 1; | |
} | |
printf("count: %d, vector length: %d\n", veccnt, veclen); | |
float *vecs; | |
posix_memalign((void**)&vecs, 32, sizeof(float) * veccnt * veclen); | |
struct idxdist *idxdists = malloc(sizeof(struct idxdist) * veccnt); | |
srand(47); | |
for (int i = 0; i < veccnt * veclen; i++) { | |
vecs[i] = ((float) rand() / (float) RAND_MAX); | |
} | |
for (int i = 0; i < veccnt; i++) { | |
idxdists[i].idx = -1; | |
idxdists[i].dist = INFINITY; | |
} | |
/* | |
for (int i = 0; i < veccnt; i++) { | |
idxdists[i].idx = nn(vecs, veccnt, veclen, i); | |
} | |
*/ | |
int tile = 64; | |
#pragma omp parallel for | |
for (int ti = 0; ti < veccnt; ti += tile) { | |
for (int tj = 0; tj < veccnt; tj += tile) { | |
int maxi = MIN(ti+tile, veccnt); | |
for (int i = ti; i < maxi; i++) { // current vec idx | |
int maxj = MIN(tj+tile, veccnt); | |
for (int j = tj; j < maxj; j++) { | |
float d = euclidean_distance_sq_vec_fl(vecs, veclen, i, j); | |
if (j != i && d < idxdists[i].dist) { | |
idxdists[i].idx = j; | |
idxdists[i].dist = d; | |
} | |
} | |
} | |
} | |
} | |
for (int i = 0; i < MIN(32, veccnt); i++) { | |
printf("%d ", idxdists[i].idx); | |
} | |
printf("\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment