Skip to content

Instantly share code, notes, and snippets.

@kaja47 kaja47/1nn.c
Last active Aug 29, 2015

Embed
What would you like to do?
nearest neighbor
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <stdint.h>
#include <immintrin.h>
#include "sys/param.h"
float euclidean_distance_sq(float *vecs, int veclen, int a, int b) {
float d = 0;
for (int i = 0; i < veclen; i++) {
float t = vecs[veclen * a + i] - vecs[veclen * b + i];
d += t * t;
}
return d;
}
double euclidean_distance_sq_vec(double *vecs, int veclen, int a, int b) {
__m256d d = _mm256_set1_pd(0.0);
for (int i = 0; i < veclen; i+=4) {
__m256d aa = _mm256_load_pd(vecs + veclen * a + i);
__m256d bb = _mm256_load_pd(vecs + veclen * b + i);
__m256d t = _mm256_sub_pd(aa, bb);
d = _mm256_add_pd(d, _mm256_mul_pd(t, t));
}
__m256d sum = _mm256_hadd_pd(d, d);
return ((double*)&sum)[0] + ((double*)&sum)[2];
}
float euclidean_distance_sq_vec_fl(float *vecs, int veclen, int a, int b) {
__m256 d = _mm256_set1_ps(0.0);
for (int i = 0; i < veclen; i+=8) {
__m256 aa = _mm256_load_ps(vecs + veclen * a + i);
__m256 bb = _mm256_load_ps(vecs + veclen * b + i);
__m256 t = _mm256_sub_ps(aa, bb);
d = _mm256_add_ps(d, _mm256_mul_ps(t, t));
}
d = _mm256_hadd_ps(d, d);
d = _mm256_hadd_ps(d, d);
return ((float*)&d)[0] + ((float*)&d)[4];
}
float euclidean_distance_sq_ssse(float *vecs, int veclen, int a, int b) {
__m128 d = _mm_set1_ps(0.0f);
for (int i = 0; i < veclen; i+=4) {
__m128 aa = _mm_load_ps(vecs + veclen * a + i);
__m128 bb = _mm_load_ps(vecs + veclen * b + i);
__m128 t = _mm_sub_ps(aa, bb);
d = _mm_add_ps(d, _mm_mul_ps(t, t));
}
__m128 sum = _mm_hadd_ps(d, d);
return ((float*)&sum)[0] + ((float*)&sum)[1];
}
int nn(float *vecs, int veccnt, int veclen, int idx) {
int nearest = -1;
float dist = INFINITY;
for (int i = 0; i < veccnt; i++) {
float d = euclidean_distance_sq(vecs, veclen, idx, i);
if (i != idx && d < dist) {
nearest = i;
dist = d;
}
}
return nearest;
}
struct idxdist {
int idx;
float dist;
};
int main(int argc, char *argv[]) {
int veccnt = atoi(argv[1]);
int veclen = atoi(argv[2]);
if (veclen % 8 != 0) {
printf("veclen not divisible by 8\n");
return 1;
}
printf("count: %d, vector length: %d\n", veccnt, veclen);
float *vecs;
posix_memalign((void**)&vecs, 32, sizeof(float) * veccnt * veclen);
struct idxdist *idxdists = malloc(sizeof(struct idxdist) * veccnt);
srand(47);
for (int i = 0; i < veccnt * veclen; i++) {
vecs[i] = ((float) rand() / (float) RAND_MAX);
}
for (int i = 0; i < veccnt; i++) {
idxdists[i].idx = -1;
idxdists[i].dist = INFINITY;
}
/*
for (int i = 0; i < veccnt; i++) {
idxdists[i].idx = nn(vecs, veccnt, veclen, i);
}
*/
int tile = 64;
#pragma omp parallel for
for (int ti = 0; ti < veccnt; ti += tile) {
for (int tj = 0; tj < veccnt; tj += tile) {
int maxi = MIN(ti+tile, veccnt);
for (int i = ti; i < maxi; i++) { // current vec idx
int maxj = MIN(tj+tile, veccnt);
for (int j = tj; j < maxj; j++) {
float d = euclidean_distance_sq_vec_fl(vecs, veclen, i, j);
if (j != i && d < idxdists[i].dist) {
idxdists[i].idx = j;
idxdists[i].dist = d;
}
}
}
}
}
for (int i = 0; i < MIN(32, veccnt); i++) {
printf("%d ", idxdists[i].idx);
}
printf("\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.