Skip to content

Instantly share code, notes, and snippets.

@leuc
Created January 27, 2023 07:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leuc/20a43793d9200b5278ea4fa5b4314a85 to your computer and use it in GitHub Desktop.
Save leuc/20a43793d9200b5278ea4fa5b4314a85 to your computer and use it in GitHub Desktop.
test igraph umap layout with avx2
src/layout/umap.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/src/layout/umap.c b/src/layout/umap.c
index d1f334313..a9e40d7d7 100644
--- a/src/layout/umap.c
+++ b/src/layout/umap.c
@@ -28,6 +28,7 @@
#include "layout/layout_internal.h"
#include "core/interruption.h"
+#include <immintrin.h>
#include <math.h>
@@ -720,6 +721,7 @@ static igraph_real_t igraph_i_umap_clip_force(igraph_real_t force, igraph_real_t
return force > limit ? limit : (force < -limit ? -limit : force);
}
+/*
static igraph_real_t igraph_i_umap_attract(
igraph_real_t dsq,
igraph_real_t a,
@@ -727,7 +729,42 @@ static igraph_real_t igraph_i_umap_attract(
{
return - (2 * a * b * pow(dsq, b - 1.)) / (1. + a * pow(dsq, b));
}
+*/
+
+
+#ifndef _mm256_extract_pd
+#define _mm256_extract_pd(R, I) (*((double*)(&R)+I))
+#endif
+
+__m256d _mm256_pow_pd(__m256d base, __m256d exponent) {
+ __m256d result = _mm256_set1_pd(1.0);
+ __m256d base_n = base;
+ __m256d exponent_n = exponent;
+ for (int i = 0; i < 4; i++) {
+ if (_mm256_extract_pd(exponent_n, i) >= 1)
+ result = _mm256_mul_pd(result, base_n);
+ base_n = _mm256_mul_pd(base_n, base);
+ exponent_n = _mm256_sub_pd(exponent_n, _mm256_set1_pd(1));
+ }
+ return result;
+}
+
+static igraph_real_t igraph_i_umap_attract(
+ igraph_real_t dsq,
+ igraph_real_t a,
+ igraph_real_t b)
+{
+ __m256d dsq_vec = _mm256_set1_pd(dsq);
+ __m256d a_vec = _mm256_set1_pd(a);
+ __m256d b_vec = _mm256_set1_pd(b);
+ __m256d temp1 = _mm256_mul_pd(a_vec, _mm256_pow_pd(dsq_vec, b_vec));
+ __m256d temp2 = _mm256_add_pd(temp1, _mm256_set1_pd(1));
+ __m256d temp3 = _mm256_mul_pd(_mm256_mul_pd(b_vec, _mm256_set1_pd(2)), _mm256_pow_pd(dsq_vec, _mm256_sub_pd(b_vec, _mm256_set1_pd(1))));
+ __m256d result = _mm256_div_pd(_mm256_mul_pd(temp3, a_vec), temp2);
+ return -_mm256_extract_pd(result, 0);
+}
+/*
static igraph_real_t igraph_i_umap_repel(
igraph_real_t dsq,
igraph_real_t a,
@@ -737,6 +774,27 @@ static igraph_real_t igraph_i_umap_repel(
return (2 * b) / (dsq_min + dsq) / (1. + a * pow(dsq, b));
}
+*/
+
+
+
+static igraph_real_t igraph_i_umap_repel(igraph_real_t dsq, igraph_real_t a, igraph_real_t b) {
+ igraph_real_t dsq_min = UMAP_CORRECT_DISTANCE_REPULSION * UMAP_CORRECT_DISTANCE_REPULSION;
+
+ __m256d dsq_min_vec = _mm256_set1_pd(dsq_min);
+ __m256d dsq_vec = _mm256_set1_pd(dsq);
+ __m256d a_vec = _mm256_set1_pd(a);
+ __m256d b_vec = _mm256_set1_pd(b);
+ __m256d dsq_pow_b = _mm256_pow_pd(dsq_vec, b_vec);
+ __m256d dsq_pow_b_a = _mm256_mul_pd(dsq_pow_b, a_vec);
+ __m256d one_plus_dsq_pow_b_a = _mm256_add_pd(dsq_pow_b_a, _mm256_set1_pd(1.0));
+ __m256d dsq_plus_dsq_min = _mm256_add_pd(dsq_vec, dsq_min_vec);
+ __m256d two_b_vec = _mm256_set1_pd(2.0 * b);
+ __m256d result = _mm256_div_pd(two_b_vec, _mm256_div_pd(dsq_plus_dsq_min, one_plus_dsq_pow_b_a));
+ return _mm256_extract_pd(result,0);
+}
+
+
static igraph_error_t igraph_i_umap_apply_forces(
const igraph_t *graph,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment