Skip to content

Instantly share code, notes, and snippets.

@TeaPoly
Created February 20, 2023 08:25
Show Gist options
  • Save TeaPoly/04dbe7a750b1f8a25826f5eece8cd736 to your computer and use it in GitHub Desktop.
Save TeaPoly/04dbe7a750b1f8a25826f5eece8cd736 to your computer and use it in GitHub Desktop.
Neon to RISC-V V
/*
* Copyright (c), Lucky Wong.
*
* MIT License (MIT), http://opensource.org/licenses/MIT
* Full license can be found in the LICENSE file
*/
#include <riscv_vector.h>
/* NEON-style vector type names mapped onto RVV float32 register groups. */
typedef vfloat32m4_t float32x4_t;
typedef vfloat32m2_t float32x2_t;

/*
 * Element counts passed as the `vl` argument by the wrappers in this file:
 * 4 for the "q" (float32x4_t) forms and 2 for the float32x2_t forms.
 *
 * Declared `static` so each translation unit that includes this file gets
 * its own copy; a plain file-scope `const` object has external linkage in C
 * and would be defined more than once at link time.
 *
 * NOTE(review): the trailing comments equate these values with
 * vsetvlmax_e32m4() / vsetvlmax_e32m2(); that only holds for one specific
 * VLEN - confirm against the target hardware.
 */
static const size_t kE32m4 = 4; // vsetvlmax_e32m4();
static const size_t kE32m2 = 2; // vsetvlmax_e32m2();
/*
 * Stand-in for NEON vget_low_f32: returns part 0 of the m4 register group
 * as an m2 group.
 *
 * NOTE(review): vget selects a register-group part, not an element range;
 * this matches "lanes 0-1" only if an m2 group holds exactly two floats -
 * confirm for the target VLEN.
 */
__inline static float32x2_t vget_low_f32(float32x4_t x) {
    const float32x2_t lower_part = vget_v_f32m4_f32m2(x, 0);
    return lower_part;
}
/*
 * Stand-in for NEON vget_high_f32: returns part 1 of the m4 register group
 * as an m2 group.
 *
 * NOTE(review): vget selects a register-group part, not an element range;
 * this matches "lanes 2-3" only if an m2 group holds exactly two floats -
 * confirm for the target VLEN.
 */
__inline static float32x2_t vget_high_f32(float32x4_t x) {
    const float32x2_t upper_part = vget_v_f32m4_f32m2(x, 1);
    return upper_part;
}
/*
 * Stand-in for NEON vcombine_f32: builds an m4 register group whose part 0
 * is `low` and whose part 1 is `high`.
 *
 * The result starts from vundefined_f32m4() rather than reading the local
 * variable inside its own initializer (an indeterminate value). Both m2
 * parts are overwritten below, so nothing undefined is left in the result.
 *
 * NOTE(review): vset places whole m2 register-group parts; this lines up
 * with NEON lane order only if an m2 group holds exactly two floats -
 * confirm for the target VLEN.
 */
__inline static float32x4_t vcombine_f32(float32x2_t low, float32x2_t high) {
    float32x4_t combined = vundefined_f32m4();
    combined = vset_v_f32m2_f32m4(combined, 0, low);
    combined = vset_v_f32m2_f32m4(combined, 1, high);
    return combined;
}
/*
 * Stand-in for NEON vld1q_f32: unit-stride load of kE32m4 floats starting
 * at `ptr` into an m4 register group.
 */
__inline static float32x4_t vld1q_f32(float32_t const* ptr) {
    const float32x4_t loaded = vle32_v_f32m4(ptr, kE32m4);
    return loaded;
}
/*
 * Stand-in for NEON vld1_f32: unit-stride load of kE32m2 floats starting
 * at `ptr` into an m2 register group.
 */
__inline static float32x2_t vld1_f32 (float32_t const* ptr) {
    const float32x2_t loaded = vle32_v_f32m2(ptr, kE32m2);
    return loaded;
}
/*
 * Stand-in for NEON vaddq_f32: element-wise a + b over the first kE32m4
 * elements.
 */
__inline static float32x4_t vaddq_f32 (float32x4_t a, float32x4_t b) {
    const float32x4_t sum = vfadd_vv_f32m4(a, b, kE32m4);
    return sum;
}
/*
 * Stand-in for NEON vsubq_f32: element-wise a - b over the first kE32m4
 * elements.
 */
__inline static float32x4_t vsubq_f32 (float32x4_t a, float32x4_t b) {
    const float32x4_t difference = vfsub_vv_f32m4(a, b, kE32m4);
    return difference;
}
/*
 * Stand-in for NEON vmulq_f32: element-wise a * b over the first kE32m4
 * elements.
 */
__inline static float32x4_t vmulq_f32 (float32x4_t a, float32x4_t b) {
    const float32x4_t product = vfmul_vv_f32m4(a, b, kE32m4);
    return product;
}
/*
 * Stand-in for NEON vmlaq_f32: multiply-accumulate, a + b * c per element
 * over the first kE32m4 elements. vfmacc takes the accumulator as its
 * first operand, so `a` is passed there.
 */
__inline static float32x4_t vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) {
    const float32x4_t accumulated = vfmacc_vv_f32m4(a, b, c, kE32m4);
    return accumulated;
}
/*
 * Stand-in for NEON vmlsq_f32: multiply-subtract, a - b * c per element
 * over the first kE32m4 elements. vfnmsac computes -(b * c) + accumulator,
 * with `a` passed as the accumulator (first operand).
 */
__inline static float32x4_t vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) {
    const float32x4_t reduced = vfnmsac_vv_f32m4(a, b, c, kE32m4);
    return reduced;
}
/*
 * Stand-in for NEON vst1q_f32: unit-stride store of the first kE32m4
 * elements of `val` to memory starting at `ptr`.
 */
__inline static void vst1q_f32 (float32_t* ptr, float32x4_t val) {
    vse32_v_f32m4(
        ptr,
        val,
        kE32m4
    );
}
/*
 * Stand-in for NEON vst1_f32: unit-stride store of the first kE32m2
 * elements of `val` to memory starting at `ptr`.
 */
__inline static void vst1_f32 (float32_t* ptr, float32x2_t val) {
    vse32_v_f32m2(
        ptr,
        val,
        kE32m2
    );
}
/*
 * Stand-in for NEON vdup_lane_f32: returns a vector whose first kE32m2
 * elements all equal element `lane` of `vec`.
 *
 * Implemented with vrgather.vx, which reads one source element by index and
 * writes it to every active destination element. The previous version
 * extracted a scalar and re-inserted it with vfmv.s.f, which only writes
 * element 0 and took its remaining elements from an uninitialized local;
 * it also picked the source through vget, which selects a register of the
 * group rather than an element.
 *
 * lane: element index to broadcast; expected to be 0 or 1 as in NEON.
 */
__inline static float32x2_t vdup_lane_f32 (float32x2_t vec, const int lane) {
    return vrgather_vx_f32m2(vec, (size_t)lane, kE32m2);
}
/*
 * Stand-in for NEON vdupq_n_f32: returns a vector whose first kE32m4
 * elements are all set to `a`.
 *
 * Uses vfmv.v.f (scalar splat). The previous vfmv.s.f call only wrote
 * element 0 and took the remaining elements from an uninitialized local,
 * so lanes 1-3 were not duplicates of `a`.
 */
__inline static float32x4_t vdupq_n_f32 (float32_t a) {
    return vfmv_v_f_f32m4(a, kE32m4);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment