Skip to content

Instantly share code, notes, and snippets.

@0x1F9F1
Created October 10, 2023 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 0x1F9F1/9d44ebfaa31f7271d0ef11cc7f09d304 to your computer and use it in GitHub Desktop.
Save 0x1F9F1/9d44ebfaa31f7271d0ef11cc7f09d304 to your computer and use it in GitHub Desktop.
Rough attempt at some NEON audio conversion
/*
 * Convert 16 signed 8-bit samples read from src into 16 floats written to dst.
 *
 * Each output equals sample / 128.0f. Rather than an integer-to-float
 * conversion, the sample is added into the mantissa of 65536.0f (bit pattern
 * 0x47800000), where one mantissa step is worth 1/128, and the surplus is
 * removed afterwards with a single float add.
 *
 * All 16 input bytes are loaded before the first store to dst.
 */
void SDL_Convert_S8_to_F32_NEON(const Sint8* src, float* dst)
{
    const uint32x4_t exp_bits = vdupq_n_u32(0x47800000u); /* bits of 65536.0f */
    const float32x4_t rebias = vdupq_n_f32(-65537.0f);    /* -(65536 + 1) */
    const uint8x16_t sign_mask = vdupq_n_u8(0x80);

    /* Flipping the top bit maps -128..127 onto 0..255. */
    const uint8x16_t raw = vld1q_u8((const uint8_t*) src);
    const uint8x16_t unsigned_bytes = veorq_u8(raw, sign_mask);

    /* Zero-extend to 16 bits: samples 0..7 and samples 8..15. */
    const uint16x8_t lo = vmovl_u8(vget_low_u8(unsigned_bytes));
    const uint16x8_t hi = vmovl_u8(vget_high_u8(unsigned_bytes));

    /* Widen to 32 bits while adding into the float bit pattern. */
    const uint32x4_t bits0 = vaddw_u16(exp_bits, vget_low_u16(lo));
    const uint32x4_t bits1 = vaddw_u16(exp_bits, vget_high_u16(lo));
    const uint32x4_t bits2 = vaddw_u16(exp_bits, vget_low_u16(hi));
    const uint32x4_t bits3 = vaddw_u16(exp_bits, vget_high_u16(hi));

    vst1q_f32(dst + 0, vaddq_f32(vreinterpretq_f32_u32(bits0), rebias));
    vst1q_f32(dst + 4, vaddq_f32(vreinterpretq_f32_u32(bits1), rebias));
    vst1q_f32(dst + 8, vaddq_f32(vreinterpretq_f32_u32(bits2), rebias));
    vst1q_f32(dst + 12, vaddq_f32(vreinterpretq_f32_u32(bits3), rebias));
}
/*
 * Convert 16 unsigned 8-bit samples read from src into 16 floats written to dst.
 *
 * Each output equals (sample / 128.0f) - 1.0f. The sample is added into the
 * mantissa of 65536.0f (bit pattern 0x47800000), where one mantissa step is
 * worth 1/128; adding -65537.0f then removes the 65536 and applies the -1.
 *
 * All 16 input bytes are loaded before the first store to dst.
 */
void SDL_Convert_U8_to_F32_NEON(const Uint8* src, float* dst)
{
    const uint32x4_t exp_bits = vdupq_n_u32(0x47800000u); /* bits of 65536.0f */
    const float32x4_t rebias = vdupq_n_f32(-65537.0f);    /* -(65536 + 1) */

    const uint8x16_t raw = vld1q_u8((const uint8_t*) src);

    /* Zero-extend to 16 bits: samples 0..7 and samples 8..15. */
    const uint16x8_t lo = vmovl_u8(vget_low_u8(raw));
    const uint16x8_t hi = vmovl_u8(vget_high_u8(raw));

    /* Widen to 32 bits while adding into the float bit pattern. */
    const uint32x4_t bits0 = vaddw_u16(exp_bits, vget_low_u16(lo));
    const uint32x4_t bits1 = vaddw_u16(exp_bits, vget_high_u16(lo));
    const uint32x4_t bits2 = vaddw_u16(exp_bits, vget_low_u16(hi));
    const uint32x4_t bits3 = vaddw_u16(exp_bits, vget_high_u16(hi));

    vst1q_f32(dst + 0, vaddq_f32(vreinterpretq_f32_u32(bits0), rebias));
    vst1q_f32(dst + 4, vaddq_f32(vreinterpretq_f32_u32(bits1), rebias));
    vst1q_f32(dst + 8, vaddq_f32(vreinterpretq_f32_u32(bits2), rebias));
    vst1q_f32(dst + 12, vaddq_f32(vreinterpretq_f32_u32(bits3), rebias));
}
/*
 * Convert 16 signed 16-bit samples read from src into 16 floats written to dst.
 *
 * Each output equals sample / 32768.0f. The sample is added into the mantissa
 * of 256.0f (bit pattern 0x43800000), where one mantissa step is worth
 * 1/32768, and the surplus is removed afterwards with a single float add.
 *
 * Both 8-sample loads from src happen before the first store to dst.
 */
void SDL_Convert_S16_to_F32_NEON(const Sint16* src, float* dst)
{
    const uint32x4_t exp_bits = vdupq_n_u32(0x43800000); /* bits of 256.0f */
    const float32x4_t rebias = vdupq_n_f32(-257.0f);     /* -(256 + 1) */
    const uint16x8_t sign_mask = vdupq_n_u16(0x8000);

    /* Flipping the top bit maps -32768..32767 onto 0..65535. */
    const uint16x8_t lo = veorq_u16(vld1q_u16((const uint16_t*) src), sign_mask);
    const uint16x8_t hi = veorq_u16(vld1q_u16((const uint16_t*) (src + 8)), sign_mask);

    /* Widen to 32 bits while adding into the float bit pattern. */
    const uint32x4_t bits0 = vaddw_u16(exp_bits, vget_low_u16(lo));
    const uint32x4_t bits1 = vaddw_u16(exp_bits, vget_high_u16(lo));
    const uint32x4_t bits2 = vaddw_u16(exp_bits, vget_low_u16(hi));
    const uint32x4_t bits3 = vaddw_u16(exp_bits, vget_high_u16(hi));

    vst1q_f32(dst + 0, vaddq_f32(vreinterpretq_f32_u32(bits0), rebias));
    vst1q_f32(dst + 4, vaddq_f32(vreinterpretq_f32_u32(bits1), rebias));
    vst1q_f32(dst + 8, vaddq_f32(vreinterpretq_f32_u32(bits2), rebias));
    vst1q_f32(dst + 12, vaddq_f32(vreinterpretq_f32_u32(bits3), rebias));
}
/*
 * Convert 16 unsigned 16-bit samples read from src into 16 floats written to dst.
 *
 * Each output equals (sample / 32768.0f) - 1.0f. The sample is added into the
 * mantissa of 256.0f (bit pattern 0x43800000), where one mantissa step is
 * worth 1/32768; adding -257.0f then removes the 256 and applies the -1.
 *
 * Both 8-sample loads from src happen before the first store to dst.
 */
void SDL_Convert_U16_to_F32_NEON(const Uint16* src, float* dst)
{
    const uint32x4_t exp_bits = vdupq_n_u32(0x43800000); /* bits of 256.0f */
    const float32x4_t rebias = vdupq_n_f32(-257.0f);     /* -(256 + 1) */

    const uint16x8_t lo = vld1q_u16((const uint16_t*) src);
    const uint16x8_t hi = vld1q_u16((const uint16_t*) (src + 8));

    /* Widen to 32 bits while adding into the float bit pattern. */
    const uint32x4_t bits0 = vaddw_u16(exp_bits, vget_low_u16(lo));
    const uint32x4_t bits1 = vaddw_u16(exp_bits, vget_high_u16(lo));
    const uint32x4_t bits2 = vaddw_u16(exp_bits, vget_low_u16(hi));
    const uint32x4_t bits3 = vaddw_u16(exp_bits, vget_high_u16(hi));

    vst1q_f32(dst + 0, vaddq_f32(vreinterpretq_f32_u32(bits0), rebias));
    vst1q_f32(dst + 4, vaddq_f32(vreinterpretq_f32_u32(bits1), rebias));
    vst1q_f32(dst + 8, vaddq_f32(vreinterpretq_f32_u32(bits2), rebias));
    vst1q_f32(dst + 12, vaddq_f32(vreinterpretq_f32_u32(bits3), rebias));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment