Created
October 10, 2023 21:49
-
-
Save 0x1F9F1/9d44ebfaa31f7271d0ef11cc7f09d304 to your computer and use it in GitHub Desktop.
Rough attempt at some NEON audio conversion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Convert 16 signed 8-bit samples to 16 floats using NEON.
 *
 * Reads exactly 16 bytes from src and writes exactly 16 floats to dst.
 * Each sample s becomes s / 128.0f, i.e. a value in [-1.0, 127/128].
 *
 * The conversion avoids an int->float instruction by building the float
 * bit pattern directly:
 *   - 0x47800000 is the bit pattern of 65536.0f, whose ULP is 1/128.
 *     Adding an integer k (0..255) to those bits yields 65536 + k/128.
 *   - Adding -65537.0f then leaves k/128 - 1, which is exactly representable.
 *   - XOR with 0x80 first rebiases the signed byte s to k = s + 128,
 *     so the final result is s/128.
 */
void SDL_Convert_S8_to_F32_NEON(const Sint8* src, float* dst)
{
    const uint8x16_t flipper = vdupq_n_u8(0x80);          /* signed -> offset-binary */
    const uint32x4_t caster = vdupq_n_u32(0x47800000u);   /* bits of 65536.0f */
    const float32x4_t offset = vdupq_n_f32(-65537.0f);    /* removes 65536 and recentres by -1 */

    /* Load 16 samples and flip the sign bit so they are unsigned 0..255. */
    uint8x16_t bytes = veorq_u8(vld1q_u8((const uint8_t*) src), flipper);

    /* Zero-extend 8 -> 16 bits; the second widening (16 -> 32) happens inside vaddw_u16. */
    uint16x8_t shorts1 = vmovl_u8(vget_low_u8(bytes));
    uint16x8_t shorts2 = vmovl_u8(vget_high_u8(bytes));

    /* Add each sample into the mantissa of 65536.0f, reinterpret as float, then subtract the bias. */
    float32x4_t floats1 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts1))), offset);
    float32x4_t floats2 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts1))), offset);
    float32x4_t floats3 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts2))), offset);
    float32x4_t floats4 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts2))), offset);

    /* Store in source order: dst[i] corresponds to src[i]. */
    vst1q_f32(&dst[0], floats1);
    vst1q_f32(&dst[4], floats2);
    vst1q_f32(&dst[8], floats3);
    vst1q_f32(&dst[12], floats4);
}
/*
 * Convert 16 unsigned 8-bit samples to 16 floats using NEON.
 *
 * Reads exactly 16 bytes from src and writes exactly 16 floats to dst.
 * Each sample u becomes u / 128.0f - 1.0f, i.e. a value in [-1.0, 127/128]
 * with u == 128 mapping to 0.0f.
 *
 * The conversion avoids an int->float instruction by building the float
 * bit pattern directly:
 *   - 0x47800000 is the bit pattern of 65536.0f, whose ULP is 1/128.
 *     Adding an integer u (0..255) to those bits yields 65536 + u/128.
 *   - Adding -65537.0f then leaves u/128 - 1, which is exactly representable.
 */
void SDL_Convert_U8_to_F32_NEON(const Uint8* src, float* dst)
{
    const uint32x4_t caster = vdupq_n_u32(0x47800000u);   /* bits of 65536.0f */
    const float32x4_t offset = vdupq_n_f32(-65537.0f);    /* removes 65536 and recentres by -1 */

    /* Load 16 samples; they are already unsigned so no sign flip is needed. */
    uint8x16_t bytes = vld1q_u8((const uint8_t*) src);

    /* Zero-extend 8 -> 16 bits; the second widening (16 -> 32) happens inside vaddw_u16. */
    uint16x8_t shorts1 = vmovl_u8(vget_low_u8(bytes));
    uint16x8_t shorts2 = vmovl_u8(vget_high_u8(bytes));

    /* Add each sample into the mantissa of 65536.0f, reinterpret as float, then subtract the bias. */
    float32x4_t floats1 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts1))), offset);
    float32x4_t floats2 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts1))), offset);
    float32x4_t floats3 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts2))), offset);
    float32x4_t floats4 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts2))), offset);

    /* Store in source order: dst[i] corresponds to src[i]. */
    vst1q_f32(&dst[0], floats1);
    vst1q_f32(&dst[4], floats2);
    vst1q_f32(&dst[8], floats3);
    vst1q_f32(&dst[12], floats4);
}
/*
 * Convert 16 signed 16-bit samples to 16 floats using NEON.
 *
 * Reads exactly 16 samples (32 bytes) from src and writes exactly 16 floats
 * to dst. Each sample s becomes s / 32768.0f, i.e. a value in
 * [-1.0, 32767/32768].
 *
 * The conversion avoids an int->float instruction by building the float
 * bit pattern directly:
 *   - 0x43800000 is the bit pattern of 256.0f, whose ULP is 1/32768.
 *     Adding an integer k (0..65535) to those bits yields 256 + k/32768.
 *   - Adding -257.0f then leaves k/32768 - 1, which is exactly representable.
 *   - XOR with 0x8000 first rebiases the signed sample s to k = s + 32768,
 *     so the final result is s/32768.
 */
void SDL_Convert_S16_to_F32_NEON(const Sint16* src, float* dst)
{
    const uint16x8_t flipper = vdupq_n_u16(0x8000);       /* signed -> offset-binary */
    const uint32x4_t caster = vdupq_n_u32(0x43800000u);   /* bits of 256.0f */
    const float32x4_t offset = vdupq_n_f32(-257.0f);      /* removes 256 and recentres by -1 */

    /* Load two groups of 8 samples and flip the sign bit so they are unsigned 0..65535. */
    uint16x8_t shorts1 = veorq_u16(vld1q_u16((const uint16_t*) &src[0]), flipper);
    uint16x8_t shorts2 = veorq_u16(vld1q_u16((const uint16_t*) &src[8]), flipper);

    /* vaddw_u16 zero-extends 16 -> 32 bits while adding into the mantissa of 256.0f. */
    float32x4_t floats1 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts1))), offset);
    float32x4_t floats2 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts1))), offset);
    float32x4_t floats3 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts2))), offset);
    float32x4_t floats4 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts2))), offset);

    /* Store in source order: dst[i] corresponds to src[i]. */
    vst1q_f32(&dst[0], floats1);
    vst1q_f32(&dst[4], floats2);
    vst1q_f32(&dst[8], floats3);
    vst1q_f32(&dst[12], floats4);
}
/*
 * Convert 16 unsigned 16-bit samples to 16 floats using NEON.
 *
 * Reads exactly 16 samples (32 bytes) from src and writes exactly 16 floats
 * to dst. Each sample u becomes u / 32768.0f - 1.0f, i.e. a value in
 * [-1.0, 32767/32768] with u == 32768 mapping to 0.0f.
 *
 * The conversion avoids an int->float instruction by building the float
 * bit pattern directly:
 *   - 0x43800000 is the bit pattern of 256.0f, whose ULP is 1/32768.
 *     Adding an integer u (0..65535) to those bits yields 256 + u/32768.
 *   - Adding -257.0f then leaves u/32768 - 1, which is exactly representable.
 */
void SDL_Convert_U16_to_F32_NEON(const Uint16* src, float* dst)
{
    const uint32x4_t caster = vdupq_n_u32(0x43800000u);   /* bits of 256.0f */
    const float32x4_t offset = vdupq_n_f32(-257.0f);      /* removes 256 and recentres by -1 */

    /* Load two groups of 8 samples; they are already unsigned so no sign flip is needed. */
    uint16x8_t shorts1 = vld1q_u16((const uint16_t*) &src[0]);
    uint16x8_t shorts2 = vld1q_u16((const uint16_t*) &src[8]);

    /* vaddw_u16 zero-extends 16 -> 32 bits while adding into the mantissa of 256.0f. */
    float32x4_t floats1 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts1))), offset);
    float32x4_t floats2 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts1))), offset);
    float32x4_t floats3 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_low_u16(shorts2))), offset);
    float32x4_t floats4 = vaddq_f32(vreinterpretq_f32_u32(vaddw_u16(caster, vget_high_u16(shorts2))), offset);

    /* Store in source order: dst[i] corresponds to src[i]. */
    vst1q_f32(&dst[0], floats1);
    vst1q_f32(&dst[4], floats2);
    vst1q_f32(&dst[8], floats3);
    vst1q_f32(&dst[12], floats4);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment