Created
February 6, 2019 05:52
-
-
Save easyaspi314/6e330ef7edf2b7a396a406b381163486 to your computer and use it in GitHub Desktop.
tolower SIMD implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Created by easyaspi314. | |
* Released into the public domain. */ | |
#include <ctype.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#ifdef __SSE2__ | |
#include <immintrin.h> | |
/* SSE2 implementation: converts every uppercase ASCII letter ('A'..'Z')
 * in the NUL-terminated string `str` to lowercase, in place.
 *
 * All other bytes, including those with the high bit set, are left
 * untouched. The result does not depend on the current locale.
 *
 * str: writable, NUL-terminated string; must not be NULL. */
void StringToLower(char *str)
{
    size_t len = strlen(str);
    /* Exclusive bounds so that the strict compares below accept both
     * 'A' and 'Z'. The compares are signed: bytes >= 0x80 are negative
     * and therefore never fall inside the range. */
    const __m128i belowA = _mm_set1_epi8('A' - 1);
    const __m128i aboveZ = _mm_set1_epi8('Z' + 1);
    const __m128i caseDelta = _mm_set1_epi8('a' - 'A');

    /* Process 16 bytes at a time; unaligned loads/stores are used
     * because nothing is known about the alignment of str. */
    while (len >= 16) {
        __m128i inp = _mm_loadu_si128((const __m128i *)str);
        /* x >= 'A': 0xFF, else 0x00 */
        __m128i geA = _mm_cmpgt_epi8(inp, belowA);
        /* x <= 'Z': 0xFF, else 0x00 */
        __m128i leZ = _mm_cmplt_epi8(inp, aboveZ);
        /* 'A' <= x <= 'Z': 0xFF, else 0x00 */
        __m128i isUpper = _mm_and_si128(geA, leZ);
        /* 'A' <= x <= 'Z': 'a' - 'A', else 0x00 */
        __m128i toAdd = _mm_and_si128(isUpper, caseDelta);
        /* Adding the delta turns uppercase into lowercase. */
        _mm_storeu_si128((__m128i *)str, _mm_add_epi8(inp, toAdd));
        len -= 16;
        str += 16;
    }

    /* Remaining 0..15 bytes: same ASCII-only rule as the vector loop, so
     * the result does not depend on where in the string a byte sits. */
    while (len-- > 0) {
        if (*str >= 'A' && *str <= 'Z') {
            *str = (char)(*str + ('a' - 'A'));
        }
        ++str;
    }
}
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) | |
#include <arm_neon.h> | |
/* NEON implementation: converts every uppercase ASCII letter ('A'..'Z')
 * in the NUL-terminated string `str` to lowercase, in place.
 *
 * All other bytes, including those with the high bit set, are left
 * untouched. The result does not depend on the current locale.
 *
 * str: writable, NUL-terminated string; must not be NULL. */
void StringToLower(char *str)
{
    size_t len = strlen(str);
    /* Exclusive bounds so that the strict compares below accept both
     * 'A' and 'Z'. The compares are unsigned, so bytes >= 0x80 are
     * simply above the range. */
    const uint8x16_t belowA = vdupq_n_u8('A' - 1);
    const uint8x16_t aboveZ = vdupq_n_u8('Z' + 1);
    const uint8x16_t caseDelta = vdupq_n_u8('a' - 'A');

    /* Process 16 bytes at a time. */
    while (len >= 16) {
        uint8x16_t inp = vld1q_u8((const uint8_t *)str);
        /* x >= 'A': 0xFF, else 0x00 */
        uint8x16_t geA = vcgtq_u8(inp, belowA);
        /* x <= 'Z': 0xFF, else 0x00 */
        uint8x16_t leZ = vcltq_u8(inp, aboveZ);
        /* 'A' <= x <= 'Z': 0xFF, else 0x00 */
        uint8x16_t isUpper = vandq_u8(geA, leZ);
        /* 'A' <= x <= 'Z': 'a' - 'A', else 0x00 */
        uint8x16_t toAdd = vandq_u8(isUpper, caseDelta);
        /* Adding the delta turns uppercase into lowercase. */
        vst1q_u8((uint8_t *)str, vaddq_u8(inp, toAdd));
        len -= 16;
        str += 16;
    }

    /* Remaining 0..15 bytes: same ASCII-only rule as the vector loop, so
     * the result does not depend on where in the string a byte sits. */
    while (len-- > 0) {
        if (*str >= 'A' && *str <= 'Z') {
            *str = (char)(*str + ('a' - 'A'));
        }
        ++str;
    }
}
#else | |
/* Scalar fallback: lowercases the NUL-terminated string `str` in place,
 * one byte at a time, using tolower() from <ctype.h>.
 *
 * Because tolower() is used, the mapping follows the current C locale.
 *
 * str: writable, NUL-terminated string; must not be NULL. */
void StringToLower(char *str)
{
    for (; *str != '\0'; ++str) {
        /* tolower() requires a value representable as unsigned char (or
         * EOF); passing a negative plain char is undefined behavior. */
        *str = (char)tolower((unsigned char)*str);
    }
}
#endif | |
#ifdef TEST | |
#include <assert.h> | |
/* Smoke test, built only when TEST is defined: lowercases a sample
 * string, asserts the result, and prints it.
 *
 * NOTE(review): the sample is 25 bytes long, so in the SIMD builds only
 * its first 16 bytes ("Hello world 1234") go through the vector loop;
 * the "AZaz" boundary letters are handled by the scalar tail. */
int main(void)
{
    static const char expected[] = "hello world 12345 hi azaz";
    char text[] = "Hello world 12345 HI AZaz";

    StringToLower(text);
    assert(strcmp(text, expected) == 0);
    puts(text);
    return 0;
}
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment