Skip to content

Instantly share code, notes, and snippets.

@pankkor
Last active April 18, 2024 13:00
Show Gist options
  • Save pankkor/608e9702c0051a9692c94e2baa277c7e to your computer and use it in GitHub Desktop.
Save pankkor/608e9702c0051a9692c94e2baa277c7e to your computer and use it in GitHub Desktop.
Twitter troll task: remove the if-else chain from a grading function. Compile with clang/gcc -Wpedantic -Wall -Wextra -O3 -msse2 -mpopcnt
#include <immintrin.h>
// Short integer aliases used throughout this file.
typedef unsigned int u32;
typedef unsigned char u8;

/*
 * Map a numeric score to a letter grade without an if-else chain.
 *
 * score: points scored; any value above 100 is treated as 100.
 *
 * Returns 'A' for 90 and above, 'B' for 80-89, 'C' for 70-79,
 * 'D' for 60-69 and 'F' for everything below 60.
 */
char check_grade(u8 score) {
  // Clamp first: the comparison below is a *signed* byte compare, so the
  // value must stay below 128 to be interpreted correctly.
  score = score > 100 ? 100 : score;

  __m128i scores = _mm_set1_epi8(score);

  // Only the four highest lanes hold real thresholds. The remaining lanes
  // hold 0x7F, the largest signed byte, which no score can exceed, so they
  // never contribute a bit to the mask.
  __m128i brackets = _mm_set_epi8(
      89, 79, 69, 59,
      0x7F, 0x7F, 0x7F, 0x7F,
      0x7F, 0x7F, 0x7F, 0x7F,
      0x7F, 0x7F, 0x7F, 0x7F);

  __m128i res_mask = _mm_cmpgt_epi8(scores, brackets);
  u32 res = (u32)_mm_movemask_epi8(res_mask);

  // The number of thresholds exceeded (0..4) is the index into the grade
  // table. The compiler builtin is used instead of _mm_popcnt_u32 so the
  // code builds without -mpopcnt; with that flag enabled it still compiles
  // down to the POPCNT instruction.
  int index = __builtin_popcount(res);
  return "FDCBA"[index];
}
/*
 * Grade four scores at once using a single SIMD comparison.
 *
 * out_grades: receives four letter grades; out_grades[i] is the grade for
 *             score[i].
 * score:      four input scores; any value above 100 is treated as 100.
 *
 * Grading scale: 'A' for 90 and above, 'B' for 80-89, 'C' for 70-79,
 * 'D' for 60-69 and 'F' for everything below 60.
 */
void check_grade4(char out_grades[restrict 4], u8 score[restrict 4]) {
  // Each score is replicated across a group of four lanes so it can be
  // compared against all four thresholds at once. _mm_set_epi8 takes its
  // arguments from the highest lane down, so score[0] lands in lanes 15..12.
  __m128i scores = _mm_set_epi8(
      score[0], score[0], score[0], score[0],
      score[1], score[1], score[1], score[1],
      score[2], score[2], score[2], score[2],
      score[3], score[3], score[3], score[3]);

  // Unsigned min clamps every lane to 100, which keeps the values below 128
  // for the signed byte comparison that follows.
  __m128i max_scores = _mm_set1_epi8(100);
  scores = _mm_min_epu8(scores, max_scores);

  __m128i brackets = _mm_set_epi8(
      89, 79, 69, 59,
      89, 79, 69, 59,
      89, 79, 69, 59,
      89, 79, 69, 59);

  __m128i res_masks = _mm_cmpgt_epi8(scores, brackets);
  u32 res = (u32)_mm_movemask_epi8(res_masks);

  static const char * const LUT = "FDCBA";

  // The mask has one bit per lane, so each nibble belongs to one score:
  // bits 15..12 to score[0] down to bits 3..0 for score[3]. The number of
  // set bits in a nibble is the number of thresholds exceeded, which is the
  // index into the grade table. The compiler builtin is used instead of
  // _mm_popcnt_u32 so the code builds without -mpopcnt; with that flag
  // enabled it still compiles down to the POPCNT instruction.
  for (int i = 0; i < 4; ++i) {
    u32 nibble = (res >> (12 - 4 * i)) & 0xFu;
    out_grades[i] = LUT[__builtin_popcount(nibble)];
  }
}
// Test
#include <stdio.h>
// Request a minimum alignment of `bytes` for the declared object, using the
// GCC/Clang `aligned` attribute.
#define ALIGNED(bytes) __attribute__ ((aligned (bytes)))
/*
 * Self-test: grades every score from 0 to 127 with both check_grade and
 * check_grade4, prints the two results side by side, and traps on the first
 * row where they disagree.
 */
int main(void) {
  enum { COUNT = 128 };
  ALIGNED(16) u8 scores[COUNT];
  ALIGNED(16) char grades[COUNT];
  ALIGNED(16) char grades4[COUNT];

  // The inputs are simply 0..COUNT-1; grade each one individually as it is
  // written.
  for (u32 n = 0; n < COUNT; ++n) {
    scores[n] = n;
    grades[n] = check_grade(scores[n]);
  }

  // Grade the same inputs again, four at a time.
  for (u32 base = 0; base < COUNT; base += 4) {
    check_grade4(&grades4[base], &scores[base]);
  }

  puts("Score | check_grade | check_grade4'");
  puts("-----------------------------------");

  u32 row = 0;
  while (row < COUNT) {
    printf("%-3u | '%c' | '%c'\n", row, grades[row], grades4[row]);
    // Flush before comparing so the offending row is already on screen if
    // the trap below fires.
    fflush(stdout);
    if (grades4[row] != grades[row]) {
      __builtin_trap();
    }
    ++row;
  }

  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment