Last active
September 29, 2023 23:00
-
-
Save pcercuei/051c97840c52b1656aa42da2936d600e to your computer and use it in GitHub Desktop.
Small program to test GCC's loop unrolling on SH4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sys/time.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>
#include <kos.h>
/* Selects which conversion variant convert_buffer() runs (cases 0..11). */
static const unsigned int test = 8;

KOS_INIT_FLAGS(INIT_DEFAULT);

/*
 * Source (BGR555) and destination (RGB565) pixel buffers.
 *
 * convert_buffer() accesses these arrays through uint32_t and uint64_t
 * pointers in its wide variants, so request 8-byte alignment explicitly
 * instead of relying on the natural 2-byte alignment of uint16_t.
 */
static uint16_t buffer_bgr555[0x80000] __attribute__((aligned(8)));
static uint16_t buffer_rgb565[0x80000] __attribute__((aligned(8)));
/*
 * Return the current time reported by gettimeofday(), expressed as a
 * single 64-bit count of microseconds (seconds * 1000000 + microseconds).
 */
static uint64_t read_counter_us(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);

	/* Widen before multiplying so the product is computed in 64 bits. */
	return (uint64_t)tv.tv_sec * 1000000ull + (uint64_t)tv.tv_usec;
}
/*
 * Convert one 16-bit pixel by rearranging its three 5-bit fields:
 *   bits  0..4  move to bits 11..15,
 *   bits  5..9  move to bits  6..10,
 *   bits 10..14 move to bits  0..4.
 * Bit 15 of the input is discarded and bit 5 of the result is always 0.
 */
static uint16_t bgr555_to_rgb565(uint16_t bgr)
{
	return (uint16_t)(((bgr & 0x001f) << 11)
			  | ((bgr & 0x03e0) << 1)
			  | ((bgr & 0x7c00) >> 10));
}
/*
 * Convert two 16-bit pixels packed in one 32-bit word.
 *
 * Applies the same field rearrangement as the 16-bit conversion to each
 * 16-bit half at once; the masks are replicated per half so that no bit
 * shifted out of one pixel leaks into its neighbour.
 */
static uint32_t bgr32_to_rgb32(uint32_t bgr)
{
	return ((bgr & 0x001f001f) << 11)
		| ((bgr & 0x03e003e0) << 1)
		| ((bgr & 0x7c007c00) >> 10);
}
/*
 * Convert four 16-bit pixels packed in one 64-bit word.
 *
 * Applies the same field rearrangement as the 16-bit conversion to each
 * 16-bit lane at once; the masks are replicated per lane so that no bit
 * shifted out of one pixel leaks into its neighbour.
 */
static uint64_t bgr64_to_rgb64(uint64_t bgr)
{
	return ((bgr & 0x001f001f001f001full) << 11)
		| ((bgr & 0x03e003e003e003e0ull) << 1)
		| ((bgr & 0x7c007c007c007c00ull) >> 10);
}
static void convert_buffer(uint16_t * restrict bgr555, | |
uint16_t * restrict rgb565, | |
unsigned int size) | |
{ | |
unsigned int i; | |
uint32_t * restrict bgr32; | |
uint32_t * restrict rgb32; | |
uint64_t * restrict bgr64; | |
uint64_t * restrict rgb64; | |
switch (test) { | |
case 0: | |
for (i = 0; i < size; i++) | |
rgb565[i] = bgr555_to_rgb565(bgr555[i]); | |
break; | |
case 1: | |
for (i = 0; i < size; i += 2) { | |
rgb565[i + 0] = bgr555_to_rgb565(bgr555[i + 0]); | |
rgb565[i + 1] = bgr555_to_rgb565(bgr555[i + 1]); | |
} | |
break; | |
case 2: | |
for (i = 0; i < size; i += 4) { | |
rgb565[i + 0] = bgr555_to_rgb565(bgr555[i + 0]); | |
rgb565[i + 1] = bgr555_to_rgb565(bgr555[i + 1]); | |
rgb565[i + 2] = bgr555_to_rgb565(bgr555[i + 2]); | |
rgb565[i + 3] = bgr555_to_rgb565(bgr555[i + 3]); | |
} | |
break; | |
case 3: | |
for (i = 0; i < size; i += 8) { | |
rgb565[i + 0] = bgr555_to_rgb565(bgr555[i + 0]); | |
rgb565[i + 1] = bgr555_to_rgb565(bgr555[i + 1]); | |
rgb565[i + 2] = bgr555_to_rgb565(bgr555[i + 2]); | |
rgb565[i + 3] = bgr555_to_rgb565(bgr555[i + 3]); | |
rgb565[i + 4] = bgr555_to_rgb565(bgr555[i + 4]); | |
rgb565[i + 5] = bgr555_to_rgb565(bgr555[i + 5]); | |
rgb565[i + 6] = bgr555_to_rgb565(bgr555[i + 6]); | |
rgb565[i + 7] = bgr555_to_rgb565(bgr555[i + 7]); | |
} | |
break; | |
case 4: | |
bgr32 = (uint32_t *) bgr555; | |
rgb32 = (uint32_t *) rgb565; | |
for (i = 0; i < size / 2; i++) | |
rgb32[i] = bgr32_to_rgb32(bgr32[i]); | |
break; | |
case 5: | |
bgr32 = (uint32_t *) bgr555; | |
rgb32 = (uint32_t *) rgb565; | |
for (i = 0; i < size / 2; i += 2) { | |
rgb32[i + 0] = bgr32_to_rgb32(bgr32[i + 0]); | |
rgb32[i + 1] = bgr32_to_rgb32(bgr32[i + 1]); | |
} | |
break; | |
case 6: | |
bgr32 = (uint32_t *) bgr555; | |
rgb32 = (uint32_t *) rgb565; | |
for (i = 0; i < size / 2; i += 4) { | |
rgb32[i + 0] = bgr32_to_rgb32(bgr32[i + 0]); | |
rgb32[i + 1] = bgr32_to_rgb32(bgr32[i + 1]); | |
rgb32[i + 2] = bgr32_to_rgb32(bgr32[i + 2]); | |
rgb32[i + 3] = bgr32_to_rgb32(bgr32[i + 3]); | |
} | |
break; | |
case 7: | |
bgr32 = (uint32_t *) bgr555; | |
rgb32 = (uint32_t *) rgb565; | |
for (i = 0; i < size / 2; i += 8) { | |
rgb32[i + 0] = bgr32_to_rgb32(bgr32[i + 0]); | |
rgb32[i + 1] = bgr32_to_rgb32(bgr32[i + 1]); | |
rgb32[i + 2] = bgr32_to_rgb32(bgr32[i + 2]); | |
rgb32[i + 3] = bgr32_to_rgb32(bgr32[i + 3]); | |
rgb32[i + 4] = bgr32_to_rgb32(bgr32[i + 4]); | |
rgb32[i + 5] = bgr32_to_rgb32(bgr32[i + 5]); | |
rgb32[i + 6] = bgr32_to_rgb32(bgr32[i + 6]); | |
rgb32[i + 7] = bgr32_to_rgb32(bgr32[i + 7]); | |
} | |
break; | |
case 8: | |
bgr64 = (uint64_t *) bgr555; | |
rgb64 = (uint64_t *) rgb565; | |
for (i = 0; i < size / 4; i++) | |
rgb64[i] = bgr64_to_rgb64(bgr64[i]); | |
break; | |
case 9: | |
bgr64 = (uint64_t *) bgr555; | |
rgb64 = (uint64_t *) rgb565; | |
for (i = 0; i < size / 4; i += 2) { | |
rgb64[i + 0] = bgr64_to_rgb64(bgr64[i + 0]); | |
rgb64[i + 1] = bgr64_to_rgb64(bgr64[i + 1]); | |
} | |
break; | |
case 10: | |
bgr64 = (uint64_t *) bgr555; | |
rgb64 = (uint64_t *) rgb565; | |
for (i = 0; i < size / 4; i += 4) { | |
rgb64[i + 0] = bgr64_to_rgb64(bgr64[i + 0]); | |
rgb64[i + 1] = bgr64_to_rgb64(bgr64[i + 1]); | |
rgb64[i + 2] = bgr64_to_rgb64(bgr64[i + 2]); | |
rgb64[i + 3] = bgr64_to_rgb64(bgr64[i + 3]); | |
} | |
break; | |
case 11: | |
bgr64 = (uint64_t *) bgr555; | |
rgb64 = (uint64_t *) rgb565; | |
for (i = 0; i < size / 4; i += 8) { | |
rgb64[i + 0] = bgr64_to_rgb64(bgr64[i + 0]); | |
rgb64[i + 1] = bgr64_to_rgb64(bgr64[i + 1]); | |
rgb64[i + 2] = bgr64_to_rgb64(bgr64[i + 2]); | |
rgb64[i + 3] = bgr64_to_rgb64(bgr64[i + 3]); | |
rgb64[i + 4] = bgr64_to_rgb64(bgr64[i + 4]); | |
rgb64[i + 5] = bgr64_to_rgb64(bgr64[i + 5]); | |
rgb64[i + 6] = bgr64_to_rgb64(bgr64[i + 6]); | |
rgb64[i + 7] = bgr64_to_rgb64(bgr64[i + 7]); | |
} | |
break; | |
} | |
} | |
int main(void) | |
{ | |
uint64_t before, after; | |
before = read_counter_us(); | |
convert_buffer(buffer_bgr555, buffer_rgb565, 0x80000); | |
after = read_counter_us(); | |
printf("Frames converted in %llu us\n", after - before); | |
return 0; | |
} | |
/*
 * Compile with: kos-cc -O3 test.c -o test
 *
 * RESULTS:
 *
 * test 0: 157 ms
 * test 1: 90 ms
 * test 2: 124 ms
 * test 3: 140 ms
 *
 * test 4: 80 ms
 * test 5: 46 ms
 * test 6: 64 ms
 * test 7: 72 ms
 *
 * test 8: 53 ms
 * test 9: 36 ms
 * test 10: 47 ms
 * test 11: 48 ms
 *
 * Compile with: kos-cc -O3 -funroll-loops test.c -o test
 *
 * RESULTS:
 *
 * test 0: 140 ms
 * test 4: 72 ms
 * test 8: 54 ms
 */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment