Skip to content

Instantly share code, notes, and snippets.

@pcercuei
Last active September 29, 2023 23:00
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pcercuei/051c97840c52b1656aa42da2936d600e to your computer and use it in GitHub Desktop.
Save pcercuei/051c97840c52b1656aa42da2936d600e to your computer and use it in GitHub Desktop.
Small code to test GCC's loop unrolling on SH4
#include <sys/time.h>
#include <stdio.h>
#include <stdint.h>
#include <kos.h>
/*
 * Selects which conversion variant convert_buffer() runs: the value is matched
 * against the `case` labels of its switch (0..11). Change it and rebuild to
 * measure a different variant.
 */
static const unsigned int test = 8;
/* NOTE(review): presumably requests KallistiOS' default initialisation - confirm against the KOS documentation. */
KOS_INIT_FLAGS(INIT_DEFAULT);
/*
 * Input pixels handed to convert_buffer() by main(): 0x80000 16-bit elements.
 * Nothing in this file writes to it, so it keeps its zero-initialised static
 * contents.
 */
static uint16_t buffer_bgr555[0x80000];
/* Output pixels written by convert_buffer(); same element count as the input. */
static uint16_t buffer_rgb565[0x80000];
/*
 * Return the current gettimeofday() time expressed as a single count of
 * microseconds (whole seconds scaled by one million, plus the microsecond
 * remainder). The return value of gettimeofday() itself is not checked.
 */
static uint64_t read_counter_us(void)
{
	struct timeval now;
	uint64_t whole_seconds_us;
	uint64_t remainder_us;

	gettimeofday(&now, NULL);

	/* The unsigned 64-bit constant forces the multiplication to be done in 64 bits. */
	whole_seconds_us = now.tv_sec * 1000000ull;
	remainder_us = (uint64_t)now.tv_usec;

	return whole_seconds_us + remainder_us;
}
/*
 * Convert one 16-bit BGR555 pixel to RGB565.
 *
 * The 5-bit field in bits 0-4 moves to bits 11-15, the 5-bit field in
 * bits 10-14 moves to bits 0-4, and the middle field (bits 5-9) is shifted up
 * by one so it occupies bits 6-10 of the 6-bit slot; bit 5 of the result is
 * therefore always zero. Bit 15 of the input is discarded.
 */
static uint16_t bgr555_to_rgb565(uint16_t bgr)
{
	const unsigned int pixel = bgr;
	const unsigned int low_field  = pixel & 0x001fu;
	const unsigned int mid_field  = pixel & 0x03e0u;
	const unsigned int high_field = pixel & 0x7c00u;
	unsigned int converted;

	converted  = low_field << 11;
	converted |= mid_field << 1;
	converted |= high_field >> 10;

	return (uint16_t)converted;
}
/*
 * Convert two BGR555 pixels packed in one 32-bit word to two RGB565 pixels.
 *
 * Each mask repeats the 16-bit field mask in both halves of the word, so the
 * bits selected from one pixel never shift into the neighbouring pixel.
 */
static uint32_t bgr32_to_rgb32(uint32_t bgr)
{
	const uint32_t low_fields  = bgr & 0x001f001f;
	const uint32_t mid_fields  = bgr & 0x03e003e0;
	const uint32_t high_fields = bgr & 0x7c007c00;
	uint32_t converted = 0;

	converted |= low_fields << 11;
	converted |= mid_fields << 1;
	converted |= high_fields >> 10;

	return converted;
}
/*
 * Convert four BGR555 pixels packed in one 64-bit word to four RGB565 pixels.
 *
 * Each mask repeats the 16-bit field mask in all four 16-bit lanes, so the
 * bits selected from one pixel never shift into a neighbouring pixel.
 */
static uint64_t bgr64_to_rgb64(uint64_t bgr)
{
	const uint64_t low_fields  = bgr & 0x001f001f001f001full;
	const uint64_t mid_fields  = bgr & 0x03e003e003e003e0ull;
	const uint64_t high_fields = bgr & 0x7c007c007c007c00ull;
	uint64_t converted = 0;

	converted |= low_fields << 11;
	converted |= mid_fields << 1;
	converted |= high_fields >> 10;

	return converted;
}
/*
 * Convert `size` 16-bit pixels from `bgr555` into `rgb565`, using the loop
 * variant selected by the file-scope constant `test`:
 *
 *   0..3   one 16-bit pixel per conversion, loop body written out 1/2/4/8 times
 *   4..7   two pixels per 32-bit word, loop body written out 1/2/4/8 times
 *   8..11  four pixels per 64-bit word, loop body written out 1/2/4/8 times
 *
 * Any other value of `test` matches no case and leaves `rgb565` untouched.
 *
 * None of the variants handles a tail. Cases 4 and 8 leave the trailing
 * size % 2 / size % 4 pixels unconverted. The manually repeated variants index
 * past `size` on their last iteration unless `size` is a multiple of the
 * pixels consumed per iteration (2/4/8 for cases 1..3, 4/8/16 for cases 5..7,
 * 8/16/32 for cases 9..11).
 *
 * NOTE(review): cases 4..11 access the uint16_t buffers through uint32_t /
 * uint64_t pointers. That presumably relies on the buffers being aligned for
 * the wider type and on the compiler tolerating the type pun - confirm before
 * reusing this code outside the benchmark.
 */
static void convert_buffer(uint16_t * restrict bgr555,
uint16_t * restrict rgb565,
unsigned int size)
{
unsigned int i;
/* Views of the same two buffers as 32-bit words (cases 4..7). */
uint32_t * restrict bgr32;
uint32_t * restrict rgb32;
/* Views of the same two buffers as 64-bit words (cases 8..11). */
uint64_t * restrict bgr64;
uint64_t * restrict rgb64;
switch (test) {
/* 16-bit accesses, one pixel per iteration. */
case 0:
for (i = 0; i < size; i++)
rgb565[i] = bgr555_to_rgb565(bgr555[i]);
break;
/* 16-bit accesses, two pixels per iteration. */
case 1:
for (i = 0; i < size; i += 2) {
rgb565[i + 0] = bgr555_to_rgb565(bgr555[i + 0]);
rgb565[i + 1] = bgr555_to_rgb565(bgr555[i + 1]);
}
break;
/* 16-bit accesses, four pixels per iteration. */
case 2:
for (i = 0; i < size; i += 4) {
rgb565[i + 0] = bgr555_to_rgb565(bgr555[i + 0]);
rgb565[i + 1] = bgr555_to_rgb565(bgr555[i + 1]);
rgb565[i + 2] = bgr555_to_rgb565(bgr555[i + 2]);
rgb565[i + 3] = bgr555_to_rgb565(bgr555[i + 3]);
}
break;
/* 16-bit accesses, eight pixels per iteration. */
case 3:
for (i = 0; i < size; i += 8) {
rgb565[i + 0] = bgr555_to_rgb565(bgr555[i + 0]);
rgb565[i + 1] = bgr555_to_rgb565(bgr555[i + 1]);
rgb565[i + 2] = bgr555_to_rgb565(bgr555[i + 2]);
rgb565[i + 3] = bgr555_to_rgb565(bgr555[i + 3]);
rgb565[i + 4] = bgr555_to_rgb565(bgr555[i + 4]);
rgb565[i + 5] = bgr555_to_rgb565(bgr555[i + 5]);
rgb565[i + 6] = bgr555_to_rgb565(bgr555[i + 6]);
rgb565[i + 7] = bgr555_to_rgb565(bgr555[i + 7]);
}
break;
/* 32-bit accesses: size / 2 words, one word (two pixels) per iteration. */
case 4:
bgr32 = (uint32_t *) bgr555;
rgb32 = (uint32_t *) rgb565;
for (i = 0; i < size / 2; i++)
rgb32[i] = bgr32_to_rgb32(bgr32[i]);
break;
/* 32-bit accesses, two words (four pixels) per iteration. */
case 5:
bgr32 = (uint32_t *) bgr555;
rgb32 = (uint32_t *) rgb565;
for (i = 0; i < size / 2; i += 2) {
rgb32[i + 0] = bgr32_to_rgb32(bgr32[i + 0]);
rgb32[i + 1] = bgr32_to_rgb32(bgr32[i + 1]);
}
break;
/* 32-bit accesses, four words (eight pixels) per iteration. */
case 6:
bgr32 = (uint32_t *) bgr555;
rgb32 = (uint32_t *) rgb565;
for (i = 0; i < size / 2; i += 4) {
rgb32[i + 0] = bgr32_to_rgb32(bgr32[i + 0]);
rgb32[i + 1] = bgr32_to_rgb32(bgr32[i + 1]);
rgb32[i + 2] = bgr32_to_rgb32(bgr32[i + 2]);
rgb32[i + 3] = bgr32_to_rgb32(bgr32[i + 3]);
}
break;
/* 32-bit accesses, eight words (sixteen pixels) per iteration. */
case 7:
bgr32 = (uint32_t *) bgr555;
rgb32 = (uint32_t *) rgb565;
for (i = 0; i < size / 2; i += 8) {
rgb32[i + 0] = bgr32_to_rgb32(bgr32[i + 0]);
rgb32[i + 1] = bgr32_to_rgb32(bgr32[i + 1]);
rgb32[i + 2] = bgr32_to_rgb32(bgr32[i + 2]);
rgb32[i + 3] = bgr32_to_rgb32(bgr32[i + 3]);
rgb32[i + 4] = bgr32_to_rgb32(bgr32[i + 4]);
rgb32[i + 5] = bgr32_to_rgb32(bgr32[i + 5]);
rgb32[i + 6] = bgr32_to_rgb32(bgr32[i + 6]);
rgb32[i + 7] = bgr32_to_rgb32(bgr32[i + 7]);
}
break;
/* 64-bit accesses: size / 4 words, one word (four pixels) per iteration. */
case 8:
bgr64 = (uint64_t *) bgr555;
rgb64 = (uint64_t *) rgb565;
for (i = 0; i < size / 4; i++)
rgb64[i] = bgr64_to_rgb64(bgr64[i]);
break;
/* 64-bit accesses, two words (eight pixels) per iteration. */
case 9:
bgr64 = (uint64_t *) bgr555;
rgb64 = (uint64_t *) rgb565;
for (i = 0; i < size / 4; i += 2) {
rgb64[i + 0] = bgr64_to_rgb64(bgr64[i + 0]);
rgb64[i + 1] = bgr64_to_rgb64(bgr64[i + 1]);
}
break;
/* 64-bit accesses, four words (sixteen pixels) per iteration. */
case 10:
bgr64 = (uint64_t *) bgr555;
rgb64 = (uint64_t *) rgb565;
for (i = 0; i < size / 4; i += 4) {
rgb64[i + 0] = bgr64_to_rgb64(bgr64[i + 0]);
rgb64[i + 1] = bgr64_to_rgb64(bgr64[i + 1]);
rgb64[i + 2] = bgr64_to_rgb64(bgr64[i + 2]);
rgb64[i + 3] = bgr64_to_rgb64(bgr64[i + 3]);
}
break;
/* 64-bit accesses, eight words (thirty-two pixels) per iteration. */
case 11:
bgr64 = (uint64_t *) bgr555;
rgb64 = (uint64_t *) rgb565;
for (i = 0; i < size / 4; i += 8) {
rgb64[i + 0] = bgr64_to_rgb64(bgr64[i + 0]);
rgb64[i + 1] = bgr64_to_rgb64(bgr64[i + 1]);
rgb64[i + 2] = bgr64_to_rgb64(bgr64[i + 2]);
rgb64[i + 3] = bgr64_to_rgb64(bgr64[i + 3]);
rgb64[i + 4] = bgr64_to_rgb64(bgr64[i + 4]);
rgb64[i + 5] = bgr64_to_rgb64(bgr64[i + 5]);
rgb64[i + 6] = bgr64_to_rgb64(bgr64[i + 6]);
rgb64[i + 7] = bgr64_to_rgb64(bgr64[i + 7]);
}
break;
}
}
/*
 * Time a single convert_buffer() pass over the whole static buffer and print
 * the elapsed wall-clock time in microseconds.
 *
 * Returns 0 unconditionally.
 */
int main(void)
{
	/*
	 * Derive the element count from the array so it cannot drift from the
	 * buffer declarations (both buffers are declared with the same length).
	 */
	const unsigned int nb_pixels =
		(unsigned int)(sizeof(buffer_bgr555) / sizeof(buffer_bgr555[0]));
	uint64_t before, after;

	before = read_counter_us();
	convert_buffer(buffer_bgr555, buffer_rgb565, nb_pixels);
	after = read_counter_us();

	/*
	 * uint64_t is not guaranteed to be unsigned long long; cast so the
	 * argument always matches the %llu conversion.
	 */
	printf("Frames converted in %llu us\n",
	       (unsigned long long)(after - before));
	return 0;
}
/*
* Compile with: kos-cc -O3 test.c -o test
*
* RESULTS:
*
* test 0: 157 ms
* test 1: 90 ms
* test 2: 124 ms
* test 3: 140 ms
*
* test 4: 80 ms
* test 5: 46 ms
* test 6: 64 ms
* test 7: 72 ms
*
* test 8: 53 ms
* test 9: 36 ms
* test 10: 47 ms
* test 11: 48 ms
*
* Compile with: kos-cc -O3 -funroll-loops test.c -o test
*
* RESULTS:
*
* test 0: 140 ms
* test 4: 72 ms
* test 8: 54 ms
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment