Skip to content

Instantly share code, notes, and snippets.

@vhxs
Created March 13, 2022 19:52
Show Gist options
  • Save vhxs/14526f782dc80f34158b79dd7cec738e to your computer and use it in GitHub Desktop.
Save vhxs/14526f782dc80f34158b79dd7cec738e to your computer and use it in GitHub Desktop.
Example program that times non-SIMD vs SIMD on ARM64 architecture
#include <arm_neon.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/*
 * Scalar reference implementation: split "len_color" interleaved RGB triplets
 * from "rgb" into the separate planes "r", "g" and "b".
 *
 * "rgb" is read at indices 0 .. 3*len_color-1 and each of "r", "g", "b" is
 * written at indices 0 .. len_color-1. Nothing is touched when len_color <= 0.
 */
void rgb_deinterleave_c(uint8_t *r, uint8_t *g, uint8_t *b, uint8_t *rgb, int len_color) {
    const uint8_t *src = rgb;

    /* One iteration per pixel: consume three source bytes, emit one per plane. */
    for (int left = len_color; left > 0; left--) {
        *r++ = *src++;
        *g++ = *src++;
        *b++ = *src++;
    }
}
/*
 * NEON implementation: split "len_color" interleaved RGB triplets from "rgb"
 * into the separate planes "r", "g" and "b".
 *
 * Whole groups of 16 pixels go through vld3q_u8, which loads 48 interleaved
 * bytes and de-interleaves them into three 16-byte vectors that are then
 * stored with vst1q_u8. The remaining len_color % 16 pixels are copied by a
 * scalar loop so that every output element 0 .. len_color-1 is written, for
 * any length. Nothing is touched when len_color <= 0.
 */
void rgb_deinterleave_neon(uint8_t *r, uint8_t *g, uint8_t *b, uint8_t *rgb, int len_color) {
    enum { LANES = 16 };

    if (len_color <= 0) {
        return;
    }

    /* size_t offsets keep 3 * i from overflowing int for large inputs. */
    const size_t count = (size_t)len_color;
    const size_t vector_end = count - count % LANES;
    size_t i = 0;

    /* Vector part: 16 pixels (48 source bytes) per iteration. */
    for (; i < vector_end; i += LANES) {
        const uint8x16x3_t pixels = vld3q_u8(rgb + 3 * i);
        vst1q_u8(r + i, pixels.val[0]);
        vst1q_u8(g + i, pixels.val[1]);
        vst1q_u8(b + i, pixels.val[2]);
    }

    /* Scalar tail: the fewer-than-16 pixels that do not fill a vector. */
    for (; i < count; i++) {
        r[i] = rgb[3 * i];
        g[i] = rgb[3 * i + 1];
        b[i] = rgb[3 * i + 2];
    }
}
/*
 * Allocate "num_arrays" zero-initialised byte arrays of "size_of_array"
 * elements each and return them as an array of pointers.
 *
 * When "set_rand" is true every byte is overwritten with a rand() result
 * truncated to 8 bits; seeding is left to the caller.
 *
 * Returns NULL when an allocation fails (everything allocated up to that
 * point is released first). Note that calloc(0, ...) may itself return NULL,
 * so a zero "num_arrays" can also produce NULL. On success the caller owns
 * the result and must free every inner array and then the outer array.
 */
uint8_t** create_arrays(unsigned int num_arrays, unsigned int size_of_array, bool set_rand) {
    uint8_t **arrays = calloc(num_arrays, sizeof *arrays);
    if (arrays == NULL) {
        return NULL;
    }

    for (unsigned int i = 0; i < num_arrays; i++) {
        arrays[i] = calloc(size_of_array, sizeof *arrays[i]);

        /* A NULL result is only an error when bytes were actually requested. */
        if (arrays[i] == NULL && size_of_array != 0) {
            while (i > 0) {
                free(arrays[--i]);
            }
            free(arrays);
            return NULL;
        }

        if (set_rand) {
            for (unsigned int j = 0; j < size_of_array; j++) {
                arrays[i][j] = (uint8_t)rand();
            }
        }
    }
    return arrays;
}
/*
 * Release an array of "num_arrays" heap-allocated byte arrays: each inner
 * array is freed first, then the outer pointer array itself.
 *
 * A NULL "arrays" is accepted and ignored, mirroring free(NULL). NULL inner
 * entries are harmless as well because free(NULL) is a no-op.
 */
void free_arrays(uint8_t** arrays, unsigned int num_arrays) {
    if (arrays == NULL) {
        return;
    }

    /* Unsigned index matches the type of num_arrays (no signed/unsigned mix). */
    for (unsigned int i = 0; i < num_arrays; i++) {
        free(arrays[i]);
    }
    free(arrays);
}
int main(int argc, char *argv[]) {
unsigned int num_arrays = atoi(argv[1]);
unsigned int size_of_array = atoi(argv[2]);
/* without SIMD */
uint8_t **rgb_arrays_c = create_arrays(num_arrays, 3 * size_of_array, true);
uint8_t **r_arrays_c = create_arrays(num_arrays, size_of_array, false);
uint8_t **g_arrays_c = create_arrays(num_arrays, size_of_array, false);
uint8_t **b_arrays_c = create_arrays(num_arrays, size_of_array, false);
clock_t before_c = clock();
for (int i = 0; i < num_arrays; i++) {
rgb_deinterleave_c(r_arrays_c[i], g_arrays_c[i], b_arrays_c[i], rgb_arrays_c[i], size_of_array);
}
double difference_c = (double) (clock() - before_c) / CLOCKS_PER_SEC;
printf("Without SIMD: %f\n", difference_c);
free_arrays(rgb_arrays_c, num_arrays);
free_arrays(r_arrays_c, num_arrays);
free_arrays(g_arrays_c, num_arrays);
free_arrays(b_arrays_c, num_arrays);
/* with SIMD */
uint8_t **rgb_arrays_neon = create_arrays(num_arrays, 3 * size_of_array, true);
uint8_t **r_arrays_neon = create_arrays(num_arrays, size_of_array, false);
uint8_t **g_arrays_neon = create_arrays(num_arrays, size_of_array, false);
uint8_t **b_arrays_neon = create_arrays(num_arrays, size_of_array, false);
clock_t before_neon = clock();
for (int i = 0; i < num_arrays; i++) {
rgb_deinterleave_neon(r_arrays_neon[i], g_arrays_neon[i], b_arrays_neon[i], rgb_arrays_neon[i], size_of_array);
}
double difference_neon = (double) (clock() - before_neon) / CLOCKS_PER_SEC;
printf("With SIMD: %f\n", difference_neon);
free_arrays(rgb_arrays_neon, num_arrays);
free_arrays(r_arrays_neon, num_arrays);
free_arrays(g_arrays_neon, num_arrays);
free_arrays(b_arrays_neon, num_arrays);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment