Skip to content

Instantly share code, notes, and snippets.

@ahmetaa
Last active August 29, 2015 14:05
Show Gist options
  • Save ahmetaa/a0000a2a06fc483fd9ea to your computer and use it in GitHub Desktop.
Save ahmetaa/a0000a2a06fc483fd9ea to your computer and use it in GitHub Desktop.
avx
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <immintrin.h>
// gcc -Wall -O3 -march=native -mtune=native -mavx avx_simple.c -o avx
/* Forward declarations for the helpers defined further down in this file. */
/* Prints a message and exits when alloc_result is non-zero. */
void check_alloc(int alloc_result);
/* Allocates the global gmm table for `amount` entries. */
void initialize(int amount);
/* Sets up gmms[index] with room for gauss_count gaussians and copies the mixture values. */
void initialize_gmm(int index, int gauss_count, float *mixture_values);
/* Fills one gaussian of one gmm from plain float arrays. */
void initialize_gaussian(int gmm_index, int gauss_index, int dimension, float *means, float *presicions, float c);
/* Debug printers; all of them write to stdout. */
void print_gmm(int index);
void print_floats(float *floats, int amount);
void print_gaussian(int gmm_index, int gauss_index);
/* Packs a float array into a newly allocated array of __m256 vectors. */
__m256 * convert_input(float *input, int dimension);
/* Represents a diagonal gaussian. It only contains enough information to
calculate log likelihoods. presicions contain -0.5/variance values. */
typedef struct gaussian{
/* Mean values, packed eight floats per __m256 vector. */
__m256 *means;
/* Precision terms (see the note above), packed the same way as means. */
__m256 *presicions;
/* Constant that log_likelihood adds to the weighted squared distance. */
float c;
/* Number of float dimensions passed to initialize_gaussian. */
int dimension;
/* Number of __m256 vectors held by means and presicions (dimension / 8). */
int aligned_size;
} gaussian;
/* Represents a gaussian mixture model. The mixture weights are stored as log values. */
typedef struct gmm{
/* One weight per gaussian; `count` entries, copied in by initialize_gmm. */
float *mixture_weights;
/* Array of `count` gaussians; each entry is filled by initialize_gaussian. */
gaussian *gaussians;
/* Number of gaussians (and mixture weights) in this model. */
int count;
} gmm;
/* Global table of all models; allocated by initialize(). */
gmm *gmms;
/* Number of entries in gmms, as passed to initialize(). */
int gmm_count;
/*
 * Reserves the global gmm table with room for `amount` entries and records
 * that count in gmm_count. The entries are left uninitialized. Exits through
 * check_alloc if the aligned allocation fails.
 */
void initialize(int amount) {
    void *table = NULL;

    gmm_count = amount;
    check_alloc(posix_memalign(&table, 32, sizeof(gmm) * amount));
    gmms = table;
}
/*
 * Prepares gmms[index] to hold `gauss_count` gaussians.
 *
 * Allocates 32-byte aligned storage for the mixture weights and for the
 * gaussian array, then copies `gauss_count` floats from `mixture_values`
 * into the model. The caller keeps ownership of `mixture_values`. The
 * gaussians themselves are left uninitialized. Exits through check_alloc
 * on allocation failure.
 */
void initialize_gmm(int index, int gauss_count, float *mixture_values) {
    gmm *model = gmms + index;
    const size_t weight_bytes = sizeof(float) * gauss_count;
    void *weights = NULL;
    void *components = NULL;

    model->count = gauss_count;

    check_alloc(posix_memalign(&weights, 32, weight_bytes));
    model->mixture_weights = weights;

    check_alloc(posix_memalign(&components, 32, sizeof(gaussian) * gauss_count));
    model->gaussians = components;

    memcpy(model->mixture_weights, mixture_values, weight_bytes);
}
void initialize_gaussian(int gmm_index, int gauss_index, int dimension, float *means, float *presicions, float c) {
gmm *g = &gmms[gmm_index];
gaussian *gauss = &g->gaussians[gauss_index];
int k = dimension/8;
gauss->dimension = dimension;
gauss->aligned_size = k;
check_alloc(posix_memalign((void*)&gauss->means, 32, sizeof(__m256) * k));
check_alloc(posix_memalign((void*)&gauss->presicions, 32, sizeof(__m256) * k));
int i;
for(i = 0; i< k; ++i) {
float temp_m[8] __attribute((aligned(32)));
memcpy(&temp_m, means, 32);
gauss->means[i] = _mm256_load_ps(temp_m);
float temp_p[8] __attribute((aligned(32)));
memcpy(&temp_p, presicions, 32);
gauss->presicions[i] = _mm256_load_ps(temp_p);
means+=8;
presicions+=8;
}
}
/*
 * Horizontal sum: returns the sum of the eight float lanes of x.
 *
 * The reduction folds the register in halves (8 -> 4 -> 2 -> 1 lanes);
 * the order of additions is kept fixed so results are reproducible.
 */
static inline float __reduce_add_ps(__m256 x){
    /* 8 -> 4 lanes: upper 128-bit half plus lower half. */
    const __m128 upper_half = _mm256_extractf128_ps(x, 1);
    const __m128 lower_half = _mm256_castps256_ps128(x);
    const __m128 quad = _mm_add_ps(upper_half, lower_half);

    /* 4 -> 2 lanes: lanes 2,3 moved down onto lanes 0,1. */
    const __m128 quad_high = _mm_movehl_ps(quad, quad);
    const __m128 pair = _mm_add_ps(quad, quad_high);

    /* 2 -> 1 lane: broadcast lane 1 and add it to lane 0. */
    const __m128 lane_one = _mm_shuffle_ps(pair, pair, 0x55);
    const __m128 total = _mm_add_ss(pair, lane_one);

    return _mm_cvtss_f32(total);
}
/*
 * Computes the score of `input` under one gaussian:
 *
 *     sum over all lanes of (input - mean)^2 * presicion, plus gauss->c
 *
 * `input` must hold at least gauss->aligned_size vectors. Each vector is
 * reduced to a scalar before being accumulated, so the summation order is
 * per vector, then across vectors.
 */
static inline float log_likelihood(gaussian *gauss, __m256 *input) {
    const int vector_count = gauss->aligned_size;
    float accumulated = 0.0f;
    int v;

    for (v = 0; v < vector_count; ++v) {
        const __m256 delta = _mm256_sub_ps(input[v], gauss->means[v]);
        const __m256 delta_squared = _mm256_mul_ps(delta, delta);
        const __m256 scaled = _mm256_mul_ps(delta_squared, gauss->presicions[v]);
        accumulated += __reduce_add_ps(scaled);
    }

    return accumulated + gauss->c;
}
/*
 * Scores `input` against gmms[gmm_index].
 *
 * For every gaussian of the model the log likelihood and the matching
 * mixture weight are added, and those per-component values are summed.
 *
 * NOTE(review): the per-component log values are added together directly;
 * a mixture likelihood would normally combine them with log-sum-exp.
 * Confirm whether the plain sum is intentional (e.g. benchmark only).
 */
float score_gmm (int gmm_index, __m256 *input) {
    gmm *model = gmms + gmm_index;
    float total = 0.0f;
    int component;

    for (component = 0; component < model->count; ++component) {
        const float likelihood = log_likelihood(model->gaussians + component, input);
        const float weighted = likelihood + model->mixture_weights[component];
        total += weighted;
    }

    return total;
}
/*
 * Prints the index, gaussian count and mixture weights of gmms[index]
 * to stdout.
 */
void print_gmm(int index) {
    const gmm *model = &gmms[index];

    printf("gmm index = %d\n", index);
    printf("gauss count = %d\n", model->count);
    printf("mixture weights = ");
    print_floats(model->mixture_weights, model->count);
    printf("\n");
}
/* Prints `count` vectors to stdout, each as a bracketed group of eight floats. */
static void print_packed_vectors(const __m256 *vectors, int count) {
    int v;

    for (v = 0; v < count; ++v) {
        float lanes[8];
        _mm256_storeu_ps(lanes, vectors[v]);
        print_floats(lanes, 8);
    }
}

/*
 * Prints the means and presicions of gaussian `gauss_index` belonging to
 * gmms[gmm_index] to stdout.
 */
void print_gaussian(int gmm_index, int gauss_index) {
    const gaussian *gauss = &gmms[gmm_index].gaussians[gauss_index];

    printf("gmm, gauss_index = %d,%d\n", gmm_index, gauss_index);

    printf("means = ");
    print_packed_vectors(gauss->means, gauss->aligned_size);
    printf("\n");

    printf("presicions = ");
    print_packed_vectors(gauss->presicions, gauss->aligned_size);
    printf("\n");
}
/*
 * Packs a plain float array into a newly allocated, 32-byte aligned array
 * of __m256 vectors.
 *
 * input      source values; at least `dimension` floats must be readable.
 * dimension  number of floats; only whole groups of eight are packed, a
 *            trailing dimension % 8 remainder is ignored.
 *
 * Returns an array of dimension / 8 vectors. The caller owns it and must
 * release it with free(). Exits through check_alloc if allocation fails.
 */
__m256 * convert_input(float *input, int dimension) {
    const int vector_count = dimension / 8;
    void *block = NULL;
    __m256 *result;
    int i;

    check_alloc(posix_memalign(&block, 32, sizeof(__m256) * vector_count));
    result = block;

    for (i = 0; i < vector_count; ++i) {
        /* Step through the input eight floats at a time. The source
           pointer used to stay fixed, so every vector repeated the first
           eight values. Unaligned load: `input` has no alignment guarantee. */
        result[i] = _mm256_loadu_ps(input + 8 * i);
    }

    return result;
}
/*
 * Prints `amount` floats to stdout as "[a b c]": three decimals each,
 * separated by single spaces, with no trailing newline. An amount of zero
 * prints "[]".
 */
void print_floats(float *floats, int amount) {
    const char *separator = "";
    int idx;

    printf("[");
    for (idx = 0; idx < amount; ++idx) {
        /* The separator is empty before the first value and a space afterwards. */
        printf("%s%.3f", separator, floats[idx]);
        separator = " ";
    }
    printf("]");
}
/*
 * Checks an allocation status code such as the return value of
 * posix_memalign. Zero means success and the function simply returns.
 * Any other value is printed to stdout and used as the process exit code.
 */
void check_alloc(int i) {
    if (i == 0) {
        return;
    }
    printf("Allocation failure %d\n", i);
    exit(i);
}
/*
 * Benchmark driver.
 *
 * Builds 1000 gmms with 16 gaussians each over 40 dimensions from synthetic
 * values, scores 1000 synthetic input vectors against every gmm, and prints
 * the accumulated score together with the elapsed processor time measured
 * with clock().
 *
 * All temporary buffers are released before returning. The gmm table itself
 * stays allocated because the file has no teardown routine for it.
 */
void test_simd256() {
    /* Named model_count so the global gmm_count is not shadowed. */
    int model_count = 1000;
    int dimension = 40;
    int gauss_count = 16;
    int input_amount = 1000;
    int i,k,z;

    initialize(model_count);
    printf("gmms allocated\n");

    // prepare gmms
    float mi = 0.0033f;
    float mi_start = -1.43f;
    /* initialize_gmm copies the values, so one scratch buffer is reused. */
    float *mixtures = malloc(sizeof *mixtures * gauss_count);
    check_alloc(mixtures == NULL);
    for(i = 0; i < model_count; ++i) {
        for(k = 0; k < gauss_count; ++k) {
            mixtures[k] = mi_start + (float)k * mi;
        }
        mi_start+=mi;
        initialize_gmm(i, gauss_count, mixtures);
    }
    free(mixtures);

    // prepare gaussians
    float ma = 0.0011f;
    float ma_start = -0.75f;
    float pa = 0.0025f;
    float pa_start = -0.33f;
    /* initialize_gaussian copies both arrays, so the scratch buffers are reused. */
    float *means = malloc(sizeof *means * dimension);
    float *presicions = malloc(sizeof *presicions * dimension);
    check_alloc(means == NULL || presicions == NULL);
    for(i = 0; i < model_count; ++i) {
        for(k = 0; k < gauss_count; ++k) {
            for(z = 0; z < dimension ; ++z) {
                means[z] = ma_start + (float)z * ma;
                presicions[z] = pa_start + (float)z * pa;
            }
            ma_start+=ma;
            pa_start+=pa;
            initialize_gaussian(i, k, dimension, means, presicions, 0.3f);
        }
    }
    free(means);
    free(presicions);
    printf("Gausses initialized.");

    // Prepare input
    float **input = malloc(sizeof *input * input_amount);
    check_alloc(input == NULL);
    for(i = 0; i < input_amount; ++i) {
        input[i] = malloc(sizeof **input * dimension);
        check_alloc(input[i] == NULL);
    }
    float ia = 0.0011f;
    float ia_start = -0.75f;
    for(i = 0; i < input_amount; ++i) {
        for(k = 0; k < dimension; ++k) {
            input[i][k] = ia_start + (float)k * ia;
        }
        ia_start+=ia;
    }

    // run test
    clock_t start = clock(), diff;
    float result = 0.0f;
    for(i = 0; i < input_amount; ++i) {
        __m256 *i256 = convert_input(&input[i][0], dimension);
        for(k = 0; k < model_count; ++k) {
            result+= score_gmm(k, i256);
        }
        /* convert_input allocates a fresh buffer on every call. */
        free(i256);
    }
    diff = clock() - start;
    int msec = diff * 1000 / CLOCKS_PER_SEC;
    printf("result = %f", result);
    printf("Time taken %d seconds %d milliseconds\n", msec/1000, msec%1000);

    for(i = 0; i < input_amount; ++i) {
        free(input[i]);
    }
    free(input);
}
/* Program entry point: runs the SIMD benchmark. The command-line arguments are not used. */
int main( int argc, char *argv[] )
{
    (void)argc;
    (void)argv;

    test_simd256();
    return 0;
}
/*
 * Small smoke test: builds two gmms, fills the first gaussian of the first
 * gmm from fixed 16-element arrays, prints the structures, and prints the
 * log likelihood of a 16-element input under that gaussian.
 *
 * Only gaussian (0, 0) is initialized; the other gaussians allocated by
 * initialize_gmm are never touched here.
 */
void simple_test() {
    // initialize gmms with count.
    initialize(2);
    printf("initialized\n");

    // mixture values.
    float mixture1[] = {-0.5, -0.3};
    float mixture2[] = {-0.2};
    initialize_gmm(0, 2, mixture1);
    initialize_gmm(1, 1, mixture2);
    print_gmm(0);
    print_gmm(1);

    float means[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6};
    float presicions[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6};
    initialize_gaussian(0, 0, 16, means, presicions, 0.4f);
    print_gaussian(0, 0);

    float input[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6};
    __m256 *i256 = convert_input(&input[0], 16);
    gaussian *gauss = &gmms[0].gaussians[0];
    float l = log_likelihood(gauss, i256);
    printf("likelihood=%.3f\n", l);

    /* convert_input hands ownership of the buffer to the caller. */
    free(i256);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment