-
-
Save ahmetaa/a0000a2a06fc483fd9ea to your computer and use it in GitHub Desktop.
avx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <time.h> | |
#include <immintrin.h> | |
// Build with: gcc -Wall -O3 -march=native -mtune=native -mavx avx_simple.c -o avx
/* Forward declarations for functions defined further down in this file. */

/* Allocation and setup. */
void check_alloc(int alloc_result);
void initialize(int amount);
void initialize_gmm(int index, int gauss_count, float *mixture_values);
void initialize_gaussian(int gmm_index, int gauss_index, int dimension,
                         float *means, float *presicions, float c);
__m256 *convert_input(float *input, int dimension);

/* Debug printing. */
void print_gmm(int index);
void print_gaussian(int gmm_index, int gauss_index);
void print_floats(float *floats, int amount);
/*
 * A diagonal gaussian, reduced to the data needed to calculate log
 * likelihoods. `presicions` holds -0.5/variance values.
 */
struct gaussian {
    __m256 *means;      /* mean values, packed 8 floats per vector */
    __m256 *presicions; /* -0.5/variance values, packed 8 floats per vector */
    float c;            /* constant term added to the summed result in log_likelihood() */
    int dimension;      /* dimension as passed to initialize_gaussian() */
    int aligned_size;   /* number of __m256 vectors in means / presicions */
};
typedef struct gaussian gaussian;
/* Represents a gauss mixture model. mixture weights are actually log values. */ | |
typedef struct gmm{ | |
float *mixture_weights; | |
gaussian *gaussians; | |
int count; | |
} gmm; | |
/* Global model table: allocated by initialize(), indexed by gmm index. */
gmm *gmms;
/* Number of entries that initialize() allocated in `gmms`. */
int gmm_count;
/* Allocates for `amount` of gmms. */ | |
void initialize(int amount) { | |
gmm_count = amount; | |
check_alloc(posix_memalign((void*) &gmms, 32, sizeof(gmm) * amount)); | |
} | |
/* Allocates 'gauss_count' amount of gaussian for gmm[index] with mixture values. */ | |
void initialize_gmm(int index, int gauss_count, float *mixture_values) { | |
gmm *g = &gmms[index]; | |
g->count = gauss_count; | |
check_alloc(posix_memalign((void*)&g->mixture_weights, 32, sizeof(float) * gauss_count)); | |
check_alloc(posix_memalign((void*)&g->gaussians, 32, sizeof(gaussian) * gauss_count)); | |
memcpy(g->mixture_weights, mixture_values, sizeof(float)*gauss_count); | |
} | |
void initialize_gaussian(int gmm_index, int gauss_index, int dimension, float *means, float *presicions, float c) { | |
gmm *g = &gmms[gmm_index]; | |
gaussian *gauss = &g->gaussians[gauss_index]; | |
int k = dimension/8; | |
gauss->dimension = dimension; | |
gauss->aligned_size = k; | |
check_alloc(posix_memalign((void*)&gauss->means, 32, sizeof(__m256) * k)); | |
check_alloc(posix_memalign((void*)&gauss->presicions, 32, sizeof(__m256) * k)); | |
int i; | |
for(i = 0; i< k; ++i) { | |
float temp_m[8] __attribute((aligned(32))); | |
memcpy(&temp_m, means, 32); | |
gauss->means[i] = _mm256_load_ps(temp_m); | |
float temp_p[8] __attribute((aligned(32))); | |
memcpy(&temp_p, presicions, 32); | |
gauss->presicions[i] = _mm256_load_ps(temp_p); | |
means+=8; | |
presicions+=8; | |
} | |
} | |
/*
 * Horizontal sum of the 8 float lanes of `x`.
 * The lanes are folded in halves: 8 -> 4 -> 2 -> 1.
 */
static inline float __reduce_add_ps(__m256 x){
    /* 8 -> 4: upper 128-bit half plus lower 128-bit half. */
    const __m128 upper_half = _mm256_extractf128_ps(x, 1);
    const __m128 lower_half = _mm256_castps256_ps128(x);
    const __m128 sum4 = _mm_add_ps(upper_half, lower_half);

    /* 4 -> 2: lanes 2,3 are moved down onto lanes 0,1 and added. */
    const __m128 sum2 = _mm_add_ps(sum4, _mm_movehl_ps(sum4, sum4));

    /* 2 -> 1: broadcast lane 1 and add it to lane 0. */
    const __m128 lane1 = _mm_shuffle_ps(sum2, sum2, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128 sum1 = _mm_add_ss(sum2, lane1);

    return _mm_cvtss_f32(sum1);
}
/*
 * Computes sum((input - mean)^2 * presicion) over all packed vectors of
 * `gauss`, plus the gaussian's constant term `c`.
 *
 * `input` must hold at least gauss->aligned_size vectors.
 */
static inline float log_likelihood(gaussian *gauss, __m256 *input) {
    const int vectors = gauss->aligned_size;
    const __m256 *mu = gauss->means;
    const __m256 *prec = gauss->presicions;
    float acc = 0.0f;

    for (int v = 0; v < vectors; ++v) {
        const __m256 delta = _mm256_sub_ps(input[v], mu[v]);
        const __m256 delta_sq = _mm256_mul_ps(delta, delta);
        const __m256 weighted = _mm256_mul_ps(delta_sq, prec[v]);
        /* Reduce per vector so the float summation order stays the same. */
        acc += __reduce_add_ps(weighted);
    }
    return acc + gauss->c;
}
float score_gmm (int gmm_index, __m256 *input) { | |
gmm *g = &gmms[gmm_index]; | |
float score = 0.0f; | |
int k=0; | |
for(k = 0; k < g->count; k++) { | |
float likelihood = log_likelihood( &g->gaussians[k], input); | |
float weighted = likelihood + g->mixture_weights[k]; | |
score += weighted; | |
} | |
return score; | |
} | |
void print_gmm(int index) { | |
printf("gmm index = %d\n", index); | |
gmm g = gmms[index]; | |
printf("gauss count = %d\n", g.count); | |
printf("mixture weights = "); | |
print_floats(g.mixture_weights, g.count); | |
printf("\n"); | |
} | |
void print_gaussian(int gmm_index, int gauss_index) { | |
gmm *g = &gmms[gmm_index]; | |
gaussian *gauss = &g->gaussians[gauss_index]; | |
printf("gmm, gauss_index = %d,%d\n", gmm_index, gauss_index); | |
int i; | |
printf("means = "); | |
for(i = 0; i < gauss->aligned_size; ++i) { | |
float temp[8] __attribute((aligned(32))); | |
_mm256_store_ps(&temp[0], gauss->means[i]); | |
print_floats(&temp[0], 8); | |
} | |
printf("\n"); | |
printf("presicions = "); | |
for(i = 0; i < gauss->aligned_size; ++i) { | |
float temp[8] __attribute((aligned(32))); | |
_mm256_store_ps(&temp[0], gauss->presicions[i]); | |
print_floats(&temp[0], 8); | |
} | |
printf("\n"); | |
} | |
/*
 * Packs `dimension` floats from `input` into a newly allocated, 32-byte
 * aligned array of __m256 vectors (8 floats per vector).
 *
 * Only the first (dimension / 8) * 8 values are converted; a remainder that
 * does not fill a whole vector is ignored.
 *
 * Returns the new array. It is allocated with posix_memalign(), so the
 * caller owns it and releases it with free(). An allocation failure is
 * handled by check_alloc(), which exits.
 */
__m256 * convert_input(float *input, int dimension) {
    const int vectors = dimension / 8;
    void *mem = NULL;

    check_alloc(posix_memalign(&mem, 32, sizeof(__m256) * vectors));
    __m256 *result = mem;

    for (int v = 0; v < vectors; ++v) {
        /* Each vector reads its own 8-float slice. Previously the source
           pointer was never advanced, so every vector repeated input[0..7].
           The unaligned load is used because `input` carries no alignment
           guarantee. */
        result[v] = _mm256_loadu_ps(input + 8 * v);
    }
    return result;
}
/*
 * Prints `amount` floats from `floats` to stdout as a bracketed,
 * space-separated list with three decimals, e.g. "[0.100 0.200]".
 * No trailing newline is written.
 */
void print_floats(float *floats, int amount) {
    printf("[");
    for (int idx = 0; idx < amount; ++idx) {
        /* Separator goes in front of every element except the first. */
        if (idx > 0) {
            printf(" ");
        }
        printf("%.3f", floats[idx]);
    }
    printf("]");
}
/*
 * Checks the result code of an allocation call such as posix_memalign().
 * Zero means success and the function simply returns. Any other value is
 * reported on stdout and used as the process exit status.
 */
void check_alloc(int i) {
    if (i == 0) {
        return;
    }
    printf("Allocation failure %d\n", i);
    exit(i);
}
/* malloc() wrapper for the benchmark: reports and exits instead of returning NULL. */
static void *bench_alloc(size_t size) {
    void *mem = malloc(size);
    if (mem == NULL) {
        printf("Allocation failure\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Benchmark: builds 1000 gmms of 16 gaussians each (dimension 40) with
 * synthetic parameters, scores 1000 synthetic input vectors against every
 * gmm, and prints the accumulated score and the elapsed CPU time.
 *
 * The scratch buffers used to feed the initialize_* functions, the packed
 * input vectors and the raw input rows are all released here. The global
 * gmm table is left allocated.
 */
void test_simd256() {
    /* Named model_count so the local does not shadow the global gmm_count. */
    const int model_count = 1000;
    const int dimension = 40;
    const int gauss_count = 16;
    const int input_amount = 1000;
    int i, k, z;

    initialize(model_count);
    printf("gmms allocated\n");

    // prepare gmms
    const float mi = 0.0033f;
    float mi_start = -1.43f;
    /* initialize_gmm() copies the weights, so one scratch buffer is reused. */
    float *mixtures = bench_alloc(sizeof(float) * gauss_count);
    for (i = 0; i < model_count; ++i) {
        for (k = 0; k < gauss_count; ++k) {
            mixtures[k] = mi_start + (float)k * mi;
        }
        mi_start += mi;
        initialize_gmm(i, gauss_count, mixtures);
    }
    free(mixtures);

    // prepare gaussians
    const float ma = 0.0011f;
    float ma_start = -0.75f;
    const float pa = 0.0025f;
    float pa_start = -0.33f;
    /* initialize_gaussian() copies both arrays, so they are reused as well. */
    float *means = bench_alloc(sizeof(float) * dimension);
    float *presicions = bench_alloc(sizeof(float) * dimension);
    for (i = 0; i < model_count; ++i) {
        for (k = 0; k < gauss_count; ++k) {
            for (z = 0; z < dimension; ++z) {
                means[z] = ma_start + (float)z * ma;
                presicions[z] = pa_start + (float)z * pa;
            }
            ma_start += ma;
            pa_start += pa;
            initialize_gaussian(i, k, dimension, means, presicions, 0.3f);
        }
    }
    free(means);
    free(presicions);
    printf("Gausses initialized.");

    // Prepare input
    float **input = bench_alloc(sizeof(float *) * input_amount);
    for (i = 0; i < input_amount; ++i) {
        input[i] = bench_alloc(sizeof(float) * dimension);
    }
    const float ia = 0.0011f;
    float ia_start = -0.75f;
    for (i = 0; i < input_amount; ++i) {
        for (k = 0; k < dimension; ++k) {
            input[i][k] = ia_start + (float)k * ia;
        }
        ia_start += ia;
    }

    // run test
    clock_t start = clock(), diff;
    float result = 0.0f;
    for (i = 0; i < input_amount; ++i) {
        __m256 *i256 = convert_input(&input[i][0], dimension);
        for (k = 0; k < model_count; ++k) {
            result += score_gmm(k, i256);
        }
        /* convert_input() hands over ownership of the packed buffer. */
        free(i256);
    }
    diff = clock() - start;
    int msec = (int)(diff * 1000 / CLOCKS_PER_SEC);
    printf("result = %f", result);
    printf("Time taken %d seconds %d milliseconds\n", msec/1000, msec%1000);

    for (i = 0; i < input_amount; ++i) {
        free(input[i]);
    }
    free(input);
}
/* Program entry point: runs the AVX scoring benchmark. Arguments are ignored. */
int main( int argc, char *argv[] ) {
    (void)argc;
    (void)argv;

    test_simd256();
    return 0;
}
void simple_test() { | |
// initialize gmms with count. | |
initialize(2); | |
printf("initialized\n"); | |
// mixture values. | |
float mixture1[] = {-0.5, -0.3}; | |
float mixture2[] = {-0.2}; | |
initialize_gmm(0, 2, mixture1); | |
initialize_gmm(1, 1, mixture2); | |
print_gmm(0); | |
print_gmm(1); | |
float means[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6}; | |
float presicions[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6}; | |
initialize_gaussian(0, 0, 16, means, presicions, 0.4f); | |
print_gaussian(0, 0); | |
float input[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6}; | |
__m256 *i256 = convert_input(&input[0], 16); | |
gmm *g = &gmms[0]; | |
gaussian *gauss = &g->gaussians[0]; | |
float l = log_likelihood(gauss, i256); | |
printf("likelihood=%.3f\n", l); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment