Last active
January 13, 2018 16:55
-
-
Save 14roiron/da701502dcaa3e4b7de542951cbbdf03 to your computer and use it in GitHub Desktop.
code vect
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <immintrin.h> | |
#include <sys/time.h> // for timing | |
#include <pthread.h> | |
#define VEC_SIZE 2147483647 | |
double t1,t2; | |
float vecteurBase[VEC_SIZE] __attribute__((aligned(32))); | |
__m256 array; | |
struct thread_data{ | |
unsigned int thread_id; | |
float *U; | |
int n; | |
int k; | |
float a; | |
float result; | |
}; | |
double now(){ | |
// Retourne l'heure actuelle en secondes | |
struct timeval t; double f_t; | |
gettimeofday(&t, NULL); | |
f_t = t.tv_usec; f_t = f_t/1000000.0; f_t +=t.tv_sec; | |
return f_t; | |
} | |
void initVect(float *vecteurBase) | |
{ | |
srand(time(0)); | |
for(int i=0;i<VEC_SIZE;i++) | |
{ | |
vecteurBase[i] = (float) i;//rand(); | |
} | |
return; | |
} | |
float* copyVec(float *vecteurBase) | |
{ | |
float* copy; | |
copy = (float*) _mm_malloc(sizeof(float)*VEC_SIZE,32); | |
for(int i =0;i<VEC_SIZE;i++) | |
{ | |
copy[i]=vecteurBase[i]; | |
} | |
return copy; | |
} | |
//first naive implementation of the function, double loop | |
float gm(float *U, float a, int k, int n){ | |
float result,p; | |
result=0; | |
for(int i=0;i<n;i++){ | |
p=1; | |
for(int j=0;j<k;j++) | |
p*=U[i]-a; //do the exponentiation | |
result+=p;//sum all the steps | |
} | |
return result; | |
} | |
/*second implementation of the function*/ | |
/*float gm2(float *U, float a, int k, int n){*/ | |
/*float result;*/ | |
/*result=0;*/ | |
/*for(int i=0;i<n;i++){*/ | |
/*result+=(float) exp(k* log((double)(U[i]-a)));*/ | |
/*}*/ | |
/*return result;*/ | |
/*}*/ | |
/** | |
* doing the vector function | |
*/ | |
float gm_vec(float* vec,float a, int k, int n){ | |
float result[8] __attribute__((aligned(32))); | |
float *rest; | |
int sizeMod8 = n / 8; //we are going to loop around sizeMod8, and then we are going to finish | |
__m256 vector, vectA,accumulator,temp; | |
const vectA = _mm256_set1_ps(a);//populate vectA with 8 times a to do the vector sub | |
accumulator = _mm256_setzero_ps(); | |
for(int i=0;i<sizeMod8;i++){ | |
vector = _mm256_load_ps(&(vec[8*i])); // we load 8 floats, starting from the 8 * k | |
vector = _mm256_sub_ps(vector,vectA); | |
temp = _mm256_set1_ps(1.); //init to 1 | |
for(int j=0;j<k;j++) | |
{ | |
temp = _mm256_mul_ps(temp,vector); | |
} | |
accumulator = _mm256_add_ps(accumulator,temp); | |
} | |
accumulator = _mm256_hadd_ps(accumulator,accumulator); | |
accumulator = _mm256_hadd_ps(accumulator,accumulator); | |
accumulator = _mm256_hadd_ps(accumulator,accumulator); | |
//after three times all the cells equals to the sum of the vector, we wan to extract the first one | |
_mm256_store_ps(result,accumulator); | |
rest = (&(vec[sizeMod8*8])); // the modulo 8 array we can't vectorize | |
result[0] = result[0] + gm(rest,a,k,n-sizeMod8*8);//we add the end that we can't vectorize, using previous function | |
return result[0]; | |
} | |
float variance(float* U, int n, int mode) | |
{ | |
float mean; | |
if(mode) | |
{ | |
mean = gm_vec(U,0.,1,n)/(float) n; | |
return gm_vec(U,mean,2,n)/(float) n; | |
} | |
else{ | |
mean = gm(U,0.,1,n)/(float) n; | |
return gm(U,mean,2,n)/(float) n; | |
} | |
} | |
/*-------------------------------------------------------------*/ | |
/* This is the thread version of the selected code portion */ | |
/*-------------------------------------------------------------*/ | |
void *thread_function(void *threadarg){ | |
/* Local variables */ | |
int s; | |
/* Association between shared variables and their correspondances */ | |
struct thread_data *thread_pointer_data; | |
thread_pointer_data = (struct thread_data *)threadarg; | |
thread_pointer_data->result = | |
gm_vec(thread_pointer_data->U,thread_pointer_data->a,thread_pointer_data->k,thread_pointer_data->n); | |
//send the result of the vect function to the structure back | |
pthread_exit(NULL); | |
return 0; | |
} | |
float create_thread(float* U, float a,int k,int n, int nb_threads) | |
{ | |
float result=0; | |
int i; | |
int split_size = n/nb_threads; | |
struct thread_data *threads_data = malloc(nb_threads*sizeof(struct thread_data)); | |
pthread_t *(thread_ptr) = malloc(nb_threads*sizeof(pthread_t *)); | |
//create pointers for threads and data | |
//we split the datas before launching each thread | |
for(i=0;i<nb_threads-1;i++){ // last thread is different | |
threads_data[i].thread_id = i; | |
threads_data[i].n = split_size; | |
threads_data[i].U = &(U[i*split_size]); | |
threads_data[i].result=0; | |
threads_data[i].a = a; | |
threads_data[i].k = k; | |
pthread_create(&(thread_ptr[i]), NULL, thread_function, (void *) &threads_data[i]); | |
} | |
//last thread | |
i=nb_threads - 1; | |
threads_data[i].thread_id = i; | |
threads_data[i].n = n - i*split_size; | |
threads_data[i].U = &(U[i*split_size]); | |
threads_data[i].result=0; | |
threads_data[i].a = a; | |
threads_data[i].k = k; | |
pthread_create(&thread_ptr[i], NULL, thread_function, (void *) &threads_data[i]); | |
/* Wait for every thread to complete */ | |
for(i=0;i<nb_threads;i++) pthread_join(thread_ptr[i], NULL); | |
for(i=0;i<nb_threads;i++) result += threads_data[i].result;//add all the results | |
return result/(float) n; | |
} | |
float variance_thread(float* U, int n, int nb_threads) | |
{ | |
float mean; | |
mean = create_thread(U,0.,1,n,nb_threads); | |
return create_thread(U,mean,2,n,nb_threads); | |
} | |
int main (int argc, char *argv[]){ | |
double t1,t2,scalaire,vect,thread; | |
float result,a; | |
int k, nb_threads; | |
initVect(vecteurBase); | |
if(argc > 1) | |
nb_threads = atoi(argv[1]); | |
else | |
nb_threads = 2; | |
printf("Calcul scalaire\n"); | |
t1=now(); | |
result = variance(vecteurBase,VEC_SIZE,0); | |
t2=now(); | |
scalaire = t2-t1; | |
printf("Resultat: %.3e, time: %.4e\n", result,t2-t1); | |
printf ("\n ---------- \n"); | |
printf("Calcul vectoriel\n"); | |
t1=now(); | |
result = variance(vecteurBase,VEC_SIZE,1); | |
t2=now(); | |
vect=t2-t1; | |
printf("Resultat: %.3e, time: %.4e\n", result,t2-t1); | |
//*/ | |
// | |
printf ("\n ---------- \n"); | |
printf("Calcul Vectoriel Multi-Thread\n"); | |
printf("Nombre de threads: %i\n", nb_threads); | |
t1=now(); | |
result = variance_thread(vecteurBase,VEC_SIZE,nb_threads); | |
t2=now(); | |
thread = t2-t1; | |
printf("Resultat: %.3e, time: %.4e\n", result,t2-t1); | |
//*/ | |
printf ("\n ---------- \n"); | |
printf ("Acceleration Vectoriel: %f %%\n", (scalaire-vect)/vect*100); | |
printf ("Acceleration Thread : %f %%\n", (vect-thread)/thread*100); | |
printf ("Acceleration Thread Vectoriel: %f %%\n", (scalaire-thread)/thread*100); | |
printf ("\n ---------- \n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment