Skip to content

Instantly share code, notes, and snippets.

@14roiron
Last active January 13, 2018 16:55
Show Gist options
  • Save 14roiron/da701502dcaa3e4b7de542951cbbdf03 to your computer and use it in GitHub Desktop.
Save 14roiron/da701502dcaa3e4b7de542951cbbdf03 to your computer and use it in GitHub Desktop.
code vect
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <immintrin.h>
#include <sys/time.h> // for timing
#include <pthread.h>
#define VEC_SIZE 2147483647
double t1,t2;
float vecteurBase[VEC_SIZE] __attribute__((aligned(32)));
__m256 array;
struct thread_data{
unsigned int thread_id;
float *U;
int n;
int k;
float a;
float result;
};
double now(){
// Retourne l'heure actuelle en secondes
struct timeval t; double f_t;
gettimeofday(&t, NULL);
f_t = t.tv_usec; f_t = f_t/1000000.0; f_t +=t.tv_sec;
return f_t;
}
void initVect(float *vecteurBase)
{
srand(time(0));
for(int i=0;i<VEC_SIZE;i++)
{
vecteurBase[i] = (float) i;//rand();
}
return;
}
float* copyVec(float *vecteurBase)
{
float* copy;
copy = (float*) _mm_malloc(sizeof(float)*VEC_SIZE,32);
for(int i =0;i<VEC_SIZE;i++)
{
copy[i]=vecteurBase[i];
}
return copy;
}
//first naive implementation of the function, double loop
float gm(float *U, float a, int k, int n){
float result,p;
result=0;
for(int i=0;i<n;i++){
p=1;
for(int j=0;j<k;j++)
p*=U[i]-a; //do the exponentiation
result+=p;//sum all the steps
}
return result;
}
/*second implementation of the function*/
/*float gm2(float *U, float a, int k, int n){*/
/*float result;*/
/*result=0;*/
/*for(int i=0;i<n;i++){*/
/*result+=(float) exp(k* log((double)(U[i]-a)));*/
/*}*/
/*return result;*/
/*}*/
/**
* doing the vector function
*/
float gm_vec(float* vec,float a, int k, int n){
float result[8] __attribute__((aligned(32)));
float *rest;
int sizeMod8 = n / 8; //we are going to loop around sizeMod8, and then we are going to finish
__m256 vector, vectA,accumulator,temp;
const vectA = _mm256_set1_ps(a);//populate vectA with 8 times a to do the vector sub
accumulator = _mm256_setzero_ps();
for(int i=0;i<sizeMod8;i++){
vector = _mm256_load_ps(&(vec[8*i])); // we load 8 floats, starting from the 8 * k
vector = _mm256_sub_ps(vector,vectA);
temp = _mm256_set1_ps(1.); //init to 1
for(int j=0;j<k;j++)
{
temp = _mm256_mul_ps(temp,vector);
}
accumulator = _mm256_add_ps(accumulator,temp);
}
accumulator = _mm256_hadd_ps(accumulator,accumulator);
accumulator = _mm256_hadd_ps(accumulator,accumulator);
accumulator = _mm256_hadd_ps(accumulator,accumulator);
//after three times all the cells equals to the sum of the vector, we wan to extract the first one
_mm256_store_ps(result,accumulator);
rest = (&(vec[sizeMod8*8])); // the modulo 8 array we can't vectorize
result[0] = result[0] + gm(rest,a,k,n-sizeMod8*8);//we add the end that we can't vectorize, using previous function
return result[0];
}
float variance(float* U, int n, int mode)
{
float mean;
if(mode)
{
mean = gm_vec(U,0.,1,n)/(float) n;
return gm_vec(U,mean,2,n)/(float) n;
}
else{
mean = gm(U,0.,1,n)/(float) n;
return gm(U,mean,2,n)/(float) n;
}
}
/*-------------------------------------------------------------*/
/* This is the thread version of the selected code portion */
/*-------------------------------------------------------------*/
void *thread_function(void *threadarg){
/* Local variables */
int s;
/* Association between shared variables and their correspondances */
struct thread_data *thread_pointer_data;
thread_pointer_data = (struct thread_data *)threadarg;
thread_pointer_data->result =
gm_vec(thread_pointer_data->U,thread_pointer_data->a,thread_pointer_data->k,thread_pointer_data->n);
//send the result of the vect function to the structure back
pthread_exit(NULL);
return 0;
}
float create_thread(float* U, float a,int k,int n, int nb_threads)
{
float result=0;
int i;
int split_size = n/nb_threads;
struct thread_data *threads_data = malloc(nb_threads*sizeof(struct thread_data));
pthread_t *(thread_ptr) = malloc(nb_threads*sizeof(pthread_t *));
//create pointers for threads and data
//we split the datas before launching each thread
for(i=0;i<nb_threads-1;i++){ // last thread is different
threads_data[i].thread_id = i;
threads_data[i].n = split_size;
threads_data[i].U = &(U[i*split_size]);
threads_data[i].result=0;
threads_data[i].a = a;
threads_data[i].k = k;
pthread_create(&(thread_ptr[i]), NULL, thread_function, (void *) &threads_data[i]);
}
//last thread
i=nb_threads - 1;
threads_data[i].thread_id = i;
threads_data[i].n = n - i*split_size;
threads_data[i].U = &(U[i*split_size]);
threads_data[i].result=0;
threads_data[i].a = a;
threads_data[i].k = k;
pthread_create(&thread_ptr[i], NULL, thread_function, (void *) &threads_data[i]);
/* Wait for every thread to complete */
for(i=0;i<nb_threads;i++) pthread_join(thread_ptr[i], NULL);
for(i=0;i<nb_threads;i++) result += threads_data[i].result;//add all the results
return result/(float) n;
}
float variance_thread(float* U, int n, int nb_threads)
{
float mean;
mean = create_thread(U,0.,1,n,nb_threads);
return create_thread(U,mean,2,n,nb_threads);
}
int main (int argc, char *argv[]){
double t1,t2,scalaire,vect,thread;
float result,a;
int k, nb_threads;
initVect(vecteurBase);
if(argc > 1)
nb_threads = atoi(argv[1]);
else
nb_threads = 2;
printf("Calcul scalaire\n");
t1=now();
result = variance(vecteurBase,VEC_SIZE,0);
t2=now();
scalaire = t2-t1;
printf("Resultat: %.3e, time: %.4e\n", result,t2-t1);
printf ("\n ---------- \n");
printf("Calcul vectoriel\n");
t1=now();
result = variance(vecteurBase,VEC_SIZE,1);
t2=now();
vect=t2-t1;
printf("Resultat: %.3e, time: %.4e\n", result,t2-t1);
//*/
//
printf ("\n ---------- \n");
printf("Calcul Vectoriel Multi-Thread\n");
printf("Nombre de threads: %i\n", nb_threads);
t1=now();
result = variance_thread(vecteurBase,VEC_SIZE,nb_threads);
t2=now();
thread = t2-t1;
printf("Resultat: %.3e, time: %.4e\n", result,t2-t1);
//*/
printf ("\n ---------- \n");
printf ("Acceleration Vectoriel: %f %%\n", (scalaire-vect)/vect*100);
printf ("Acceleration Thread : %f %%\n", (vect-thread)/thread*100);
printf ("Acceleration Thread Vectoriel: %f %%\n", (scalaire-thread)/thread*100);
printf ("\n ---------- \n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment