Skip to content

Instantly share code, notes, and snippets.

@zzt93
Created June 4, 2016 08:03
Show Gist options
  • Save zzt93/57622ee1f1060e15736c8b28ed243580 to your computer and use it in GitHub Desktop.
Save zzt93/57622ee1f1060e15736c8b28ed243580 to your computer and use it in GitHub Desktop.
Code sample to learn pthread and performance tuning
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "prime.h"
void prime(long long n) {
// too large n cause stack overflow
size_t mem = sizeof(int)*(n + 1);
int *odd = malloc(mem);
memset(odd, MAY_BE_PRIME, mem);
int size = sqrt(n) + 1;
int i, j;
for (i = 2; i < size; i++) {
if (odd[i] == MAY_BE_PRIME) {
for (j = i * i; j < n; j+=i) {
odd[j] = NOT_PRIME;
}
}
}
//output(odd, n);
}
//int main(int argc, char *argv[]) {
// if (argc <= 1) {
// puts("Usage: prime N(upper bounds)");
// return 0;
// }
//
// long long n = atoll(argv[1]);
// prime(n);
// return 0;
/
#ifndef __PRIME_H__
#define __PRIME_H__
#define NOT_PRIME 1
#define MAY_BE_PRIME 0
#define START 2
#define START_OPT 3
typedef struct {
int *has;
int start;
int num_cpus;
} arg_type;
static
void output(int *has, int n) {
int k;
for (k = 2; k < n; k++) {
if (has[k] == MAY_BE_PRIME) {
printf("%d ", k);
}
}
puts("");
}
#define false 0
#define true 1
// can't (void *)(*)(void *)?
typedef void *(*Thread_Funcition)(void *);
#endif /* __PRIME_H__ */
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include "prime.h"
int size;
long long n;
int num_cpus;
static inline
int num_to_index(int num) {
//assert((num & 1) == 1);
return (num - 3) / 2;
}
static inline
int index_to_num(int i) {
return i * 2 + 3;
}
void special_output(int *has, int n) {
int k;
printf("2 ");
for (k = 0; k < n; k++) {
if (has[k] == MAY_BE_PRIME) {
printf("%d ", index_to_num(k));
}
}
puts("");
}
void *find_prime_unrolling(void *arg) {
arg_type *tmp = (arg_type *)arg;
int start = tmp->start;
int gap = start - START;
int *has = tmp->has;
int i, j;
for (i = START_OPT; i < size; i+=2) {
if (has[num_to_index(i)] == MAY_BE_PRIME) {
for (j = i * i + gap * i * 2; j < n; j = j + i * num_cpus * 2) {
//printf("d%d ", j);
has[num_to_index(j)] = NOT_PRIME;
j += 2 * num_cpus * i;
if (j < n) {
has[num_to_index(j)] = NOT_PRIME;
j += 2 * num_cpus * i;
if (j < n) {
has[num_to_index(j)] = NOT_PRIME;
j += 2 * num_cpus * i;
if (j < n) {
has[num_to_index(j)] = NOT_PRIME;
}
}
}
}
}
}
return NULL;
}
void *find_prime_opt(void *arg) {
arg_type *tmp = (arg_type *)arg;
int start = tmp->start;
int gap = start - START;
int *has = tmp->has;
int i = START_OPT, j;
int has_i = num_to_index(i);
for (i = START_OPT; i < size; i+=2) {
if (has[has_i++] == MAY_BE_PRIME) {
for (j = i * i + gap * i * 2; j < n; j = j + i * num_cpus * 2) {
//printf("d%d ", j);
has[num_to_index(j)] = NOT_PRIME;
}
}
}
return NULL;
}
void prime_opt(long long num) {
n = num;
size = sqrt(n) + 1;
num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
// store only odd number >= 3
int count = (n - 2) / 2 + (n & 1);
int res;
size_t mem = sizeof(int)*count;
int *odd = malloc(mem);
memset(odd, MAY_BE_PRIME, mem);
int i;
pthread_t a_thread[num_cpus];
arg_type *arg = calloc(num_cpus, sizeof(arg_type));
Thread_Funcition thread_f = find_prime_opt;
for (i = 1; i < num_cpus; i++) {
arg[i].has = odd;
arg[i].start = i + START;
// argument will be sent to thread directly,
// so use malloc or static
res = pthread_create(&a_thread[i], NULL, thread_f, arg+i);
if (res != 0) {
perror("");
exit(EXIT_FAILURE);
}
}
i = 0;
arg[i].has = odd;
arg[i].start = i + START;
thread_f(arg+i);
for (i = 1; i < num_cpus; i++) {
pthread_join(a_thread[i], NULL);
}
//special_output(odd, count);
}
// int main(int argc, char *argv[]){
// if (argc <= 1) {
// puts("Usage: prime N(upper bounds)");
// return 0;
// }
// prime_opt(atoll(argv[1]));
// return 0;
// }
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include "prime.h"
int size;
long long n;
// following two version has no synchronization, which
// not affect the correctness, but will make some duplicate check
// whether the lock will increase the throughput need to compare
// with find_prime_locked();
void *find_prime(void *arg) {
arg_type *tmp = (arg_type *)arg;
int start = tmp->start;
int num_cpus = tmp->num_cpus;
int *has = tmp->has;
int i, j;
//printf("s%d ", start);
for (i = start; i < size; i+=num_cpus) {
if (has[i] == MAY_BE_PRIME) {
for (j = i * i; j < n; j+=i) {
//printf("d%d ", j);
has[j] = NOT_PRIME;
}
}
}
return NULL;
}
void *find_prime2(void *arg) {
arg_type *tmp = (arg_type *)arg;
int start = tmp->start;
int gap = start - START;
int num_cpus = tmp->num_cpus;
int *has = tmp->has;
int i, j;
for (i = START; i < size; i++) {
if (has[i] == MAY_BE_PRIME) {
for (j = i * i + gap * i; j < n; j = j + i * num_cpus) {
//printf("d%d ", j);
has[j] = NOT_PRIME;
}
}
}
return NULL;
}
pthread_mutex_t work_mutex =
PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t cond =
PTHREAD_COND_INITIALIZER;
long long reach_count = 0;
void wait_or_notify(int num_cpus) {
/**
Failed trial to simulate cyclic barrier:
if the fourth thread run immediately to
reach here again, other thread will not wake,
cause deadlock
pthread_mutex_lock(&work_mutex);
reach_count++;
if (reach_count != num_cpus) {
while (reach_count != num_cpus || reach_count != 0) {
pthread_cond_wait(&cond, &work_mutex);
}
} else {
pthread_cond_broadcast(&cond);
reach_count = 0;
}
pthread_mutex_unlock(&work_mutex);
*/
pthread_mutex_lock(&work_mutex);
reach_count++;
if (reach_count != num_cpus) {
if (reach_count != num_cpus || reach_count != 0) {
pthread_cond_wait(&cond, &work_mutex);
}
} else {
pthread_cond_broadcast(&cond);
reach_count = 0;
}
pthread_mutex_unlock(&work_mutex);
}
void *find_prime_locked(void *arg) {
arg_type *tmp = (arg_type *)arg;
int gap = tmp->start - START;
int num_cpus = tmp->num_cpus;
int *has = tmp->has;
int i, j;
for (i = START; i < size; i++) {
if (has[i] == MAY_BE_PRIME) {
for (j = i * i + gap * i; j < n; j = j + i * num_cpus) {
//printf("d%d ", j);
has[j] = NOT_PRIME;
}
wait_or_notify(num_cpus);
}
}
return NULL;
}
int is_prime(int aim) {
if ((aim & 1) == 0) {
return false;
}
int i;
int limit = sqrt(aim) + 1;
for (i = 3; i < limit; i+=2) {
if (aim % i == 0) {
return false;
}
}
return true;
}
void *another_algo(void *arg) {
arg_type *tmp = (arg_type *)arg;
int start = tmp->start;
int num_cpus = tmp->num_cpus;
int *has = tmp->has;
int i;
for (i = start + 2; i < n; i+=num_cpus) {
if (!is_prime(i)) {
has[i] = NOT_PRIME;
}
}
return NULL;
}
void prime_pthread() {
int res;
size_t mem = sizeof(int)*(n + 1);
int *has = malloc(mem);
memset(has, MAY_BE_PRIME, mem);
int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
int i;
pthread_t a_thread[num_cpus];
arg_type *arg = calloc(num_cpus, sizeof(arg_type));
Thread_Funcition thread_f = find_prime_locked;
for (i = 1; i < num_cpus; i++) {
arg[i].has = has;
arg[i].num_cpus = num_cpus;
arg[i].start = i + START;
// argument will be sent to thread directly,
// so use malloc or static
res = pthread_create(&a_thread[i], NULL, thread_f, arg+i);
if (res != 0) {
perror("");
exit(EXIT_FAILURE);
}
}
i = 0;
arg[i].has = has;
arg[i].num_cpus = num_cpus;
arg[i].start = i + START;
thread_f(arg+i);
for (i = 1; i < num_cpus; i++) {
pthread_join(a_thread[i], NULL);
}
//output(has, n);
}
void set_argu_prime(long long num) {
n = num;
size = sqrt(n) + 1;
prime_pthread();
}
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
suseconds_t getms(struct timeval *s, struct timeval *e) {
return (e->tv_sec - s->tv_sec) * 1000000 + e->tv_usec - s->tv_usec;
}
void prime(long long);
void set_argu_prime(long long n);
void prime_opt(long long num);
int main(int argc, char *argv[]) {
int res;
long long n;
int times = 10;
int i;
// warm up
prime(1000);
set_argu_prime(1000);
prime_opt(1000);
printf("simple : multiple : opt -- ");
while((res = scanf("%lld", &n)) != EOF) {
//clock_t start = clock();
struct timeval start, end;
gettimeofday(&start, NULL);
for (i = 0; i < times; i++) {
prime(n);
}
gettimeofday(&end, NULL);
printf("%ld ", getms(&start, &end));
sleep(1);
//start = clock();
gettimeofday(&start, NULL);
for (i = 0; i < times; i++) {
set_argu_prime(n);
}
gettimeofday(&end, NULL);
printf("\t%ld ", getms(&start, &end));
sleep(1);
gettimeofday(&start, NULL);
for (i = 0; i < times; i++) {
prime_opt(n);
}
gettimeofday(&end, NULL);
printf("\t%ld ", getms(&start, &end));
printf("\nsimple : multiple : opt -- ");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment