Created
June 4, 2016 08:03
-
-
Save zzt93/57622ee1f1060e15736c8b28ed243580 to your computer and use it in GitHub Desktop.
Code sample to learn pthread and performance tuning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <math.h> | |
#include "prime.h" | |
void prime(long long n) { | |
// too large n cause stack overflow | |
size_t mem = sizeof(int)*(n + 1); | |
int *odd = malloc(mem); | |
memset(odd, MAY_BE_PRIME, mem); | |
int size = sqrt(n) + 1; | |
int i, j; | |
for (i = 2; i < size; i++) { | |
if (odd[i] == MAY_BE_PRIME) { | |
for (j = i * i; j < n; j+=i) { | |
odd[j] = NOT_PRIME; | |
} | |
} | |
} | |
//output(odd, n); | |
} | |
//int main(int argc, char *argv[]) { | |
// if (argc <= 1) { | |
// puts("Usage: prime N(upper bounds)"); | |
// return 0; | |
// } | |
// | |
// long long n = atoll(argv[1]); | |
// prime(n); | |
// return 0; | |
/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __PRIME_H__ | |
#define __PRIME_H__ | |
#define NOT_PRIME 1 | |
#define MAY_BE_PRIME 0 | |
#define START 2 | |
#define START_OPT 3 | |
typedef struct { | |
int *has; | |
int start; | |
int num_cpus; | |
} arg_type; | |
static | |
void output(int *has, int n) { | |
int k; | |
for (k = 2; k < n; k++) { | |
if (has[k] == MAY_BE_PRIME) { | |
printf("%d ", k); | |
} | |
} | |
puts(""); | |
} | |
#define false 0 | |
#define true 1 | |
// can't (void *)(*)(void *)? | |
typedef void *(*Thread_Funcition)(void *); | |
#endif /* __PRIME_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <pthread.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <math.h> | |
#include "prime.h" | |
int size; | |
long long n; | |
int num_cpus; | |
static inline | |
int num_to_index(int num) { | |
//assert((num & 1) == 1); | |
return (num - 3) / 2; | |
} | |
static inline | |
int index_to_num(int i) { | |
return i * 2 + 3; | |
} | |
void special_output(int *has, int n) { | |
int k; | |
printf("2 "); | |
for (k = 0; k < n; k++) { | |
if (has[k] == MAY_BE_PRIME) { | |
printf("%d ", index_to_num(k)); | |
} | |
} | |
puts(""); | |
} | |
void *find_prime_unrolling(void *arg) { | |
arg_type *tmp = (arg_type *)arg; | |
int start = tmp->start; | |
int gap = start - START; | |
int *has = tmp->has; | |
int i, j; | |
for (i = START_OPT; i < size; i+=2) { | |
if (has[num_to_index(i)] == MAY_BE_PRIME) { | |
for (j = i * i + gap * i * 2; j < n; j = j + i * num_cpus * 2) { | |
//printf("d%d ", j); | |
has[num_to_index(j)] = NOT_PRIME; | |
j += 2 * num_cpus * i; | |
if (j < n) { | |
has[num_to_index(j)] = NOT_PRIME; | |
j += 2 * num_cpus * i; | |
if (j < n) { | |
has[num_to_index(j)] = NOT_PRIME; | |
j += 2 * num_cpus * i; | |
if (j < n) { | |
has[num_to_index(j)] = NOT_PRIME; | |
} | |
} | |
} | |
} | |
} | |
} | |
return NULL; | |
} | |
void *find_prime_opt(void *arg) { | |
arg_type *tmp = (arg_type *)arg; | |
int start = tmp->start; | |
int gap = start - START; | |
int *has = tmp->has; | |
int i = START_OPT, j; | |
int has_i = num_to_index(i); | |
for (i = START_OPT; i < size; i+=2) { | |
if (has[has_i++] == MAY_BE_PRIME) { | |
for (j = i * i + gap * i * 2; j < n; j = j + i * num_cpus * 2) { | |
//printf("d%d ", j); | |
has[num_to_index(j)] = NOT_PRIME; | |
} | |
} | |
} | |
return NULL; | |
} | |
void prime_opt(long long num) { | |
n = num; | |
size = sqrt(n) + 1; | |
num_cpus = sysconf(_SC_NPROCESSORS_ONLN); | |
// store only odd number >= 3 | |
int count = (n - 2) / 2 + (n & 1); | |
int res; | |
size_t mem = sizeof(int)*count; | |
int *odd = malloc(mem); | |
memset(odd, MAY_BE_PRIME, mem); | |
int i; | |
pthread_t a_thread[num_cpus]; | |
arg_type *arg = calloc(num_cpus, sizeof(arg_type)); | |
Thread_Funcition thread_f = find_prime_opt; | |
for (i = 1; i < num_cpus; i++) { | |
arg[i].has = odd; | |
arg[i].start = i + START; | |
// argument will be sent to thread directly, | |
// so use malloc or static | |
res = pthread_create(&a_thread[i], NULL, thread_f, arg+i); | |
if (res != 0) { | |
perror(""); | |
exit(EXIT_FAILURE); | |
} | |
} | |
i = 0; | |
arg[i].has = odd; | |
arg[i].start = i + START; | |
thread_f(arg+i); | |
for (i = 1; i < num_cpus; i++) { | |
pthread_join(a_thread[i], NULL); | |
} | |
//special_output(odd, count); | |
} | |
// int main(int argc, char *argv[]){ | |
// if (argc <= 1) { | |
// puts("Usage: prime N(upper bounds)"); | |
// return 0; | |
// } | |
// prime_opt(atoll(argv[1])); | |
// return 0; | |
// } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <pthread.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <string.h> | |
#include <math.h> | |
#include "prime.h" | |
int size; | |
long long n; | |
// following two version has no synchronization, which | |
// not affect the correctness, but will make some duplicate check | |
// whether the lock will increase the throughput need to compare | |
// with find_prime_locked(); | |
void *find_prime(void *arg) { | |
arg_type *tmp = (arg_type *)arg; | |
int start = tmp->start; | |
int num_cpus = tmp->num_cpus; | |
int *has = tmp->has; | |
int i, j; | |
//printf("s%d ", start); | |
for (i = start; i < size; i+=num_cpus) { | |
if (has[i] == MAY_BE_PRIME) { | |
for (j = i * i; j < n; j+=i) { | |
//printf("d%d ", j); | |
has[j] = NOT_PRIME; | |
} | |
} | |
} | |
return NULL; | |
} | |
void *find_prime2(void *arg) { | |
arg_type *tmp = (arg_type *)arg; | |
int start = tmp->start; | |
int gap = start - START; | |
int num_cpus = tmp->num_cpus; | |
int *has = tmp->has; | |
int i, j; | |
for (i = START; i < size; i++) { | |
if (has[i] == MAY_BE_PRIME) { | |
for (j = i * i + gap * i; j < n; j = j + i * num_cpus) { | |
//printf("d%d ", j); | |
has[j] = NOT_PRIME; | |
} | |
} | |
} | |
return NULL; | |
} | |
pthread_mutex_t work_mutex = | |
PTHREAD_MUTEX_INITIALIZER; | |
pthread_cond_t cond = | |
PTHREAD_COND_INITIALIZER; | |
long long reach_count = 0; | |
void wait_or_notify(int num_cpus) { | |
/** | |
Failed trial to simulate cyclic barrier: | |
if the fourth thread run immediately to | |
reach here again, other thread will not wake, | |
cause deadlock | |
pthread_mutex_lock(&work_mutex); | |
reach_count++; | |
if (reach_count != num_cpus) { | |
while (reach_count != num_cpus || reach_count != 0) { | |
pthread_cond_wait(&cond, &work_mutex); | |
} | |
} else { | |
pthread_cond_broadcast(&cond); | |
reach_count = 0; | |
} | |
pthread_mutex_unlock(&work_mutex); | |
*/ | |
pthread_mutex_lock(&work_mutex); | |
reach_count++; | |
if (reach_count != num_cpus) { | |
if (reach_count != num_cpus || reach_count != 0) { | |
pthread_cond_wait(&cond, &work_mutex); | |
} | |
} else { | |
pthread_cond_broadcast(&cond); | |
reach_count = 0; | |
} | |
pthread_mutex_unlock(&work_mutex); | |
} | |
void *find_prime_locked(void *arg) { | |
arg_type *tmp = (arg_type *)arg; | |
int gap = tmp->start - START; | |
int num_cpus = tmp->num_cpus; | |
int *has = tmp->has; | |
int i, j; | |
for (i = START; i < size; i++) { | |
if (has[i] == MAY_BE_PRIME) { | |
for (j = i * i + gap * i; j < n; j = j + i * num_cpus) { | |
//printf("d%d ", j); | |
has[j] = NOT_PRIME; | |
} | |
wait_or_notify(num_cpus); | |
} | |
} | |
return NULL; | |
} | |
int is_prime(int aim) { | |
if ((aim & 1) == 0) { | |
return false; | |
} | |
int i; | |
int limit = sqrt(aim) + 1; | |
for (i = 3; i < limit; i+=2) { | |
if (aim % i == 0) { | |
return false; | |
} | |
} | |
return true; | |
} | |
void *another_algo(void *arg) { | |
arg_type *tmp = (arg_type *)arg; | |
int start = tmp->start; | |
int num_cpus = tmp->num_cpus; | |
int *has = tmp->has; | |
int i; | |
for (i = start + 2; i < n; i+=num_cpus) { | |
if (!is_prime(i)) { | |
has[i] = NOT_PRIME; | |
} | |
} | |
return NULL; | |
} | |
void prime_pthread() { | |
int res; | |
size_t mem = sizeof(int)*(n + 1); | |
int *has = malloc(mem); | |
memset(has, MAY_BE_PRIME, mem); | |
int num_cpus = sysconf(_SC_NPROCESSORS_ONLN); | |
int i; | |
pthread_t a_thread[num_cpus]; | |
arg_type *arg = calloc(num_cpus, sizeof(arg_type)); | |
Thread_Funcition thread_f = find_prime_locked; | |
for (i = 1; i < num_cpus; i++) { | |
arg[i].has = has; | |
arg[i].num_cpus = num_cpus; | |
arg[i].start = i + START; | |
// argument will be sent to thread directly, | |
// so use malloc or static | |
res = pthread_create(&a_thread[i], NULL, thread_f, arg+i); | |
if (res != 0) { | |
perror(""); | |
exit(EXIT_FAILURE); | |
} | |
} | |
i = 0; | |
arg[i].has = has; | |
arg[i].num_cpus = num_cpus; | |
arg[i].start = i + START; | |
thread_f(arg+i); | |
for (i = 1; i < num_cpus; i++) { | |
pthread_join(a_thread[i], NULL); | |
} | |
//output(has, n); | |
} | |
void set_argu_prime(long long num) { | |
n = num; | |
size = sqrt(n) + 1; | |
prime_pthread(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <time.h> | |
#include <sys/time.h> | |
#include <unistd.h> | |
suseconds_t getms(struct timeval *s, struct timeval *e) { | |
return (e->tv_sec - s->tv_sec) * 1000000 + e->tv_usec - s->tv_usec; | |
} | |
void prime(long long); | |
void set_argu_prime(long long n); | |
void prime_opt(long long num); | |
int main(int argc, char *argv[]) { | |
int res; | |
long long n; | |
int times = 10; | |
int i; | |
// warm up | |
prime(1000); | |
set_argu_prime(1000); | |
prime_opt(1000); | |
printf("simple : multiple : opt -- "); | |
while((res = scanf("%lld", &n)) != EOF) { | |
//clock_t start = clock(); | |
struct timeval start, end; | |
gettimeofday(&start, NULL); | |
for (i = 0; i < times; i++) { | |
prime(n); | |
} | |
gettimeofday(&end, NULL); | |
printf("%ld ", getms(&start, &end)); | |
sleep(1); | |
//start = clock(); | |
gettimeofday(&start, NULL); | |
for (i = 0; i < times; i++) { | |
set_argu_prime(n); | |
} | |
gettimeofday(&end, NULL); | |
printf("\t%ld ", getms(&start, &end)); | |
sleep(1); | |
gettimeofday(&start, NULL); | |
for (i = 0; i < times; i++) { | |
prime_opt(n); | |
} | |
gettimeofday(&end, NULL); | |
printf("\t%ld ", getms(&start, &end)); | |
printf("\nsimple : multiple : opt -- "); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment