-
-
Save taoky/2a84766cfe8a2ffe19ee2b1ca9c57e53 to your computer and use it in GitHub Desktop.
QR decomposition with Givens method (OpenMP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h> | |
#include <omp.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#define MIN(x, y) ((x) < (y) ? (x) : (y)) | |
int n; | |
void givens(int from_row, int start_row, int end_row, double (*a)[n], double (*q)[n]) { | |
for (int i = start_row; i < end_row; i++) { | |
double sq = sqrt(a[from_row][from_row] * a[from_row][from_row] + a[i][from_row] * a[i][from_row]); | |
double c = a[from_row][from_row] / sq; | |
double s = a[i][from_row] / sq; | |
for (int k = 0; k < n; k++) { | |
double a_fk = a[from_row][k]; | |
double a_ik = a[i][k]; | |
double q_fk = q[from_row][k]; | |
double q_ik = q[i][k]; | |
a[from_row][k] = c * a_fk + s * a_ik; | |
q[from_row][k] = c * q_fk + s * q_ik; | |
a[i][k] = -s * a_fk + c * a_ik; | |
q[i][k] = -s * q_fk + c * q_ik; | |
} | |
// printf("thread [%d]: A\n", omp_get_thread_num()); | |
// print_matrix(a); | |
// printf("thread [%d]: Q\n", omp_get_thread_num()); | |
// print_matrix(q); | |
} | |
} | |
void print_matrix(double (*matrix)[n]) { | |
for (int i = 0; i < n; i++) { | |
for (int j = 0; j < n; j++) { | |
printf("%lf ", matrix[i][j]); | |
} | |
puts(""); | |
} | |
} | |
int main(void) { | |
scanf("%d", &n); | |
if (n <= 0) { | |
fprintf(stderr, "n must be positive\n"); | |
return -1; | |
} | |
double(*a)[n] = malloc(n * sizeof(*a)); | |
double(*q)[n] = malloc(n * sizeof(*q)); | |
for (int i = 0; i < n; i++) { | |
for (int j = 0; j < n; j++) { | |
scanf("%lf", &a[i][j]); | |
} | |
} | |
double start_wtime = omp_get_wtime(); | |
// const int NUM_THREADS = 4; | |
// omp_set_num_threads(NUM_THREADS); | |
int num_threads = omp_get_max_threads(); | |
int p = (int)ceil((double)n / num_threads); | |
// fprintf(stderr, "n %d threads %d p %d\n", n, num_threads, p); | |
int counters[n]; | |
for (int i = 0; i < n; i++) { | |
counters[i] = 0; | |
} | |
// init q as identity matrix | |
for (int i = 0; i < n; i++) { | |
for (int j = 0; j < n; j++) { | |
q[i][j] = 0; | |
} | |
q[i][i] = 1; | |
} | |
#pragma omp parallel | |
{ | |
int thread = omp_get_thread_num(); | |
if (thread * p < n) { | |
int end = MIN((thread + 1) * p, n); | |
for (int i = 0; i < thread * p; i++) { | |
// fprintf(stderr, "thread [%d] %d acquiring\n", thread, i); | |
while (1) { | |
int c; | |
#pragma omp atomic read | |
c = counters[i]; | |
c += i / p; | |
if (c >= thread) { | |
break; | |
} | |
} | |
// fprintf(stderr, "thread [%d] %d success\n", thread, i); | |
givens(i, thread * p, end, a, q); | |
#pragma omp atomic | |
counters[i]++; | |
} | |
for (int i = thread * p; i < end; i++) { | |
givens(i, i + 1, end, a, q); | |
#pragma omp atomic | |
counters[i]++; | |
} | |
} | |
// fprintf(stderr, "thread [%d] done\n", thread); | |
} | |
// now R = A | |
// transpose Q | |
for (int i = 1; i < n; i++) { | |
for (int j = 0; j < i; j++) { | |
double tmp = q[i][j]; | |
q[i][j] = q[j][i]; | |
q[j][i] = tmp; | |
} | |
} | |
fprintf(stderr, "Time: %lf\n", omp_get_wtime() - start_wtime); | |
puts("Q:"); | |
print_matrix(q); | |
puts("R:"); | |
print_matrix(a); | |
free(a); | |
free(q); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment