Skip to content

Instantly share code, notes, and snippets.

@taoky
Last active August 12, 2022 04:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taoky/2a84766cfe8a2ffe19ee2b1ca9c57e53 to your computer and use it in GitHub Desktop.
Save taoky/2a84766cfe8a2ffe19ee2b1ca9c57e53 to your computer and use it in GitHub Desktop.
QR decomposition with Givens method (OpenMP)
#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define MIN(x, y) ((x) < (y) ? (x) : (y))
int n;
void givens(int from_row, int start_row, int end_row, double (*a)[n], double (*q)[n]) {
for (int i = start_row; i < end_row; i++) {
double sq = sqrt(a[from_row][from_row] * a[from_row][from_row] + a[i][from_row] * a[i][from_row]);
double c = a[from_row][from_row] / sq;
double s = a[i][from_row] / sq;
for (int k = 0; k < n; k++) {
double a_fk = a[from_row][k];
double a_ik = a[i][k];
double q_fk = q[from_row][k];
double q_ik = q[i][k];
a[from_row][k] = c * a_fk + s * a_ik;
q[from_row][k] = c * q_fk + s * q_ik;
a[i][k] = -s * a_fk + c * a_ik;
q[i][k] = -s * q_fk + c * q_ik;
}
// printf("thread [%d]: A\n", omp_get_thread_num());
// print_matrix(a);
// printf("thread [%d]: Q\n", omp_get_thread_num());
// print_matrix(q);
}
}
void print_matrix(double (*matrix)[n]) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
printf("%lf ", matrix[i][j]);
}
puts("");
}
}
int main(void) {
scanf("%d", &n);
if (n <= 0) {
fprintf(stderr, "n must be positive\n");
return -1;
}
double(*a)[n] = malloc(n * sizeof(*a));
double(*q)[n] = malloc(n * sizeof(*q));
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
scanf("%lf", &a[i][j]);
}
}
double start_wtime = omp_get_wtime();
// const int NUM_THREADS = 4;
// omp_set_num_threads(NUM_THREADS);
int num_threads = omp_get_max_threads();
int p = (int)ceil((double)n / num_threads);
// fprintf(stderr, "n %d threads %d p %d\n", n, num_threads, p);
int counters[n];
for (int i = 0; i < n; i++) {
counters[i] = 0;
}
// init q as identity matrix
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
q[i][j] = 0;
}
q[i][i] = 1;
}
#pragma omp parallel
{
int thread = omp_get_thread_num();
if (thread * p < n) {
int end = MIN((thread + 1) * p, n);
for (int i = 0; i < thread * p; i++) {
// fprintf(stderr, "thread [%d] %d acquiring\n", thread, i);
while (1) {
int c;
#pragma omp atomic read
c = counters[i];
c += i / p;
if (c >= thread) {
break;
}
}
// fprintf(stderr, "thread [%d] %d success\n", thread, i);
givens(i, thread * p, end, a, q);
#pragma omp atomic
counters[i]++;
}
for (int i = thread * p; i < end; i++) {
givens(i, i + 1, end, a, q);
#pragma omp atomic
counters[i]++;
}
}
// fprintf(stderr, "thread [%d] done\n", thread);
}
// now R = A
// transpose Q
for (int i = 1; i < n; i++) {
for (int j = 0; j < i; j++) {
double tmp = q[i][j];
q[i][j] = q[j][i];
q[j][i] = tmp;
}
}
fprintf(stderr, "Time: %lf\n", omp_get_wtime() - start_wtime);
puts("Q:");
print_matrix(q);
puts("R:");
print_matrix(a);
free(a);
free(q);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment