Skip to content

Instantly share code, notes, and snippets.

@tiulpin
Created February 14, 2019 13:19
Show Gist options
  • Save tiulpin/074200240633dcac66defa6c6c387791 to your computer and use it in GitHub Desktop.
Save tiulpin/074200240633dcac66defa6c6c387791 to your computer and use it in GitHub Desktop.
Systems of linear equations solver by using gaussian elimination
#include <sys/time.h>
#include <iostream>
#include <pthread.h>
#include <cmath>
#include <omp.h>
#define THREADS 64
const long long N = 2000; //dealing with NxN matrix
double **A2; //too drunk to think about calling from any pthread, so let's do a global variable
double *B2;
pthread_barrier_t barrier;
void partialPivoting(int row, double **A, double *B) {
double ksave = -1.0f;
int k_i = 0;
for (int col = row; col < N; col++) { // Find biggest element in current column
if (std::abs(A[col][row]) > ksave) {
ksave = std::abs(A[col][row]);
k_i = col;
}
}
for (int col = row; col < N; col++) { // Swap rows
double tmp = A[k_i][col];
A[k_i][col] = A[row][col];
A[row][col] = tmp;
}
double tmp = B[k_i];
B[k_i] = B[row];
B[row] = tmp;
}
void checkResult(double **A, const double *B, const double *X) { //checks by calculating the L2-norm of Ax-b
auto *r = new double[N];
for (int i = 0; i < N; i++) {
r[i] = 0;
for (int j = 0; j < N; j++)
r[i] += A[i][j] * X[j];
r[i] -= B[i];
}
double result = 0;
for (int i = 0; i < N; i++)
result += r[i] * r[i];
result = sqrt(result);
std::cout << "Error is " << result << "\n";
}
void print(double **A) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++)
std::cout << (int) A[i][j] << " ";
std::cout << "\n";
}
std::cout << "\n\n";
}
void *gaussianElimination_pthreads(void *s) {
int threadID = *(int *) s; //thread id passed as argument
for (int row = 0; row < N; row++) {
pthread_barrier_wait(&barrier); //wait for all threads to catch up before next row begins
if (threadID == 0)
partialPivoting(row, A2, B2);
pthread_barrier_wait(&barrier); //wait for first thread to complete partial pivoting
double tf = row + threadID * (N - row) / (double) THREADS; //distribute rows left
int threadRow = (int) tf;
for (int k = threadRow; k < std::round(tf + (N - row) / (double) THREADS); k++) { //gaussian elimination, rows divided into seperate threads
if (k <= row)
continue;
double m = A2[k][row] / A2[row][row];
for (int col = row; col < N; col++)
A2[k][col] -= m * A2[row][col];
B2[k] -= m * B2[row];
}
}
pthread_exit(nullptr);
}
void gaussianElimination_openMP(double **A, double* B) {
int k = 0;
for (int row = 0; row < N; row++) {
partialPivoting(row, A, B);
#pragma omp parallel num_threads(THREADS) //current row
#pragma omp for private(k) schedule(dynamic)
for (k = row + 1; k < N; k++) {
double m = A[k][row] / A[row][row];
for (int col = row; col < N; col++)
A[k][col] -= m * A[row][col];
B[k] -= m * B[row];
}
}
}
int main() {
auto A1 = new double *[N]; //init for openmp
A2 = new double *[N]; //a copy for pthreads
for (int i = 0; i < N; i++) {
A1[i] = new double[N];
A2[i] = new double[N];
}
auto B1 = new double[N];
B2 = new double[N];
auto X1 = new double[N];
auto X2 = new double[N];
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
A1[i][j] = 1 + drand48() * 10;
A2[i][j] = A1[i][j];
}
B1[i] = drand48() * 10;
B2[i] = B1[i];
}
print(A1);
//openmp
struct timeval tp{}; //Get the current system time
gettimeofday(&tp, nullptr);
long int start = tp.tv_sec * 1000 + tp.tv_usec / 1000;
gaussianElimination_openMP(A1, B1); // Start algorithm
for (int i = N - 1; i >= 0; i--) { // Backwards solving
X1[i] = B1[i];
for (int j = i + 1; j < N; j++)
X1[i] -= A1[i][j] * X1[j];
X1[i] /= A1[i][i];
}
gettimeofday(&tp, nullptr);
long int end = tp.tv_sec * 1000 + tp.tv_usec / 1000;
//print(A1);
std::cout << "OpenMP. Execution time is " << end - start << " ms\n";
checkResult(A1, B1, X1);
//pthreads
gettimeofday(&tp, nullptr);
start = tp.tv_sec * 1000 + tp.tv_usec / 1000;
pthread_barrier_init(&barrier, nullptr, THREADS);
int thread_rows[THREADS]; // Set thread data
pthread_t threads[THREADS];
pthread_attr_t attr;
pthread_attr_init(&attr);
for (int i = 0; i < THREADS; i++) { // Assign the rows a thread will handle
thread_rows[i] = i;
pthread_create(threads + i, &attr, gaussianElimination_pthreads, (void *) (thread_rows + i));
}
for (auto thread : threads) // Wait for gaussian elimination to finish
pthread_join(thread, nullptr);
for (int i = N - 1; i >= 0; i--) { // Backwards solving
X2[i] = B2[i];
for (int j = i + 1; j < N; j++)
X2[i] -= A2[i][j] * X2[j];
X2[i] /= A2[i][i];
}
gettimeofday(&tp, nullptr);
end = tp.tv_sec * 1000 + tp.tv_usec / 1000;
//print(A2);
std::cout << "pthreads. Execution time is " << end - start << " ms\n";
checkResult(A2, B2, X2);
//not to forget to free vectors and matrices
for (int i = 0; i < N; i++)
delete[] A1[i];
delete[] A1;
delete[] B1;
delete[] X1;
for (int i = 0; i < N; i++)
delete[] A2[i];
delete[] A2;
delete[] B2;
delete[] X2;
}
//package ru.spbstu.telematics.java;
public class SOLES{
private static double[][] A;
private static double[] B;
private static double[] X;
private final static int N = 1000;
private final static int threadsCounter = 64;
static class Barrier {
private int awaitingThreads;
Barrier() {
awaitingThreads = threadsCounter;
}
synchronized void await() throws InterruptedException {
awaitingThreads--;
if (awaitingThreads > 0) {
this.wait();
} else {
awaitingThreads = threadsCounter;
notifyAll();
}
}
}
private static void print() {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++)
System.out.print(A[i][j] + " ");
System.out.print("\n");
}
System.out.print("\n\n");
}
private static void checkResult() {
double[] r = new double[N];
for (int i = 0; i < N; i++) {
r[i] = 0;
for (int j = 0; j < N; j++)
r[i] += A[i][j] * X[j];
r[i] -= B[i];
}
double result = 0;
for (int i = 0; i < N; i++)
result += r[i] * r[i];
result = Math.sqrt(result);
System.out.println("Error is " + result + ".\n");
}
private static class MyThread implements Runnable {
int ID;
Barrier b;
MyThread(int id, Barrier barrier) {
ID = id;
b = barrier;
}
public void run() {
for (int row = 0; row < N; row++) {
try {
b.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (ID == 0)
partialPivoting(row);
try {
b.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
double tf = row + ID * (N - row) / (float) threadsCounter;
int thread_row = (int) tf;
for (int k = thread_row; k < Math.round(tf + (N - row) / (double) threadsCounter); k++) {
if (k <= row)
continue;
double m = A[k][row] / A[row][row];
for (int col = row; col < N; col++) {
A[k][col] -= m * A[row][col];
}
B[k] -= m * B[row];
}
}
}
}
private static void partialPivoting(int row) {
double ksave = -1.0f;
int k_i = 0;
for (int col = row; col < N; col++) {
if (Math.abs(A[col][row]) > ksave) {
ksave = Math.abs(A[col][row]);
k_i = col;
}
}
for (int col = row; col < N; col++) {
double tmp = A[k_i][col];
A[k_i][col] = A[row][col];
A[row][col] = tmp;
}
double tmp = B[k_i];
B[k_i] = B[row];
B[row] = tmp;
}
public static void main(String[] args) throws InterruptedException {
A = new double[N][];
for (int i = 0; i < N; i++)
A[i] = new double[N];
B = new double[N];
B = new double[N];
X = new double[N];
Barrier barrier = new Barrier();
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
A[i][j] = 1 + Math.random() * 10;
}
B[i] = Math.random() * 10;
}
//print();
var startTime = System.currentTimeMillis();
var threads = new Thread[threadsCounter];
for (int i = 0; i < threadsCounter; i++) {
threads[i] = new Thread(new MyThread(i, barrier));
threads[i].start();
}
for (int i = 0; i < threadsCounter; i++)
threads[i].join();
for (int i = N - 1; i >= 0; i--) {
X[i] = B[i];
for (int j = i + 1; j < N; j++)
X[i] -= A[i][j] * X[j];
X[i] /= A[i][i];
}
//print();
var elapsed = System.currentTimeMillis() - startTime;
System.out.println(elapsed + " ms\n");
checkResult();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment