andrewcsmith/main.cpp

## readme.md

      
    Raw
  

              readme.md
            
          
    I used Apple's BLAS and LAPACK implementation to assert that our version of xgetrf was actually returning the correct results.

Run the command clang++ -framework Accelerate main.cpp
Call ./a.out to see the output on your own terminal
Change the second argument of the call to cblas_sgemm on lines 44, 66, and 141 from CblasNoTrans to CblasTrans. (This changes from row-major to column-major ordering before and after the factoring call.)
The output of the final lines should now the identity matrix.


## main.cpp
#include <stdio.h>

#include <iostream>
#include <fstream>
#include <Accelerate/Accelerate.h>

void print_matrix(const int row_count, const int col_count, __CLPK_real* matrix) {
	for(int i = 0; i < row_count; i++) {
		for(int j = 0; j < col_count; j++) {
			printf("% 7.3f", matrix[i*row_count + j]);
			if(j+1 < col_count) {
				printf(", ");
			}
		}
		printf("\n");
	}

}

int main() {
	// std::cout << "Hi there.\n";
	__CLPK_integer row_count = 3;
	__CLPK_integer col_count = 3;
	__CLPK_real input[row_count * col_count];
	__CLPK_real output[row_count * col_count];
	__CLPK_real identity[row_count * col_count];

	// Assign some values we know are factorable
	memcpy(input, (__CLPK_real[]){4,9,2,3,5,7,8,1,6}, sizeof input);

	catlas_sset(row_count * col_count, 0.0, output, 1);
	catlas_sset(row_count * col_count, 0.0, identity, 1);
	catlas_sset(col_count, 1.0, identity, row_count + 1);

	// Print the identity matrix (to prove it's correct)
	std::cout << "\nIdentity matrix: \n";
	print_matrix(row_count, col_count, identity);

	// Print the output
	std::cout << "\nInput matrix: \n";
	print_matrix(row_count, col_count, input);

	// Tranpose the input matrix to column-major form for use with sgetrf
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, input, row_count, identity, row_count, 1.0, output, row_count);

	std::cout << "\nTransposed matrix: \n";
	print_matrix(row_count, col_count, output);

	__CLPK_integer ipiv[row_count];
	__CLPK_integer info = 0;

	// Factor the output matrix in-place
	sgetrf_(&row_count, &col_count, output, &row_count, ipiv, &info);

	__CLPK_real factored[row_count * col_count];
	catlas_sset(row_count * col_count, 0.0, factored, 1);
	cblas_scopy(col_count * row_count, output, 1, factored, 1);

	std::cout << "\nFactored matrix: \n";
	print_matrix(row_count, col_count, factored);

	__CLPK_real buffer[row_count * col_count];
	catlas_sset(row_count * col_count, 0.0, buffer, 1);

	// Transpose the factored matrix back into row-major form
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, output, row_count, identity, row_count, 1.0, buffer, row_count);
	cblas_scopy(col_count * row_count, buffer, 1, output, 1);

	std::cout << "\nRetransposed factored matrix: \n";
	print_matrix(row_count, col_count, output);

	__CLPK_real output_lower[row_count * col_count];
	__CLPK_real output_upper[row_count * col_count];

	catlas_sset(row_count * col_count, 0.0, output_upper, 1);
	catlas_sset(row_count * col_count, 0.0, output_lower, 1);
	catlas_sset(col_count, 1.0, output_lower, row_count + 1);

	// Break it into upper and lower triangular portions
	for(int i = 0; i < row_count; i++) {
		for(int j = 0; j < col_count; j++) {
			if(j >= i) {
				output_upper[i*row_count + j] = output[i*row_count + j];
			} else {
				output_lower[i*row_count + j] = output[i*row_count + j];
			}
		}
	}

	std::cout << "\nUpper factor: \n";
	print_matrix(row_count, col_count, output_upper);

	std::cout << "\nLower factor: \n";
	print_matrix(row_count, col_count, output_lower);

	std::cout << "\nPivot indices: \n";
	for (int i = 0; i < row_count; i++) {
		std::cout << ipiv[i] << " ";
	}
	std::cout << "\n";

	__CLPK_real pivot_matrix[row_count * col_count];
	catlas_sset(row_count * col_count, 0.0, pivot_matrix, 1);

	// Create the swap array
	for (int i = 0; i < row_count; i++) {
		if (ipiv[i]-1 != i) {
			ipiv[ipiv[i]-1] = i+1;
		}
	}

	std::cout << "\nModified pivot swap indices: \n";
	for (int i = 0; i < row_count; i++) {
		std::cout << ipiv[i] << " ";
	}
	std::cout << "\n";

	// Use the swap array to initialize the pivot matrix
	for (int i = 0; i < row_count; i++) {
		pivot_matrix[i*col_count + ipiv[i] - 1] = 1;
	}

	std::cout << "\nPivot matrix: \n";
	print_matrix(row_count, col_count, pivot_matrix);

	catlas_sset(row_count * col_count, 0.0, buffer, 1);
	catlas_sset(row_count * col_count, 0.0, output, 1);

	// Multiply P*L*U and see whether we get A back
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, pivot_matrix, row_count, output_lower, row_count, 1.0, buffer, row_count);
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, buffer, row_count, output_upper, row_count, 1.0, output, row_count);

	std::cout << "\nRe-multiplied matrix: \n";
	print_matrix(row_count, col_count, output);

	__CLPK_real work[row_count];
	__CLPK_integer lwork = row_count;
	sgetri_(&row_count, factored, &col_count, ipiv, work, &lwork, &info);

	catlas_sset(row_count * col_count, 0, buffer, 1);
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, factored, row_count, identity, row_count, 1.0, buffer, row_count);
	catlas_sset(row_count * col_count, 0.0, factored, 1);
	cblas_scopy(row_count * col_count, buffer, 1, factored, 1);

	std::cout << "\nInverted matrix: \n";
	print_matrix(row_count, col_count, factored);

	catlas_sset(row_count * col_count, 0, buffer, 1);
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, factored, row_count, input, row_count, 1.0, buffer, row_count);

	std::cout << "\nShould be identity matrix: \n";
	print_matrix(row_count, col_count, buffer);

	return 0;
}


## results.txt
Identity matrix:
  1.000,   0.000,   0.000
  0.000,   1.000,   0.000
  0.000,   0.000,   1.000

Input matrix:
  4.000,   9.000,   2.000
  3.000,   5.000,   7.000
  8.000,   1.000,   6.000

Transposed matrix:
  4.000,   9.000,   2.000
  3.000,   5.000,   7.000
  8.000,   1.000,   6.000

Factored matrix:
  9.000,   0.222,   0.444
  5.000,   5.889,   0.132
  1.000,   5.778,   6.792

Retransposed factored matrix:
  9.000,   0.222,   0.444
  5.000,   5.889,   0.132
  1.000,   5.778,   6.792

Upper factor:
  9.000,   0.222,   0.444
  0.000,   5.889,   0.132
  0.000,   0.000,   6.792

Lower factor:
  1.000,   0.000,   0.000
  5.000,   1.000,   0.000
  1.000,   5.778,   1.000

Pivot indices:
2 3 3

Modified pivot swap indices:
2 1 3

Pivot matrix:
  0.000,   1.000,   0.000
  1.000,   0.000,   0.000
  0.000,   0.000,   1.000

Re-multiplied matrix:
 45.000,   7.000,   2.354
  9.000,   0.222,   0.444
  9.000,  34.247,   8.000

Inverted matrix:
  0.106,   0.022,  -0.061
 -0.103,   0.189,  -0.019
  0.064,  -0.144,   0.147

Should be identity matrix:
 -0.000,   1.000,   0.000
 -0.000,  -0.000,   1.000
  1.000,   0.000,   0.000

Identity matrix:
  1.000,   0.000,   0.000
  0.000,   1.000,   0.000
  0.000,   0.000,   1.000

Input matrix:
  4.000,   9.000,   2.000
  3.000,   5.000,   7.000
  8.000,   1.000,   6.000

Transposed matrix:
  4.000,   3.000,   8.000
  9.000,   5.000,   1.000
  2.000,   7.000,   6.000

Factored matrix:
  8.000,   0.500,   0.375
  1.000,   8.500,   0.544
  6.000,  -1.000,   5.294

Retransposed factored matrix:
  8.000,   1.000,   6.000
  0.500,   8.500,  -1.000
  0.375,   0.544,   5.294

Upper factor:
  8.000,   1.000,   6.000
  0.000,   8.500,  -1.000
  0.000,   0.000,   5.294

Lower factor:
  1.000,   0.000,   0.000
  0.500,   1.000,   0.000
  0.375,   0.544,   1.000

Pivot indices:
3 3 3

Modified pivot swap indices:
3 3 2

Pivot matrix:
  0.000,   0.000,   1.000
  0.000,   0.000,   1.000
  0.000,   1.000,   0.000

Re-multiplied matrix:
  3.000,   5.000,   7.000
  3.000,   5.000,   7.000
  4.000,   9.000,   2.000

Inverted matrix:
  0.064,  -0.144,   0.147
  0.106,   0.022,  -0.061
 -0.103,   0.189,  -0.019

Should be identity matrix:
  1.000,   0.000,   0.000
  0.000,   1.000,   0.000
  0.000,   0.000,   1.000

Identity matrix:
  1.000,   0.000,   0.000
  0.000,   1.000,   0.000
  0.000,   0.000,   1.000

Input matrix:
  4.000,   9.000,   2.000
  3.000,   5.000,   7.000
  8.000,   1.000,   6.000

Transposed matrix:
  4.000,   9.000,   2.000
  3.000,   5.000,   7.000
  8.000,   1.000,   6.000

Factored matrix:
  9.000,   0.222,   0.444
  5.000,   5.889,   0.132
  1.000,   5.778,   6.792

Retransposed factored matrix:
  9.000,   0.222,   0.444
  5.000,   5.889,   0.132
  1.000,   5.778,   6.792

Upper factor:
  9.000,   0.222,   0.444
  0.000,   5.889,   0.132
  0.000,   0.000,   6.792

Lower factor:
  1.000,   0.000,   0.000
  5.000,   1.000,   0.000
  1.000,   5.778,   1.000

Pivot indices:
2 3 3

Modified pivot swap indices:
2 1 3

Pivot matrix:
  0.000,   1.000,   0.000
  1.000,   0.000,   0.000
  0.000,   0.000,   1.000

Re-multiplied matrix:
 45.000,   7.000,   2.354
  9.000,   0.222,   0.444
  9.000,  34.247,   8.000

Inverted matrix:
  0.106,   0.022,  -0.061
 -0.103,   0.189,  -0.019
  0.064,  -0.144,   0.147

Should be identity matrix:
 -0.000,   1.000,   0.000
 -0.000,  -0.000,   1.000
  1.000,   0.000,   0.000
	#include <stdio.h>

	#include <iostream>
	#include <fstream>
	#include <Accelerate/Accelerate.h>

	void print_matrix(const int row_count, const int col_count, __CLPK_real* matrix) {
	for(int i = 0; i < row_count; i++) {
	for(int j = 0; j < col_count; j++) {
	printf("% 7.3f", matrix[i*row_count + j]);
	if(j+1 < col_count) {
	printf(", ");
	}
	}
	printf("\n");
	}

	}

	int main() {
	// std::cout << "Hi there.\n";
	__CLPK_integer row_count = 3;
	__CLPK_integer col_count = 3;
	__CLPK_real input[row_count * col_count];
	__CLPK_real output[row_count * col_count];
	__CLPK_real identity[row_count * col_count];

	// Assign some values we know are factorable
	memcpy(input, (__CLPK_real[]){4,9,2,3,5,7,8,1,6}, sizeof input);

	catlas_sset(row_count * col_count, 0.0, output, 1);
	catlas_sset(row_count * col_count, 0.0, identity, 1);
	catlas_sset(col_count, 1.0, identity, row_count + 1);

	// Print the identity matrix (to prove it's correct)
	std::cout << "\nIdentity matrix: \n";
	print_matrix(row_count, col_count, identity);

	// Print the output
	std::cout << "\nInput matrix: \n";
	print_matrix(row_count, col_count, input);

	// Tranpose the input matrix to column-major form for use with sgetrf
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, input, row_count, identity, row_count, 1.0, output, row_count);

	std::cout << "\nTransposed matrix: \n";
	print_matrix(row_count, col_count, output);

	__CLPK_integer ipiv[row_count];
	__CLPK_integer info = 0;

	// Factor the output matrix in-place
	sgetrf_(&row_count, &col_count, output, &row_count, ipiv, &info);

	__CLPK_real factored[row_count * col_count];
	catlas_sset(row_count * col_count, 0.0, factored, 1);
	cblas_scopy(col_count * row_count, output, 1, factored, 1);

	std::cout << "\nFactored matrix: \n";
	print_matrix(row_count, col_count, factored);

	__CLPK_real buffer[row_count * col_count];
	catlas_sset(row_count * col_count, 0.0, buffer, 1);

	// Transpose the factored matrix back into row-major form
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, output, row_count, identity, row_count, 1.0, buffer, row_count);
	cblas_scopy(col_count * row_count, buffer, 1, output, 1);

	std::cout << "\nRetransposed factored matrix: \n";
	print_matrix(row_count, col_count, output);

	__CLPK_real output_lower[row_count * col_count];
	__CLPK_real output_upper[row_count * col_count];

	catlas_sset(row_count * col_count, 0.0, output_upper, 1);
	catlas_sset(row_count * col_count, 0.0, output_lower, 1);
	catlas_sset(col_count, 1.0, output_lower, row_count + 1);

	// Break it into upper and lower triangular portions
	for(int i = 0; i < row_count; i++) {
	for(int j = 0; j < col_count; j++) {
	if(j >= i) {
	output_upper[irow_count + j] = output[irow_count + j];
	} else {
	output_lower[irow_count + j] = output[irow_count + j];
	}
	}
	}

	std::cout << "\nUpper factor: \n";
	print_matrix(row_count, col_count, output_upper);

	std::cout << "\nLower factor: \n";
	print_matrix(row_count, col_count, output_lower);

	std::cout << "\nPivot indices: \n";
	for (int i = 0; i < row_count; i++) {
	std::cout << ipiv[i] << " ";
	}
	std::cout << "\n";

	__CLPK_real pivot_matrix[row_count * col_count];
	catlas_sset(row_count * col_count, 0.0, pivot_matrix, 1);

	// Create the swap array
	for (int i = 0; i < row_count; i++) {
	if (ipiv[i]-1 != i) {
	ipiv[ipiv[i]-1] = i+1;
	}
	}

	std::cout << "\nModified pivot swap indices: \n";
	for (int i = 0; i < row_count; i++) {
	std::cout << ipiv[i] << " ";
	}
	std::cout << "\n";

	// Use the swap array to initialize the pivot matrix
	for (int i = 0; i < row_count; i++) {
	pivot_matrix[i*col_count + ipiv[i] - 1] = 1;
	}

	std::cout << "\nPivot matrix: \n";
	print_matrix(row_count, col_count, pivot_matrix);

	catlas_sset(row_count * col_count, 0.0, buffer, 1);
	catlas_sset(row_count * col_count, 0.0, output, 1);

	// Multiply PLU and see whether we get A back
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, pivot_matrix, row_count, output_lower, row_count, 1.0, buffer, row_count);
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, buffer, row_count, output_upper, row_count, 1.0, output, row_count);

	std::cout << "\nRe-multiplied matrix: \n";
	print_matrix(row_count, col_count, output);

	__CLPK_real work[row_count];
	__CLPK_integer lwork = row_count;
	sgetri_(&row_count, factored, &col_count, ipiv, work, &lwork, &info);

	catlas_sset(row_count * col_count, 0, buffer, 1);
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, factored, row_count, identity, row_count, 1.0, buffer, row_count);
	catlas_sset(row_count * col_count, 0.0, factored, 1);
	cblas_scopy(row_count * col_count, buffer, 1, factored, 1);

	std::cout << "\nInverted matrix: \n";
	print_matrix(row_count, col_count, factored);

	catlas_sset(row_count * col_count, 0, buffer, 1);
	cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, factored, row_count, input, row_count, 1.0, buffer, row_count);

	std::cout << "\nShould be identity matrix: \n";
	print_matrix(row_count, col_count, buffer);

	return 0;
	}
	Identity matrix:
	1.000, 0.000, 0.000
	0.000, 1.000, 0.000
	0.000, 0.000, 1.000

	Input matrix:
	4.000, 9.000, 2.000
	3.000, 5.000, 7.000
	8.000, 1.000, 6.000

	Transposed matrix:
	4.000, 9.000, 2.000
	3.000, 5.000, 7.000
	8.000, 1.000, 6.000

	Factored matrix:
	9.000, 0.222, 0.444
	5.000, 5.889, 0.132
	1.000, 5.778, 6.792

	Retransposed factored matrix:
	9.000, 0.222, 0.444
	5.000, 5.889, 0.132
	1.000, 5.778, 6.792

	Upper factor:
	9.000, 0.222, 0.444
	0.000, 5.889, 0.132
	0.000, 0.000, 6.792

	Lower factor:
	1.000, 0.000, 0.000
	5.000, 1.000, 0.000
	1.000, 5.778, 1.000

	Pivot indices:
	2 3 3

	Modified pivot swap indices:
	2 1 3

	Pivot matrix:
	0.000, 1.000, 0.000
	1.000, 0.000, 0.000
	0.000, 0.000, 1.000

	Re-multiplied matrix:
	45.000, 7.000, 2.354
	9.000, 0.222, 0.444
	9.000, 34.247, 8.000

	Inverted matrix:
	0.106, 0.022, -0.061
	-0.103, 0.189, -0.019
	0.064, -0.144, 0.147

	Should be identity matrix:
	-0.000, 1.000, 0.000
	-0.000, -0.000, 1.000
	1.000, 0.000, 0.000

	Identity matrix:
	1.000, 0.000, 0.000
	0.000, 1.000, 0.000
	0.000, 0.000, 1.000

	Input matrix:
	4.000, 9.000, 2.000
	3.000, 5.000, 7.000
	8.000, 1.000, 6.000

	Transposed matrix:
	4.000, 3.000, 8.000
	9.000, 5.000, 1.000
	2.000, 7.000, 6.000

	Factored matrix:
	8.000, 0.500, 0.375
	1.000, 8.500, 0.544
	6.000, -1.000, 5.294

	Retransposed factored matrix:
	8.000, 1.000, 6.000
	0.500, 8.500, -1.000
	0.375, 0.544, 5.294

	Upper factor:
	8.000, 1.000, 6.000
	0.000, 8.500, -1.000
	0.000, 0.000, 5.294

	Lower factor:
	1.000, 0.000, 0.000
	0.500, 1.000, 0.000
	0.375, 0.544, 1.000

	Pivot indices:
	3 3 3

	Modified pivot swap indices:
	3 3 2

	Pivot matrix:
	0.000, 0.000, 1.000
	0.000, 0.000, 1.000
	0.000, 1.000, 0.000

	Re-multiplied matrix:
	3.000, 5.000, 7.000
	3.000, 5.000, 7.000
	4.000, 9.000, 2.000

	Inverted matrix:
	0.064, -0.144, 0.147
	0.106, 0.022, -0.061
	-0.103, 0.189, -0.019

	Should be identity matrix:
	1.000, 0.000, 0.000
	0.000, 1.000, 0.000
	0.000, 0.000, 1.000

	Identity matrix:
	1.000, 0.000, 0.000
	0.000, 1.000, 0.000
	0.000, 0.000, 1.000

	Input matrix:
	4.000, 9.000, 2.000
	3.000, 5.000, 7.000
	8.000, 1.000, 6.000

	Transposed matrix:
	4.000, 9.000, 2.000
	3.000, 5.000, 7.000
	8.000, 1.000, 6.000

	Factored matrix:
	9.000, 0.222, 0.444
	5.000, 5.889, 0.132
	1.000, 5.778, 6.792

	Retransposed factored matrix:
	9.000, 0.222, 0.444
	5.000, 5.889, 0.132
	1.000, 5.778, 6.792

	Upper factor:
	9.000, 0.222, 0.444
	0.000, 5.889, 0.132
	0.000, 0.000, 6.792

	Lower factor:
	1.000, 0.000, 0.000
	5.000, 1.000, 0.000
	1.000, 5.778, 1.000

	Pivot indices:
	2 3 3

	Modified pivot swap indices:
	2 1 3

	Pivot matrix:
	0.000, 1.000, 0.000
	1.000, 0.000, 0.000
	0.000, 0.000, 1.000

	Re-multiplied matrix:
	45.000, 7.000, 2.354
	9.000, 0.222, 0.444
	9.000, 34.247, 8.000

	Inverted matrix:
	0.106, 0.022, -0.061
	-0.103, 0.189, -0.019
	0.064, -0.144, 0.147

	Should be identity matrix:
	-0.000, 1.000, 0.000
	-0.000, -0.000, 1.000
	1.000, 0.000, 0.000