Skip to content

Instantly share code, notes, and snippets.

@andrewcsmith
Last active August 29, 2015 14:06
Show Gist options
  • Save andrewcsmith/e3b94de3eba8a95c9bcf to your computer and use it in GitHub Desktop.
Save andrewcsmith/e3b94de3eba8a95c9bcf to your computer and use it in GitHub Desktop.
xgetrf results and proper specs

I used Apple's BLAS and LAPACK implementation to assert that our version of xgetrf was actually returning the correct results.

  1. Run the command clang++ -framework Accelerate main.cpp
  2. Call ./a.out to see the output on your own terminal
  3. Change the second argument of the call to cblas_sgemm on lines 44, 66, and 141 from CblasNoTrans to CblasTrans. (This changes from row-major to column-major ordering before and after the factoring call.)
  4. The output of the final lines should now the identity matrix.
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <Accelerate/Accelerate.h>
void print_matrix(const int row_count, const int col_count, __CLPK_real* matrix) {
for(int i = 0; i < row_count; i++) {
for(int j = 0; j < col_count; j++) {
printf("% 7.3f", matrix[i*row_count + j]);
if(j+1 < col_count) {
printf(", ");
}
}
printf("\n");
}
}
int main() {
// std::cout << "Hi there.\n";
__CLPK_integer row_count = 3;
__CLPK_integer col_count = 3;
__CLPK_real input[row_count * col_count];
__CLPK_real output[row_count * col_count];
__CLPK_real identity[row_count * col_count];
// Assign some values we know are factorable
memcpy(input, (__CLPK_real[]){4,9,2,3,5,7,8,1,6}, sizeof input);
catlas_sset(row_count * col_count, 0.0, output, 1);
catlas_sset(row_count * col_count, 0.0, identity, 1);
catlas_sset(col_count, 1.0, identity, row_count + 1);
// Print the identity matrix (to prove it's correct)
std::cout << "\nIdentity matrix: \n";
print_matrix(row_count, col_count, identity);
// Print the output
std::cout << "\nInput matrix: \n";
print_matrix(row_count, col_count, input);
// Tranpose the input matrix to column-major form for use with sgetrf
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, input, row_count, identity, row_count, 1.0, output, row_count);
std::cout << "\nTransposed matrix: \n";
print_matrix(row_count, col_count, output);
__CLPK_integer ipiv[row_count];
__CLPK_integer info = 0;
// Factor the output matrix in-place
sgetrf_(&row_count, &col_count, output, &row_count, ipiv, &info);
__CLPK_real factored[row_count * col_count];
catlas_sset(row_count * col_count, 0.0, factored, 1);
cblas_scopy(col_count * row_count, output, 1, factored, 1);
std::cout << "\nFactored matrix: \n";
print_matrix(row_count, col_count, factored);
__CLPK_real buffer[row_count * col_count];
catlas_sset(row_count * col_count, 0.0, buffer, 1);
// Transpose the factored matrix back into row-major form
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, output, row_count, identity, row_count, 1.0, buffer, row_count);
cblas_scopy(col_count * row_count, buffer, 1, output, 1);
std::cout << "\nRetransposed factored matrix: \n";
print_matrix(row_count, col_count, output);
__CLPK_real output_lower[row_count * col_count];
__CLPK_real output_upper[row_count * col_count];
catlas_sset(row_count * col_count, 0.0, output_upper, 1);
catlas_sset(row_count * col_count, 0.0, output_lower, 1);
catlas_sset(col_count, 1.0, output_lower, row_count + 1);
// Break it into upper and lower triangular portions
for(int i = 0; i < row_count; i++) {
for(int j = 0; j < col_count; j++) {
if(j >= i) {
output_upper[i*row_count + j] = output[i*row_count + j];
} else {
output_lower[i*row_count + j] = output[i*row_count + j];
}
}
}
std::cout << "\nUpper factor: \n";
print_matrix(row_count, col_count, output_upper);
std::cout << "\nLower factor: \n";
print_matrix(row_count, col_count, output_lower);
std::cout << "\nPivot indices: \n";
for (int i = 0; i < row_count; i++) {
std::cout << ipiv[i] << " ";
}
std::cout << "\n";
__CLPK_real pivot_matrix[row_count * col_count];
catlas_sset(row_count * col_count, 0.0, pivot_matrix, 1);
// Create the swap array
for (int i = 0; i < row_count; i++) {
if (ipiv[i]-1 != i) {
ipiv[ipiv[i]-1] = i+1;
}
}
std::cout << "\nModified pivot swap indices: \n";
for (int i = 0; i < row_count; i++) {
std::cout << ipiv[i] << " ";
}
std::cout << "\n";
// Use the swap array to initialize the pivot matrix
for (int i = 0; i < row_count; i++) {
pivot_matrix[i*col_count + ipiv[i] - 1] = 1;
}
std::cout << "\nPivot matrix: \n";
print_matrix(row_count, col_count, pivot_matrix);
catlas_sset(row_count * col_count, 0.0, buffer, 1);
catlas_sset(row_count * col_count, 0.0, output, 1);
// Multiply P*L*U and see whether we get A back
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, pivot_matrix, row_count, output_lower, row_count, 1.0, buffer, row_count);
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, buffer, row_count, output_upper, row_count, 1.0, output, row_count);
std::cout << "\nRe-multiplied matrix: \n";
print_matrix(row_count, col_count, output);
__CLPK_real work[row_count];
__CLPK_integer lwork = row_count;
sgetri_(&row_count, factored, &col_count, ipiv, work, &lwork, &info);
catlas_sset(row_count * col_count, 0, buffer, 1);
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, factored, row_count, identity, row_count, 1.0, buffer, row_count);
catlas_sset(row_count * col_count, 0.0, factored, 1);
cblas_scopy(row_count * col_count, buffer, 1, factored, 1);
std::cout << "\nInverted matrix: \n";
print_matrix(row_count, col_count, factored);
catlas_sset(row_count * col_count, 0, buffer, 1);
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, row_count, col_count, col_count, 1.0, factored, row_count, input, row_count, 1.0, buffer, row_count);
std::cout << "\nShould be identity matrix: \n";
print_matrix(row_count, col_count, buffer);
return 0;
}
Identity matrix:
1.000, 0.000, 0.000
0.000, 1.000, 0.000
0.000, 0.000, 1.000
Input matrix:
4.000, 9.000, 2.000
3.000, 5.000, 7.000
8.000, 1.000, 6.000
Transposed matrix:
4.000, 9.000, 2.000
3.000, 5.000, 7.000
8.000, 1.000, 6.000
Factored matrix:
9.000, 0.222, 0.444
5.000, 5.889, 0.132
1.000, 5.778, 6.792
Retransposed factored matrix:
9.000, 0.222, 0.444
5.000, 5.889, 0.132
1.000, 5.778, 6.792
Upper factor:
9.000, 0.222, 0.444
0.000, 5.889, 0.132
0.000, 0.000, 6.792
Lower factor:
1.000, 0.000, 0.000
5.000, 1.000, 0.000
1.000, 5.778, 1.000
Pivot indices:
2 3 3
Modified pivot swap indices:
2 1 3
Pivot matrix:
0.000, 1.000, 0.000
1.000, 0.000, 0.000
0.000, 0.000, 1.000
Re-multiplied matrix:
45.000, 7.000, 2.354
9.000, 0.222, 0.444
9.000, 34.247, 8.000
Inverted matrix:
0.106, 0.022, -0.061
-0.103, 0.189, -0.019
0.064, -0.144, 0.147
Should be identity matrix:
-0.000, 1.000, 0.000
-0.000, -0.000, 1.000
1.000, 0.000, 0.000
Identity matrix:
1.000, 0.000, 0.000
0.000, 1.000, 0.000
0.000, 0.000, 1.000
Input matrix:
4.000, 9.000, 2.000
3.000, 5.000, 7.000
8.000, 1.000, 6.000
Transposed matrix:
4.000, 3.000, 8.000
9.000, 5.000, 1.000
2.000, 7.000, 6.000
Factored matrix:
8.000, 0.500, 0.375
1.000, 8.500, 0.544
6.000, -1.000, 5.294
Retransposed factored matrix:
8.000, 1.000, 6.000
0.500, 8.500, -1.000
0.375, 0.544, 5.294
Upper factor:
8.000, 1.000, 6.000
0.000, 8.500, -1.000
0.000, 0.000, 5.294
Lower factor:
1.000, 0.000, 0.000
0.500, 1.000, 0.000
0.375, 0.544, 1.000
Pivot indices:
3 3 3
Modified pivot swap indices:
3 3 2
Pivot matrix:
0.000, 0.000, 1.000
0.000, 0.000, 1.000
0.000, 1.000, 0.000
Re-multiplied matrix:
3.000, 5.000, 7.000
3.000, 5.000, 7.000
4.000, 9.000, 2.000
Inverted matrix:
0.064, -0.144, 0.147
0.106, 0.022, -0.061
-0.103, 0.189, -0.019
Should be identity matrix:
1.000, 0.000, 0.000
0.000, 1.000, 0.000
0.000, 0.000, 1.000
Identity matrix:
1.000, 0.000, 0.000
0.000, 1.000, 0.000
0.000, 0.000, 1.000
Input matrix:
4.000, 9.000, 2.000
3.000, 5.000, 7.000
8.000, 1.000, 6.000
Transposed matrix:
4.000, 9.000, 2.000
3.000, 5.000, 7.000
8.000, 1.000, 6.000
Factored matrix:
9.000, 0.222, 0.444
5.000, 5.889, 0.132
1.000, 5.778, 6.792
Retransposed factored matrix:
9.000, 0.222, 0.444
5.000, 5.889, 0.132
1.000, 5.778, 6.792
Upper factor:
9.000, 0.222, 0.444
0.000, 5.889, 0.132
0.000, 0.000, 6.792
Lower factor:
1.000, 0.000, 0.000
5.000, 1.000, 0.000
1.000, 5.778, 1.000
Pivot indices:
2 3 3
Modified pivot swap indices:
2 1 3
Pivot matrix:
0.000, 1.000, 0.000
1.000, 0.000, 0.000
0.000, 0.000, 1.000
Re-multiplied matrix:
45.000, 7.000, 2.354
9.000, 0.222, 0.444
9.000, 34.247, 8.000
Inverted matrix:
0.106, 0.022, -0.061
-0.103, 0.189, -0.019
0.064, -0.144, 0.147
Should be identity matrix:
-0.000, 1.000, 0.000
-0.000, -0.000, 1.000
1.000, 0.000, 0.000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment