Skip to content

Instantly share code, notes, and snippets.

@microo8
Created November 13, 2012 13:10
Show Gist options
  • Save microo8/4065693 to your computer and use it in GitHub Desktop.
Save microo8/4065693 to your computer and use it in GitHub Desktop.
PCA procedure in C using GSL
#include <assert.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_blas.h>
gsl_matrix* pca(const gsl_matrix* data, unsigned int L)
{
/*
@param data - matrix of data vectors, MxN matrix, each column is a data vector, M - dimension, N - data vector count
@param L - dimension reduction
*/
assert(data != NULL);
assert(L > 0 && L < data->size2);
unsigned int i;
unsigned int rows = data->size1;
unsigned int cols = data->size2;
gsl_vector* mean = gsl_vector_alloc(rows);
for(i = 0; i < rows; i++) {
gsl_vector_set(mean, i, gsl_stats_mean(data->data + i * cols, 1, cols));
}
// Get mean-substracted data into matrix mean_substracted_data.
gsl_matrix* mean_substracted_data = gsl_matrix_alloc(rows, cols);
gsl_matrix_memcpy(mean_substracted_data, data);
for(i = 0; i < cols; i++) {
gsl_vector_view mean_substracted_point_view = gsl_matrix_column(mean_substracted_data, i);
gsl_vector_sub(&mean_substracted_point_view.vector, mean);
}
gsl_vector_free(mean);
// Compute Covariance matrix
gsl_matrix* covariance_matrix = gsl_matrix_alloc(rows, rows);
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0 / (double)(cols - 1), mean_substracted_data, mean_substracted_data, 0.0, covariance_matrix);
gsl_matrix_free(mean_substracted_data);
// Get eigenvectors, sort by eigenvalue.
gsl_vector* eigenvalues = gsl_vector_alloc(rows);
gsl_matrix* eigenvectors = gsl_matrix_alloc(rows, rows);
gsl_eigen_symmv_workspace* workspace = gsl_eigen_symmv_alloc(rows);
gsl_eigen_symmv(covariance_matrix, eigenvalues, eigenvectors, workspace);
gsl_eigen_symmv_free(workspace);
gsl_matrix_free(covariance_matrix);
// Sort the eigenvectors
gsl_eigen_symmv_sort(eigenvalues, eigenvectors, GSL_EIGEN_SORT_ABS_DESC);
gsl_vector_free(eigenvalues);
// Project the original dataset
gsl_matrix* result = gsl_matrix_alloc(L, cols);
gsl_matrix_view L_eigenvectors = gsl_matrix_submatrix(eigenvectors, 0, 0, rows, L);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &L_eigenvectors.matrix, data, 0.0, result);
gsl_matrix_free(eigenvectors);
// Result is n LxN matrix, each column is the original data vector with reduced dimension from M to L
return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment