Skip to content

Instantly share code, notes, and snippets.

@dvsseed
Created June 23, 2022 06:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dvsseed/9b7d77b79079e5e9b361ed4faf1ca274 to your computer and use it in GitHub Desktop.
Save dvsseed/9b7d77b79079e5e9b361ed4faf1ca274 to your computer and use it in GitHub Desktop.
To get the SVD with Eigen C++ template library
#define EIGEN_USE_MKL_ALL
#define EIGEN_VECTORIZE_SSE4_2
#include <bits/stdc++.h>
#include <sys/time.h>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <exception>
#include <Eigen/Dense>
#include <Eigen/LU>
#include <Eigen/Core>
#include <Eigen/SVD>
using Eigen::MatrixXd;
using namespace std;
using namespace Eigen;
using namespace Eigen::internal;
using namespace Eigen::Architecture;
using Eigen::IOFormat;
using std::ifstream;
using std::ofstream;
using std::stringstream;
using std::cout;
using std::endl;
using std::string;
using std::getline;
using std::exception;
using std::to_string;
typedef long long int ll;
typedef long double ld;
typedef vector<ld> vld;
// MatrixXd: Matrix of doubles
int csvRead(MatrixXd& outputMatrix, const string& fileName, const streamsize dPrec) {
ifstream inputData;
inputData.open(fileName);
cout.precision(dPrec);
if (!inputData) {
cout << fileName << " is not found." << endl;
return -1;
}
string fileline, filecell;
unsigned int prevNoOfCols = 0, noOfRows = 0, noOfCols = 0;
getline(inputData, fileline); // skip the first line(header)
while (getline(inputData, fileline)) {
noOfCols = 0;
stringstream linestream(fileline);
while (getline(linestream, filecell, ',')) {
// try {
// cout << stod(filecell) << endl;
// if ( !(filecell.empty()) && isdigit(filecell[0]) ) {
// cout << filecell << endl;
// } else {
// stod(filecell);
// }
// } catch (exception &e) {
// cout << "Error: " << e.what() << " <-> for getline" << endl;
// return -1;
// }
noOfCols++;
}
// cout << "Rows: " << noOfRows << endl;
if (noOfRows++ == 0) {
prevNoOfCols = noOfCols;
}
if (prevNoOfCols != noOfCols) {
cout << "prevNoOfCols <> noOfCols" << endl;
return -1;
}
}
inputData.close();
// cout << "No of Rows:" << noOfRows << ", No of Cols:" << noOfCols << endl;
// noOfCols = noOfCols - 1; // 最後一個col=label標籤, 不要列入SVD計算
outputMatrix.resize(noOfRows, noOfCols);
inputData.open(fileName);
getline(inputData, fileline); // skip the first line(header)
noOfRows = 0;
while (getline(inputData, fileline)) {
noOfCols = 0;
stringstream linestream(fileline);
while (getline(linestream, filecell, ',')) {
outputMatrix(noOfRows, noOfCols++) = stod(filecell);
}
noOfRows++;
}
return 0;
}
// Saving data in a file
void saveData(string fileName, MatrixXd matrix) {
// https://eigen.tuxfamily.org/dox/structEigen_1_1IOFormat.html
const static IOFormat CSVFormat(FullPrecision, DontAlignCols, ", ", "\n");
ofstream file(fileName);
if (file.is_open()) {
file << matrix.format(CSVFormat);
file.close();
}
}
MatrixXd openData(string fileToOpen) {
// the inspiration for creating this function was drawn from here (I did NOT copy and paste the code)
// https://stackoverflow.com/questions/34247057/how-to-read-csv-file-and-assign-to-eigen-matrix
// the input is the file: "fileToOpen.csv":
// a,b,c
// d,e,f
// This function converts input file data into the Eigen matrix format
// the matrix entries are stored in this variable row-wise. For example if we have the matrix:
// M=[a b c
// d e f]
// the entries are stored as matrixEntries=[a,b,c,d,e,f], that is the variable "matrixEntries" is a row vector
// later on, this vector is mapped into the Eigen matrix format
vector<double> matrixEntries;
// in this object we store the data from the matrix
ifstream matrixDataFile(fileToOpen);
// this variable is used to store the row of the matrix that contains commas
string matrixRowString;
// this variable is used to store the matrix entry;
string matrixEntry;
// this variable is used to track the number of rows
int matrixRowNumber = 0;
while (getline(matrixDataFile, matrixRowString)) // here we read a row by row of matrixDataFile and store every line into the string variable matrixRowString
{
stringstream matrixRowStringStream(matrixRowString); // convert matrixRowString that is a string to a stream variable.
while (getline(matrixRowStringStream, matrixEntry, ',')) // here we read pieces of the stream matrixRowStringStream until every comma, and store the resulting character into the matrixEntry
{
matrixEntries.push_back(stod(matrixEntry)); // here we convert the string to double and fill in the row vector storing all the matrix entries
}
matrixRowNumber++; // update the column numbers
}
// here we convet the vector variable into the matrix and return the resulting object,
// note that matrixEntries.data() is the pointer to the first memory location at which the entries of the vector matrixEntries are stored;
return Map<Matrix<double, Dynamic, Dynamic, RowMajor>>(matrixEntries.data(), matrixRowNumber, matrixEntries.size() / matrixRowNumber);
}
void solve() {
ll n = 100000000;
vld v;
v.reserve(n);
// omitted
}
int main(int argc, char *argv[]) {
// 解決 std::bad_alloc 問題
// long n = 8196000000;
// vector<double> pointVec;
// vector<double>().swap(pointVec);
// pointVec.reserve(n); // n表示希望预留的size大小
solve();
struct timeval start, end;
// start timer.
gettimeofday(&start, NULL);
int error;
MatrixXd A; // 原始資料=特徵, 不含標籤欄位
// calc 131 times
int i = 0;
// for (int i = 0; i < 132; i++) {
// error = csvRead(A, "./LBPhistogram_Binary_20220524_chunk" + to_string(i) + ".csv", 12);
error = csvRead(A, "./LBPhistogram_Binary_20220609.csv", 12);
cout << "Start...#" << i << endl;
if (error == 0) {
cout << "Matrix: (" << A.rows() << " x " << A.cols() << ")" << ", Size: " << A.size() << endl;
// cout << A << endl;
// cout << "Done." << endl;
// getchar();
// cout << A(0, 0) << endl; // show a matrix(m, n)
// Compute the Singular Value Decomposition(SVD)
// JacobiSVD<Eigen::MatrixXd> svd(A, ComputeThinU | ComputeThinV);
// Eigen::JacobiSVD<Eigen::MatrixXd> svd (A, Eigen::ComputeFullU | Eigen::ComputeFullV);
// implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems
Eigen::BDCSVD<Eigen::MatrixXd> svd (A, Eigen::ComputeFullU | Eigen::ComputeFullV);
// MatrixXd V = svd.matrixV();
// cout << "VT:\n" << V << endl;
// MatrixXd U = svd.matrixU();
// cout << "U:\n" << U << endl;
// W = U * S * VT
MatrixXd W = svd.singularValues();
// MatrixXd S = U.inverse() * A * V.transpose().inverse(); // S = U^-1 * A * VT * -1
// cout << "Sigma:\n" << S << endl;
// cout << "Sigma size: " << S.size() << endl;
// save to matrix
// saveData("./eigenvalue_Binary_20220524_chunk" + to_string(i) + ".csv", W);
saveData("./Eigenvalue_Binary_20220612.csv", W);
// saveData("./sigma_matrix.csv", S);
// cout << "U * Sigma * VT :\n" << U * S * V.transpose() << endl;
// Eigen::EigenSolver<Eigen::MatrixXd> es(A);
// cout << "--- The eigenvalues of A are:\n" << es.eigenvalues() << endl;
// cout << "--- The eigenvectors of A are (one vector per column):\n" << es.eigenvectors() << endl;
// V.resize(0, 0);
// U.resize(0, 0);
W.resize(0, 0);
// S.resize(0, 0);
}
A.resize(0, 0);
// reuse A later
// }
// load the matrix from the file
// matrix_test = openData("matrix.csv");
// print the matrix in console
// cout << matrix_test << endl;
// stop timer.
gettimeofday(&end, NULL);
// Calculating total time taken by the program.
double time_taken;
time_taken = (end.tv_sec - start.tv_sec) * 1e6;
time_taken = (time_taken + (end.tv_usec - start.tv_usec)) * 1e-6;
cout << "Time taken by program is: " << fixed << setprecision(6) << time_taken << " seconds." << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment