- https://software.intel.com/en-us/node/468480
- https://developer.apple.com/library/mac/documentation/Accelerate/Reference/BLAS_Ref/index.html
- http://blog.csdn.net/seven_first/article/details/47378697#1-caffecpugemm-函数
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const float alpha, const float *A,
const int lda, const float *B, const int ldb,
const float beta, float *C, const int ldc);
C = alpha * op(A) * op(B) + beta * C
op(A) = A' if TransA is set, otherwise op(A) = A; op(B) is similar. op(A) is M x K, op(B) is K x N, and C is M x N.
lda must always be at least the number of columns of A as stored in memory, regardless of whether TransA is set; ldb is similar.
Matrices are stored in row-major order on the CPU but in column-major order on the GPU (cuBLAS). So caffe_cpu_gemm computes C = A*B directly, while caffe_gpu_gemm computes C' = B'*A', which leaves the row-major result C = A*B in memory.