Skip to content

Instantly share code, notes, and snippets.

@tell
Created November 3, 2011 02:31
Show Gist options
  • Save tell/1335620 to your computer and use it in GitHub Desktop.
Save tell/1335620 to your computer and use it in GitHub Desktop.
Matrix multiplication in C language
/*
* The author and original source code are from:
* 機械学習序曲(仮) > 数値計算の実行速度 - Python, C, Rの比較
* http://hawaii.naist.jp/~kohei-h/my/python-numeric-comp.html
*
* It is modified version in above Web page.
*
* This program needs data files to construct matrix.
* A generating data files program is published in
* original author's Web page.
*/
/*
* A comparison routine on floating point arithmetic, is added.
* This source code is based on:
* FLP35-C. 浮動小数点数値を比較する際には精度を考慮する
* http://www.jpcert.or.jp/sc-rules/c-flp35-c.html
*/
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/errno.h>
#include <time.h>
#include <string.h>
#include <xmmintrin.h>
#define ABS__(x) ((x) < 0 ? -(x) : (x))
#define MAX__(x,y) ((x) < (y) ? (y) : (x))
#define FUNC_ENTRY__(name) { name, #name }
#define ALIGN__ (16)
typedef float mat_elem_t;
const size_t ALIGNMENT = sizeof(__m128); // ALIGN__;
const size_t SHIFT_TO_ALIGN = 0;
const unsigned int NUM = 10;
void matrix_free(mat_elem_t *(mat_ptr[]), const size_t dim)
{
size_t i;
for (i = 0; i < dim; i++) {
free(mat_ptr[i]);
}
}
void matrix_set_NULL(mat_elem_t *(mat_ptr[]), const size_t dim)
{
size_t i;
for (i = 0; i < dim; i++) {
mat_ptr[i] = NULL;
}
}
void setzero(mat_elem_t *(M[]), const size_t num)
{
size_t i, j;
for(i = 0; i < num; i++) {
for(j = 0; j < num; j++) {
M[i][j] = 0;
}
}
}
void vector_set_zero(mat_elem_t *vec, const size_t dim)
{
size_t i;
for (i = 0; i < dim; i++) {
vec[i] = 0;
}
}
size_t num_of_aligned_row_elem(const size_t dim)
{
const size_t n = ALIGNMENT / sizeof(mat_elem_t);
const size_t q = dim / n;
const size_t r = dim % n;
return r == 0 ? q * n : (q + 1) * n;
}
int matrix_allocate(mat_elem_t *((*mat_ptr)[]), const size_t dim)
{
size_t i;
for (i = 0; i < dim; i++) {
const size_t row_size = num_of_aligned_row_elem(dim);
mat_elem_t *row_ptr;
int alloc_ret_code = 0;
alloc_ret_code = posix_memalign((void **)(&row_ptr), ALIGNMENT, row_size * sizeof(mat_elem_t));
switch (alloc_ret_code) {
case EINVAL:
case ENOMEM:
return alloc_ret_code;
case 0:
break;
default:
return -1;
}
vector_set_zero(row_ptr, row_size);
(*mat_ptr)[i] = row_ptr;
}
return 0;
}
// initialize matrix
void *init(mat_elem_t *(arr[]), const char *filename, int num){
int i, j;
char *pt;
FILE *fp;
char buf[num*10];
if (filename == NULL) fp = NULL;
else fp = fopen(filename, "r");
for (i = 0; i < num; i++) {
if (fp == NULL) continue;
fgets(buf, sizeof(buf), fp);
arr[i][0] = atof(strtok(buf, " "));
for (j = 1; ( pt = strtok(NULL, " ") ) != NULL; j++) {
arr[i][j] = atof(pt);
}
}
}
float float_diff_normalized(const float a, const float b)
{
float c = ABS__(a);
float d = ABS__(b);
d = MAX__(c, d);
return d == 0.0 ? 0.0 : ABS__(a - b) / d;
}
int is_eq(const mat_elem_t *(L[restrict]), const mat_elem_t *(R[restrict]), const size_t num)
{
size_t i, j;
float d;
for (i = 0; i < num; i++) {
for (j = 0; j < num; j++) {
d = float_diff_normalized(L[i][j], R[i][j]);
if (! (d <= __FLT_EPSILON__)) {
printf("difference found at (%lu, %lu)\n", i, j);
printf("diff is %f\n", d);
return 0;
}
}
}
return 1;
}
void matprint(const mat_elem_t *(M[]), const size_t num)
{
size_t i, j;
for (i = 0; i < num; i++) {
for (j = 0; j < num; j++) {
printf("%f ", M[i][j]);
}
printf("\n");
}
}
void matmul_orig(mat_elem_t *(M[]), const mat_elem_t *(A[]), const mat_elem_t *(B[]), const size_t num)
{
size_t i, j, k;
for (i = 0; i < num; i++){
for (j = 0; j < num; j++){
for (k = 0; k < num; k++){
M[i][j] += A[i][k] * B[k][j];
}
}
}
}
void matmul_0(mat_elem_t *(M[]), const mat_elem_t *(A[]), const mat_elem_t *(B[]), const size_t num)
{
mat_elem_t t;
size_t i, j, k;
for (j = 0; j < num; j++){
for (i = 0; i < num; i++){
t = A[i][j];
for (k = 0; k < num; k++){
M[i][k] += t * B[j][k];
}
}
}
}
void matmul_1(mat_elem_t *(M[restrict]), const mat_elem_t *(A[restrict]), const mat_elem_t *(B[restrict]), const size_t num)
{
mat_elem_t t;
const mat_elem_t *B_row;
mat_elem_t *M_row;
size_t i, j, k;
for (j = 0; j < num; j++){
B_row = B[j];
for (i = 0; i < num; i++){
t = A[i][j];
M_row = M[i];
for (k = 0; k < num; k++){
M_row[k] += t * B_row[k];
}
}
}
}
void matmul_2(mat_elem_t *(M[restrict]), const mat_elem_t *(A[restrict]), const mat_elem_t *(B[restrict]), const size_t dim)
{
__m128 tvec, tm;
const __m128 *B_row;
__m128 *M_row;
size_t i, j, k;
const size_t aligned_dim = (dim + 3) >> 2;
for (j = 0; j < dim; j++){
B_row = (const __m128 *)B[j];
for (i = 0; i < dim; i++){
tvec = _mm_set1_ps(A[i][j]);
M_row = (__m128 *)M[i];
for (k = 0; k < aligned_dim; k++){
tm = _mm_mul_ps(tvec, B_row[k]);
tm = _mm_add_ps(M_row[k], tm);
_mm_stream_ps((float *)(M_row + k), tm);
}
}
}
}
/**/
typedef void (*matmat_func_t)(mat_elem_t *([]), const mat_elem_t *([]), const mat_elem_t *([]), const size_t);
void initialize_test_data(mat_elem_t *((*A)[]), mat_elem_t *((*B)[]), mat_elem_t *((*M0)[]), mat_elem_t *((*M1)[]), const size_t dim)
{
int alloc_ret_code;
alloc_ret_code = matrix_allocate(A, dim);
if (alloc_ret_code != 0) {
fprintf(stderr, "%s:%d: error: return code = %d\n",
__FILE__, __LINE__, alloc_ret_code);
}
alloc_ret_code = matrix_allocate(B, dim);
if (alloc_ret_code != 0) {
fprintf(stderr, "%s:%d: error: return code = %d\n",
__FILE__, __LINE__, alloc_ret_code);
}
alloc_ret_code = matrix_allocate(M0, dim);
if (alloc_ret_code != 0) {
fprintf(stderr, "%s:%d: error: return code = %d\n",
__FILE__, __LINE__, alloc_ret_code);
}
alloc_ret_code = matrix_allocate(M1, dim);
if (alloc_ret_code != 0) {
fprintf(stderr, "%s:%d: error: return code = %d\n",
__FILE__, __LINE__, alloc_ret_code);
}
init(*A, "A.dat", dim);
init(*B, "B.dat", dim);
{
size_t i;
for (i = 0; i < dim; i++) {
assert( (((uintptr_t)((*A)[i])) % ALIGNMENT ) == 0 );
assert( (((uintptr_t)((*B)[i])) % ALIGNMENT ) == 0 );
assert( (((uintptr_t)((*M0)[i])) % ALIGNMENT ) == 0 );
assert( (((uintptr_t)((*M1)[i])) % ALIGNMENT ) == 0 );
}
}
}
void benchmark()
{
time_t start, end;
double diff;
mat_elem_t *(A[NUM]), *(B[NUM]);
mat_elem_t *(M[NUM]), *(M_orig[NUM]);
struct {
matmat_func_t f_;
const char *name_;
} func_tbl[] = {
FUNC_ENTRY__(matmul_orig),
FUNC_ENTRY__(matmul_0),
FUNC_ENTRY__(matmul_1),
FUNC_ENTRY__(matmul_2),
};
size_t i, flen;
//
initialize_test_data(&A, &B, &M_orig, &M, NUM);
setzero(M_orig, NUM);
func_tbl[0].f_(M_orig, A, B, NUM);
flen = sizeof(func_tbl)/sizeof(*func_tbl);
for (i = 0; i < flen; i++) {
setzero(M, NUM);
time (&start);
func_tbl[i].f_(M, A, B, NUM);
time (&end);
diff = difftime(end,start);
printf("%s : %.2f\n", func_tbl[i].name_, diff);
if (! is_eq(M, M_orig, NUM)) {
printf("different\n");
}
}
}
void check()
{
#ifdef NDEBUG
puts("NDEBUG macro is enabled");
#else
puts("NDEBUG macro is disabled");
#endif
}
int main()
{
check();
printf("dimension = %u\n", NUM);
benchmark();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment