Created
November 3, 2011 02:31
-
-
Save tell/1335620 to your computer and use it in GitHub Desktop.
Matrix multiplication in C language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* The author and original source code are from: | |
* 機械学習序曲(仮) > 数値計算の実行速度 - Python, C, Rの比較 | |
* http://hawaii.naist.jp/~kohei-h/my/python-numeric-comp.html | |
* | |
* It is modified version in above Web page. | |
* | |
* This program needs data files to construct matrix. | |
* A generating data files program is published in | |
* original author's Web page. | |
*/ | |
/* | |
* A comparison routine on floating point arithmetic, is added. | |
* This source code is based on: | |
* FLP35-C. 浮動小数点数値を比較する際には精度を考慮する | |
* http://www.jpcert.or.jp/sc-rules/c-flp35-c.html | |
*/ | |
#include <assert.h> | |
#include <limits.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/errno.h> | |
#include <time.h> | |
#include <string.h> | |
#include <xmmintrin.h> | |
#define ABS__(x) ((x) < 0 ? -(x) : (x)) | |
#define MAX__(x,y) ((x) < (y) ? (y) : (x)) | |
#define FUNC_ENTRY__(name) { name, #name } | |
#define ALIGN__ (16) | |
typedef float mat_elem_t; | |
const size_t ALIGNMENT = sizeof(__m128); // ALIGN__; | |
const size_t SHIFT_TO_ALIGN = 0; | |
const unsigned int NUM = 10; | |
void matrix_free(mat_elem_t *(mat_ptr[]), const size_t dim) | |
{ | |
size_t i; | |
for (i = 0; i < dim; i++) { | |
free(mat_ptr[i]); | |
} | |
} | |
void matrix_set_NULL(mat_elem_t *(mat_ptr[]), const size_t dim) | |
{ | |
size_t i; | |
for (i = 0; i < dim; i++) { | |
mat_ptr[i] = NULL; | |
} | |
} | |
void setzero(mat_elem_t *(M[]), const size_t num) | |
{ | |
size_t i, j; | |
for(i = 0; i < num; i++) { | |
for(j = 0; j < num; j++) { | |
M[i][j] = 0; | |
} | |
} | |
} | |
void vector_set_zero(mat_elem_t *vec, const size_t dim) | |
{ | |
size_t i; | |
for (i = 0; i < dim; i++) { | |
vec[i] = 0; | |
} | |
} | |
size_t num_of_aligned_row_elem(const size_t dim) | |
{ | |
const size_t n = ALIGNMENT / sizeof(mat_elem_t); | |
const size_t q = dim / n; | |
const size_t r = dim % n; | |
return r == 0 ? q * n : (q + 1) * n; | |
} | |
int matrix_allocate(mat_elem_t *((*mat_ptr)[]), const size_t dim) | |
{ | |
size_t i; | |
for (i = 0; i < dim; i++) { | |
const size_t row_size = num_of_aligned_row_elem(dim); | |
mat_elem_t *row_ptr; | |
int alloc_ret_code = 0; | |
alloc_ret_code = posix_memalign((void **)(&row_ptr), ALIGNMENT, row_size * sizeof(mat_elem_t)); | |
switch (alloc_ret_code) { | |
case EINVAL: | |
case ENOMEM: | |
return alloc_ret_code; | |
case 0: | |
break; | |
default: | |
return -1; | |
} | |
vector_set_zero(row_ptr, row_size); | |
(*mat_ptr)[i] = row_ptr; | |
} | |
return 0; | |
} | |
// initialize matrix | |
void *init(mat_elem_t *(arr[]), const char *filename, int num){ | |
int i, j; | |
char *pt; | |
FILE *fp; | |
char buf[num*10]; | |
if (filename == NULL) fp = NULL; | |
else fp = fopen(filename, "r"); | |
for (i = 0; i < num; i++) { | |
if (fp == NULL) continue; | |
fgets(buf, sizeof(buf), fp); | |
arr[i][0] = atof(strtok(buf, " ")); | |
for (j = 1; ( pt = strtok(NULL, " ") ) != NULL; j++) { | |
arr[i][j] = atof(pt); | |
} | |
} | |
} | |
float float_diff_normalized(const float a, const float b) | |
{ | |
float c = ABS__(a); | |
float d = ABS__(b); | |
d = MAX__(c, d); | |
return d == 0.0 ? 0.0 : ABS__(a - b) / d; | |
} | |
int is_eq(const mat_elem_t *(L[restrict]), const mat_elem_t *(R[restrict]), const size_t num) | |
{ | |
size_t i, j; | |
float d; | |
for (i = 0; i < num; i++) { | |
for (j = 0; j < num; j++) { | |
d = float_diff_normalized(L[i][j], R[i][j]); | |
if (! (d <= __FLT_EPSILON__)) { | |
printf("difference found at (%lu, %lu)\n", i, j); | |
printf("diff is %f\n", d); | |
return 0; | |
} | |
} | |
} | |
return 1; | |
} | |
void matprint(const mat_elem_t *(M[]), const size_t num) | |
{ | |
size_t i, j; | |
for (i = 0; i < num; i++) { | |
for (j = 0; j < num; j++) { | |
printf("%f ", M[i][j]); | |
} | |
printf("\n"); | |
} | |
} | |
void matmul_orig(mat_elem_t *(M[]), const mat_elem_t *(A[]), const mat_elem_t *(B[]), const size_t num) | |
{ | |
size_t i, j, k; | |
for (i = 0; i < num; i++){ | |
for (j = 0; j < num; j++){ | |
for (k = 0; k < num; k++){ | |
M[i][j] += A[i][k] * B[k][j]; | |
} | |
} | |
} | |
} | |
void matmul_0(mat_elem_t *(M[]), const mat_elem_t *(A[]), const mat_elem_t *(B[]), const size_t num) | |
{ | |
mat_elem_t t; | |
size_t i, j, k; | |
for (j = 0; j < num; j++){ | |
for (i = 0; i < num; i++){ | |
t = A[i][j]; | |
for (k = 0; k < num; k++){ | |
M[i][k] += t * B[j][k]; | |
} | |
} | |
} | |
} | |
void matmul_1(mat_elem_t *(M[restrict]), const mat_elem_t *(A[restrict]), const mat_elem_t *(B[restrict]), const size_t num) | |
{ | |
mat_elem_t t; | |
const mat_elem_t *B_row; | |
mat_elem_t *M_row; | |
size_t i, j, k; | |
for (j = 0; j < num; j++){ | |
B_row = B[j]; | |
for (i = 0; i < num; i++){ | |
t = A[i][j]; | |
M_row = M[i]; | |
for (k = 0; k < num; k++){ | |
M_row[k] += t * B_row[k]; | |
} | |
} | |
} | |
} | |
void matmul_2(mat_elem_t *(M[restrict]), const mat_elem_t *(A[restrict]), const mat_elem_t *(B[restrict]), const size_t dim) | |
{ | |
__m128 tvec, tm; | |
const __m128 *B_row; | |
__m128 *M_row; | |
size_t i, j, k; | |
const size_t aligned_dim = (dim + 3) >> 2; | |
for (j = 0; j < dim; j++){ | |
B_row = (const __m128 *)B[j]; | |
for (i = 0; i < dim; i++){ | |
tvec = _mm_set1_ps(A[i][j]); | |
M_row = (__m128 *)M[i]; | |
for (k = 0; k < aligned_dim; k++){ | |
tm = _mm_mul_ps(tvec, B_row[k]); | |
tm = _mm_add_ps(M_row[k], tm); | |
_mm_stream_ps((float *)(M_row + k), tm); | |
} | |
} | |
} | |
} | |
/**/ | |
typedef void (*matmat_func_t)(mat_elem_t *([]), const mat_elem_t *([]), const mat_elem_t *([]), const size_t); | |
void initialize_test_data(mat_elem_t *((*A)[]), mat_elem_t *((*B)[]), mat_elem_t *((*M0)[]), mat_elem_t *((*M1)[]), const size_t dim) | |
{ | |
int alloc_ret_code; | |
alloc_ret_code = matrix_allocate(A, dim); | |
if (alloc_ret_code != 0) { | |
fprintf(stderr, "%s:%d: error: return code = %d\n", | |
__FILE__, __LINE__, alloc_ret_code); | |
} | |
alloc_ret_code = matrix_allocate(B, dim); | |
if (alloc_ret_code != 0) { | |
fprintf(stderr, "%s:%d: error: return code = %d\n", | |
__FILE__, __LINE__, alloc_ret_code); | |
} | |
alloc_ret_code = matrix_allocate(M0, dim); | |
if (alloc_ret_code != 0) { | |
fprintf(stderr, "%s:%d: error: return code = %d\n", | |
__FILE__, __LINE__, alloc_ret_code); | |
} | |
alloc_ret_code = matrix_allocate(M1, dim); | |
if (alloc_ret_code != 0) { | |
fprintf(stderr, "%s:%d: error: return code = %d\n", | |
__FILE__, __LINE__, alloc_ret_code); | |
} | |
init(*A, "A.dat", dim); | |
init(*B, "B.dat", dim); | |
{ | |
size_t i; | |
for (i = 0; i < dim; i++) { | |
assert( (((uintptr_t)((*A)[i])) % ALIGNMENT ) == 0 ); | |
assert( (((uintptr_t)((*B)[i])) % ALIGNMENT ) == 0 ); | |
assert( (((uintptr_t)((*M0)[i])) % ALIGNMENT ) == 0 ); | |
assert( (((uintptr_t)((*M1)[i])) % ALIGNMENT ) == 0 ); | |
} | |
} | |
} | |
void benchmark() | |
{ | |
time_t start, end; | |
double diff; | |
mat_elem_t *(A[NUM]), *(B[NUM]); | |
mat_elem_t *(M[NUM]), *(M_orig[NUM]); | |
struct { | |
matmat_func_t f_; | |
const char *name_; | |
} func_tbl[] = { | |
FUNC_ENTRY__(matmul_orig), | |
FUNC_ENTRY__(matmul_0), | |
FUNC_ENTRY__(matmul_1), | |
FUNC_ENTRY__(matmul_2), | |
}; | |
size_t i, flen; | |
// | |
initialize_test_data(&A, &B, &M_orig, &M, NUM); | |
setzero(M_orig, NUM); | |
func_tbl[0].f_(M_orig, A, B, NUM); | |
flen = sizeof(func_tbl)/sizeof(*func_tbl); | |
for (i = 0; i < flen; i++) { | |
setzero(M, NUM); | |
time (&start); | |
func_tbl[i].f_(M, A, B, NUM); | |
time (&end); | |
diff = difftime(end,start); | |
printf("%s : %.2f\n", func_tbl[i].name_, diff); | |
if (! is_eq(M, M_orig, NUM)) { | |
printf("different\n"); | |
} | |
} | |
} | |
void check() | |
{ | |
#ifdef NDEBUG | |
puts("NDEBUG macro is enabled"); | |
#else | |
puts("NDEBUG macro is disabled"); | |
#endif | |
} | |
int main() | |
{ | |
check(); | |
printf("dimension = %u\n", NUM); | |
benchmark(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment