Skip to content

Instantly share code, notes, and snippets.

@syadlowsky
Created March 15, 2012 01:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save syadlowsky/2040953 to your computer and use it in GitHub Desktop.
Save syadlowsky/2040953 to your computer and use it in GitHub Desktop.
CALLOC FREE weirdness
#include<stdlib.h>
// number of floats that fit in L1 cache
#define L1_SIZE 1024
// NOTE(review): presumably the number of floats of L1 budgeted per matrix
// (it is half of L1_SIZE); not referenced in the visible code -- confirm intent.
#define L1_MATRIX_ALLOT 512
// number of floats that fit in L2 cache
#define L2_SIZE 8192
// number of floats that fit in L3 cache
// NOTE(review): same value as L2_SIZE -- looks like a placeholder; confirm.
#define L3_SIZE 8192
/*
 * Blocked out-of-place transpose of an n-by-n float matrix.
 *
 * For every 0 <= r, c < n this stores dst[c + r*n] = src[r + c*n]. The
 * matrix is visited tile by tile (blocksize-by-blocksize); tiles that would
 * run past the matrix edge are clipped, so n need not be a multiple of
 * blocksize.
 *
 *   n         - matrix dimension; the call is a no-op when n <= 0
 *   blocksize - tile edge length in elements; a value <= 0 or > n is treated
 *               as n, i.e. the whole matrix is handled as a single tile
 *   dst       - output buffer, must have room for n*n floats
 *   src       - input buffer of n*n floats; only read
 *
 * dst and src must be distinct buffers: an in-place call would overwrite
 * elements before their mirrored counterparts have been read.
 */
void transpose( int n, int blocksize, float *dst, float *src ) {
    if (n <= 0) {
        return;
    }
    /* A non-positive step would never advance the tile loops below. */
    if (blocksize <= 0 || blocksize > n) {
        blocksize = n;
    }

    /* size_t indexing keeps r + c*n from overflowing int for large n. */
    const size_t dim = (size_t)n;
    const size_t tile = (size_t)blocksize;

    for (size_t row0 = 0; row0 < dim; row0 += tile) {
        /* Clip the last tile in each direction to the matrix edge. */
        const size_t row_end = (dim - row0 < tile) ? dim : row0 + tile;

        for (size_t col0 = 0; col0 < dim; col0 += tile) {
            const size_t col_end = (dim - col0 < tile) ? dim : col0 + tile;

            for (size_t r = row0; r < row_end; r++) {
                /* Inner loop writes dst contiguously. */
                for (size_t c = col0; c < col_end; c++) {
                    dst[c + r * dim] = src[r + c * dim];
                }
            }
        }
    }
}
/*
 * Single-precision matrix multiply-accumulate:
 *     C := C + A * B
 * where A, B and C are n-by-n matrices stored in column-major format
 * (element (i,j) is at index i + j*n). A and B are only read; C is updated
 * in place. The call is a no-op when n <= 0.
 *
 * A is first copied into a transposed scratch buffer so that the inner
 * dot-product loop reads both operands contiguously. If that buffer cannot
 * be allocated the routine reads A directly instead; the arithmetic (and
 * therefore the result) is the same on both paths.
 */
void square_sgemm (int n, float* A, float* B, float* C)
{
    enum { TRANSPOSE_BLOCK = 32 };  /* tile edge handed to transpose() */

    if (n <= 0) {
        return;
    }

    const size_t dim = (size_t)n;

    /*
     * The transposed copy holds the whole matrix, so it needs n*n floats
     * (not n). Guard the n*n product; calloc guards the multiply by the
     * element size.
     */
    float *A_trans = NULL;
    if (dim <= (size_t)-1 / dim) {
        A_trans = calloc(dim * dim, sizeof *A_trans);
    }
    if (A_trans != NULL) {
        transpose(n, TRANSPOSE_BLOCK, A_trans, A);
    }

    /* For each row i of A */
    for (size_t i = 0; i < dim; ++i) {
        /*
         * Row i of A is contiguous in the transposed copy
         * (A_trans[k + i*n] == A[i + k*n]) and has stride n in A itself.
         */
        const float *a_row = (A_trans != NULL) ? A_trans + i * dim : A + i;
        const size_t a_step = (A_trans != NULL) ? 1 : dim;

        /* For each column j of B */
        for (size_t j = 0; j < dim; ++j) {
            const float *b_col = B + j * dim;

            /* Compute C(i,j) */
            float cij = C[i + j * dim];
            for (size_t k = 0; k < dim; ++k) {
                cij += a_row[k * a_step] * b_col[k];
            }
            C[i + j * dim] = cij;
        }
    }

    free(A_trans);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment