Skip to content

Instantly share code, notes, and snippets.

@blakejohnson
Last active December 18, 2015 19:18
Show Gist options
  • Save blakejohnson/5831456 to your computer and use it in GitHub Desktop.
Save blakejohnson/5831456 to your computer and use it in GitHub Desktop.
Minimal example of BLAS + libunwind on OS X
SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
* .. Scalar Arguments ..
DOUBLE PRECISION DA
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* Purpose
* =======
*
* DAXPY constant times a vector plus a vector.
* uses unrolled loops for increments equal to one.
*
* Further Details
* ===============
*
* jack dongarra, linpack, 3/11/78.
* modified 12/3/93, array(1) declarations changed to array(*)
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
IF (N.LE.0) RETURN
IF (DA.EQ.0.0d0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,4)
IF (M.NE.0) THEN
DO I = 1,M
DY(I) = DY(I) + DA*DX(I)
END DO
END IF
IF (N.LT.4) RETURN
MP1 = M + 1
DO I = MP1,N,4
DY(I) = DY(I) + DA*DX(I)
DY(I+1) = DY(I+1) + DA*DX(I+1)
DY(I+2) = DY(I+2) + DA*DX(I+2)
DY(I+3) = DY(I+3) + DA*DX(I+3)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DY(IY) = DY(IY) + DA*DX(IX)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
RETURN
END
all: test test2 test3 test4 test5
JULIADIR = /Users/bjohnson/src/julia
JULIALIB = $(JULIADIR)/usr/lib
OPENBLASDIR = $(JULIADIR)/deps/openblas-develop
# LDFLAGS = -Wl,-no_compact_unwind
LDFLAGS =
test: unwind.c
clang -g -DAPPLEBLAS -framework vecLib -lblas $(LDFLAGS) unwind.c -o test
LDFLAGS += -Wl,-rpath,$(JULIALIB)
test2: unwind.c
clang -g -I$(OPENBLASDIR) -L$(JULIALIB) -lopenblas $(LDFLAGS) unwind.c -o test2
test3: unwind.c
clang -g -DFORTRANBLAS -L$(JULIALIB) -lopenblas $(LDFLAGS) unwind.c -o test3
test4: unwind.c libmyblas
clang -g -DMYBLAS -L. -lmyblas unwind.c -o test4
test5: unwind.c libdaxpy
clang -g -DFORTRANBLAS -L. -ldaxpy unwind.c -o test5
libmyblas: myblas.c myblas.h
clang -fPIC -shared -o libmyblas.dylib myblas.c
libdaxpy: daxpy.f
gfortran -fPIC -shared -o libdaxpy.dylib daxpy.f
void myaxpy(int n, double a, double *x, int incx, double *y, int incy) {
int i;
for (i=0; i < n; i++) {
y[i] = a * x[i] + y[i];
}
}
void myaxpy(int n, double a, double *x, int incx, double *y, int incy);
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <signal.h>
#include <sys/time.h>
#define UNW_LOCAL_ONLY
#include <libunwind.h>
typedef ptrdiff_t ptrint_t;
#ifdef APPLEBLAS
#include <Accelerate/Accelerate.h>
#else
#ifdef FORTRANBLAS
extern void daxpy_(
ptrdiff_t *n,
double *da,
double *dx,
ptrdiff_t *incx,
double *dy,
ptrdiff_t *incy
);
#else
#ifdef MYBLAS
#include "myblas.h"
#define cblas_daxpy myaxpy
#else
#include "cblas.h"
#endif
#endif
#endif
struct itimerval timerprof;
static u_int64_t usecprof = 500;
int running;
int samples;
size_t rec_backtrace(size_t maxsize)
{
unw_cursor_t cursor; unw_context_t uc;
unw_word_t ip, offset;
size_t n=0;
char fname[64];
unw_getcontext(&uc);
unw_init_local(&cursor, &uc);
while (unw_step(&cursor) > 0 && n < maxsize) {
if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) {
break;
}
fname[0] = '\0';
unw_get_proc_name(&cursor, fname, sizeof(fname), &offset);
printf("%p: (%s + 0x%x)\n", (void *)ip, fname, (unsigned int)offset);
}
printf("\n");
return n;
}
static void sprofile_bt(int dummy) {
if (running) {
printf("Sample %d\n", samples);
rec_backtrace(10);
if (samples-- > 0) {
timerprof.it_value.tv_usec = usecprof;
setitimer(ITIMER_REAL, &timerprof, 0);
}
}
}
int start_timer(void)
{
timerprof.it_interval.tv_sec = 0;
timerprof.it_interval.tv_usec = 0;
timerprof.it_value.tv_sec = 0;
timerprof.it_value.tv_usec = usecprof;
if (setitimer(ITIMER_REAL, &timerprof, 0) == -1)
return -3;
running = 1;
samples = 10;
signal(SIGALRM, sprofile_bt);
return 0;
}
void stop_timer(void)
{
running = 0;
}
int blas_op(int a, double *x, double *y) {
#ifdef FORTRANBLAS
long n = 1000;
double da = (double)a;
long incx = 1, incy = 1;
daxpy_(&n, &da, x, &incx, y, &incy);
#else
cblas_daxpy(1000, (double)a, x, 1, y, 1);
#endif
return y[0];
}
void fill_xy(double *x, double *y, int n) {
int ct;
for (ct=0; ct < n; ct++) {
x[ct] = (double)rand() / (double)RAND_MAX;
y[ct] = (double)rand() / (double)RAND_MAX;
}
}
int main(void) {
int a = 2;
int b;
double x[1000], y[1000];
int ct;
fill_xy(x, y, 1000);
start_timer();
for (ct = 0; ct < 10000; ct++)
b = blas_op(a, x, y);
stop_timer();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment