Last active
December 18, 2015 19:18
-
-
Save blakejohnson/5831456 to your computer and use it in GitHub Desktop.
Minimal example of BLAS + libunwind on OS X
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY) | |
* .. Scalar Arguments .. | |
DOUBLE PRECISION DA | |
INTEGER INCX,INCY,N | |
* .. | |
* .. Array Arguments .. | |
DOUBLE PRECISION DX(*),DY(*) | |
* .. | |
* | |
* Purpose | |
* ======= | |
* | |
* DAXPY constant times a vector plus a vector. | |
* uses unrolled loops for increments equal to one. | |
* | |
* Further Details | |
* =============== | |
* | |
* jack dongarra, linpack, 3/11/78. | |
* modified 12/3/93, array(1) declarations changed to array(*) | |
* | |
* ===================================================================== | |
* | |
* .. Local Scalars .. | |
INTEGER I,IX,IY,M,MP1 | |
* .. | |
* .. Intrinsic Functions .. | |
INTRINSIC MOD | |
* .. | |
IF (N.LE.0) RETURN | |
IF (DA.EQ.0.0d0) RETURN | |
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN | |
* | |
* code for both increments equal to 1 | |
* | |
* | |
* clean-up loop | |
* | |
M = MOD(N,4) | |
IF (M.NE.0) THEN | |
DO I = 1,M | |
DY(I) = DY(I) + DA*DX(I) | |
END DO | |
END IF | |
IF (N.LT.4) RETURN | |
MP1 = M + 1 | |
DO I = MP1,N,4 | |
DY(I) = DY(I) + DA*DX(I) | |
DY(I+1) = DY(I+1) + DA*DX(I+1) | |
DY(I+2) = DY(I+2) + DA*DX(I+2) | |
DY(I+3) = DY(I+3) + DA*DX(I+3) | |
END DO | |
ELSE | |
* | |
* code for unequal increments or equal increments | |
* not equal to 1 | |
* | |
IX = 1 | |
IY = 1 | |
IF (INCX.LT.0) IX = (-N+1)*INCX + 1 | |
IF (INCY.LT.0) IY = (-N+1)*INCY + 1 | |
DO I = 1,N | |
DY(IY) = DY(IY) + DA*DX(IX) | |
IX = IX + INCX | |
IY = IY + INCY | |
END DO | |
END IF | |
RETURN | |
END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
all: test test2 test3 test4 test5 | |
JULIADIR = /Users/bjohnson/src/julia | |
JULIALIB = $(JULIADIR)/usr/lib | |
OPENBLASDIR = $(JULIADIR)/deps/openblas-develop | |
# LDFLAGS = -Wl,-no_compact_unwind | |
LDFLAGS = | |
test: unwind.c | |
clang -g -DAPPLEBLAS -framework vecLib -lblas $(LDFLAGS) unwind.c -o test | |
LDFLAGS += -Wl,-rpath,$(JULIALIB) | |
test2: unwind.c | |
clang -g -I$(OPENBLASDIR) -L$(JULIALIB) -lopenblas $(LDFLAGS) unwind.c -o test2 | |
test3: unwind.c | |
clang -g -DFORTRANBLAS -L$(JULIALIB) -lopenblas $(LDFLAGS) unwind.c -o test3 | |
test4: unwind.c libmyblas | |
clang -g -DMYBLAS -L. -lmyblas unwind.c -o test4 | |
test5: unwind.c libdaxpy | |
clang -g -DFORTRANBLAS -L. -ldaxpy unwind.c -o test5 | |
libmyblas: myblas.c myblas.h | |
clang -fPIC -shared -o libmyblas.dylib myblas.c | |
libdaxpy: daxpy.f | |
gfortran -fPIC -shared -o libdaxpy.dylib daxpy.f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void myaxpy(int n, double a, double *x, int incx, double *y, int incy) { | |
int i; | |
for (i=0; i < n; i++) { | |
y[i] = a * x[i] + y[i]; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void myaxpy(int n, double a, double *x, int incx, double *y, int incy); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <stddef.h> | |
#include <signal.h> | |
#include <sys/time.h> | |
#define UNW_LOCAL_ONLY | |
#include <libunwind.h> | |
typedef ptrdiff_t ptrint_t; | |
#ifdef APPLEBLAS | |
#include <Accelerate/Accelerate.h> | |
#else | |
#ifdef FORTRANBLAS | |
extern void daxpy_( | |
ptrdiff_t *n, | |
double *da, | |
double *dx, | |
ptrdiff_t *incx, | |
double *dy, | |
ptrdiff_t *incy | |
); | |
#else | |
#ifdef MYBLAS | |
#include "myblas.h" | |
#define cblas_daxpy myaxpy | |
#else | |
#include "cblas.h" | |
#endif | |
#endif | |
#endif | |
struct itimerval timerprof; | |
static u_int64_t usecprof = 500; | |
int running; | |
int samples; | |
size_t rec_backtrace(size_t maxsize) | |
{ | |
unw_cursor_t cursor; unw_context_t uc; | |
unw_word_t ip, offset; | |
size_t n=0; | |
char fname[64]; | |
unw_getcontext(&uc); | |
unw_init_local(&cursor, &uc); | |
while (unw_step(&cursor) > 0 && n < maxsize) { | |
if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) { | |
break; | |
} | |
fname[0] = '\0'; | |
unw_get_proc_name(&cursor, fname, sizeof(fname), &offset); | |
printf("%p: (%s + 0x%x)\n", (void *)ip, fname, (unsigned int)offset); | |
} | |
printf("\n"); | |
return n; | |
} | |
static void sprofile_bt(int dummy) { | |
if (running) { | |
printf("Sample %d\n", samples); | |
rec_backtrace(10); | |
if (samples-- > 0) { | |
timerprof.it_value.tv_usec = usecprof; | |
setitimer(ITIMER_REAL, &timerprof, 0); | |
} | |
} | |
} | |
int start_timer(void) | |
{ | |
timerprof.it_interval.tv_sec = 0; | |
timerprof.it_interval.tv_usec = 0; | |
timerprof.it_value.tv_sec = 0; | |
timerprof.it_value.tv_usec = usecprof; | |
if (setitimer(ITIMER_REAL, &timerprof, 0) == -1) | |
return -3; | |
running = 1; | |
samples = 10; | |
signal(SIGALRM, sprofile_bt); | |
return 0; | |
} | |
void stop_timer(void) | |
{ | |
running = 0; | |
} | |
int blas_op(int a, double *x, double *y) { | |
#ifdef FORTRANBLAS | |
long n = 1000; | |
double da = (double)a; | |
long incx = 1, incy = 1; | |
daxpy_(&n, &da, x, &incx, y, &incy); | |
#else | |
cblas_daxpy(1000, (double)a, x, 1, y, 1); | |
#endif | |
return y[0]; | |
} | |
void fill_xy(double *x, double *y, int n) { | |
int ct; | |
for (ct=0; ct < n; ct++) { | |
x[ct] = (double)rand() / (double)RAND_MAX; | |
y[ct] = (double)rand() / (double)RAND_MAX; | |
} | |
} | |
int main(void) { | |
int a = 2; | |
int b; | |
double x[1000], y[1000]; | |
int ct; | |
fill_xy(x, y, 1000); | |
start_timer(); | |
for (ct = 0; ct < 10000; ct++) | |
b = blas_op(a, x, y); | |
stop_timer(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment