Skip to content

Instantly share code, notes, and snippets.

@rundel
Created March 10, 2012 00:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rundel/2009450 to your computer and use it in GitHub Desktop.
Save rundel/2009450 to your computer and use it in GitHub Desktop.
./dtrsm
1.000000 0.000000 0.000000
-0.400000 1.000000 0.000000
-0.080000 -0.300000 1.000000
1.000000 0.000000 0.000000
-0.400000 1.000000 0.000000
-0.080000 -0.300000 1.000000
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_api.h>
#include <magma.h>
#include <clapack.h>
void dpotrf(double *A, double *B, double *C, int N, int gpu) {
int info = 0;
if(gpu) {
double *d_A, *d_B;
cudaMalloc((void**)&d_A, N * N * sizeof(double));
cudaMalloc((void**)&d_B, N * N * sizeof(double));
cublasSetMatrix(N, N, sizeof(*A), A, N, d_A, N);
cublasSetMatrix(N, N, sizeof(*B), B, N, d_B, N);
magma_dpotrs_gpu('U', N, N, d_A, N, d_B, N, &info);
cublasGetMatrix(N, N, sizeof(*d_B), d_B, N, C, N);
cudaFree(d_A);
cudaFree(d_B);
} else {
info = clapack_dpotrs(CblasColMajor, CblasUpper, N, N, A, N, C, N);
}
if(info < 0)
printf("Error: Illegal argument %d in 'dpotrf'", -1 * info);
}
/*
 * Print the first M*N doubles of A to stdout, N values per output line.
 *
 * Elements are written in storage order with "%f " formatting. A newline is
 * emitted before every group of N values (so the output starts with a blank
 * line) and once more after the last value.
 */
void print_mat(double *A, int M, int N) {
    const int total = M * N;
    int idx = 0;

    while (idx < total) {
        /* Start a new output line whenever a group of N values begins. */
        const char *lead = (idx % N == 0) ? "\n" : "";
        printf("%s%f ", lead, A[idx]);
        ++idx;
    }

    putchar('\n');
}
int main(void)
{
int N = 3;
double A[9] = {1.0, 0.0, 0.0,
0.4, 1.0, 0.0,
0.2, 0.3, 1.0 };
double B[9] = {1,0,0,
0,1,0,
0,0,1};
double C[9];
double *d_A, *d_B, *d_C;
cudaMalloc((void**)&d_A, N * N * sizeof(double));
cudaMalloc((void**)&d_B, N * N * sizeof(double));
cublasSetMatrix(N, N, sizeof(double), A, N, d_A, N);
cublasSetMatrix(N, N, sizeof(double), B, N, d_B, N);
double alpha=1.0;
magmablas_dtrsm('L','U','N','N', N, N, alpha, d_A, N, d_B, N);
cublasGetVector(N * N, sizeof(double), d_B, 1, C, 1);
cudaFree(d_A);
cudaFree(d_B);
print_mat(C,N,N);
cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, alpha, A, N, B, N);
//clapack_dtrsm("L", "U", "N", "N", &N, &N, &alpha, A, &N, B, &N);
print_mat(B,N,N);
return 0;
}
gcc -I/usr/local/cuda/include -I/usr/local/magma/include -I/usr/include/atlas -std=gnu99 -O3 -g -c dtrsm.c -o dtrsm.o
gcc -o dtrsm dtrsm.o -L/usr/local/cuda/lib64 -L/usr/local/magma/lib -lcuda -lcudart -lcublas -lmagmablas -lmagma -llapack -lblas
./dtrsm
0.000000 0.000000 0.000000
0.000000 0.000000 0.000000
0.000000 0.000000 0.000000
1.000000 0.000000 0.000000
-0.400000 1.000000 0.000000
-0.080000 -0.300000 1.000000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment