This is my implementation of complex tanh for CUDA:
#include <iostream>
#include <math.h>
#include <cuComplex.h>
#include <complex>
// Real scalar type and the matching CUDA double-precision complex type.
typedef double rtype;
typedef cuDoubleComplex ctype;
// Real part of a ctype value.
#define rpart(x) (cuCreal(x))
// Imaginary part of a ctype value.
#define ipart(x) (cuCimag(x))
// Builds a ctype from a real part x and an imaginary part y.
#define cmplx(x,y) (make_cuDoubleComplex(x,y))
// Complex negation: returns -x by flipping the sign of each component.
// Negating the components directly (instead of multiplying by -1 + 0i) avoids
// the 0 * component cross terms of a full complex multiply, which turn an
// infinite component into NaN and can lose the sign of a zero. Written as an
// inline function so the argument is evaluated exactly once.
__host__ __device__ static inline ctype cneg(ctype x) {
    return make_cuDoubleComplex(-cuCreal(x), -cuCimag(x));
}
// Complex exponential: exp(x + iy) = exp(x) * (cos(y) + i*sin(y)).
//
// z       : complex argument.
// returns : exp(z) as a ctype.
//
// Callable from both host and device code.
__host__ __device__ ctype cexp(ctype z) {
    const rtype mag = exp(rpart(z));
    // Purely real argument: the result is exp(x) with the (signed) zero
    // imaginary part carried through. Without this guard an overflowing
    // exp(x) would give 0 * inf = NaN in the imaginary part.
    if (ipart(z) == 0.0) {
        return cmplx(mag, ipart(z));
    }
    rtype s, c;
    // One call yields both sin(y) and cos(y).
    sincos(ipart(z), &s, &c);
    return cmplx(c * mag, s * mag);
}
// Complex hyperbolic tangent.
//
// Uses the real-arithmetic identity
//     tanh(x + iy) = (sinh(2x) + i*sin(2y)) / (cosh(2x) + cos(2y))
// instead of the quotient (e^z - e^-z) / (e^z + e^-z). The quotient form
// overflows to inf/inf = NaN once |x| exceeds roughly 709 and needs four
// complex exponentials per call.
//
// z       : complex argument.
// returns : tanh(z) as a ctype.
//
// Callable from both host and device code.
__host__ __device__ ctype ctanh(const ctype& z) {
    const rtype x = rpart(z);
    const rtype y = ipart(z);

    // Beyond this |x|, cos(2y) / cosh(2x) <= 1 / cosh(40) ~ 8.5e-18, which is
    // below half a double-precision ulp of 1, so the real part rounds to +/-1.
    const rtype kSaturation = 20.0;
    if (fabs(x) > kSaturation) {
        // Asymptotic form: sin(2y) / cosh(2x) ~ 2 * sin(2y) * exp(-2|x|).
        // exp(-2|x|) underflows gracefully to 0 instead of overflowing.
        return cmplx(copysign(1.0, x),
                     2.0 * sin(2.0 * y) * exp(-2.0 * fabs(x)));
    }

    rtype sin2y, cos2y;
    sincos(2.0 * y, &sin2y, &cos2y);
    // The denominator is zero only at the poles of tanh (x = 0, y = pi/2 + k*pi).
    const rtype denom = cosh(2.0 * x) + cos2y;
    return cmplx(sinh(2.0 * x) / denom, sin2y / denom);
}
// Evaluates ctanh for one value on the device.
// Launch layout: a single thread is enough (<<<1, 1>>>); any extra threads
// do nothing. No shared memory is used.
// in  : device pointer to the input value.
// out : device pointer that receives ctanh(*in).
__global__ void ctanhKernel(const ctype* in, ctype* out) {
    if (blockIdx.x == 0 && threadIdx.x == 0) {
        *out = ctanh(*in);
    }
}

// Returns true when `status` is cudaSuccess; otherwise prints which step
// failed, with the runtime's error text, to stderr and returns false.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Compares std::tanh on the host against ctanh executed on the device for the
// single input 0.5 + 0.5i and prints both results.
// Returns 0 on success, 1 if any CUDA runtime call fails.
int main(){
    const double r = 0.5;
    const double i = 0.5;
    std::complex<double> cpu_num(r,i);
    ctype cu_num = cmplx(r,i);
    std::complex<double> cpu_ans = std::tanh(cpu_num);

    // d_buf[0] holds the input, d_buf[1] receives the result.
    ctype* d_buf = 0;
    if (!cudaOk(cudaMalloc((void**)&d_buf, 2 * sizeof(ctype)), "cudaMalloc")) {
        return 1;
    }

    ctype cu_ans = cmplx(0.0, 0.0);
    bool ok = cudaOk(cudaMemcpy(d_buf, &cu_num, sizeof(ctype), cudaMemcpyHostToDevice),
                     "cudaMemcpy (host to device)");
    if (ok) {
        ctanhKernel<<<1, 1>>>(d_buf, d_buf + 1);
        // Launch-configuration errors are only visible through cudaGetLastError.
        ok = cudaOk(cudaGetLastError(), "ctanhKernel launch");
    }
    if (ok) {
        // The blocking copy also waits for the kernel and surfaces any
        // error raised while it executed.
        ok = cudaOk(cudaMemcpy(&cu_ans, d_buf + 1, sizeof(ctype), cudaMemcpyDeviceToHost),
                    "cudaMemcpy (device to host)");
    }
    cudaFree(d_buf);
    if (!ok) {
        return 1;
    }

    std::cout << "CPU real: " << std::real(cpu_ans) << " CPU imag: " << std::imag(cpu_ans) << std::endl;
    std::cout << "GPU real: " << rpart(cu_ans) << " GPU imag: " << ipart(cu_ans) << std::endl;
    return 0;
}
Save it as test.cu.
Compile it like so:
nvcc -arch=sm_20 -O3 -o test test.cu
Run it:
./test
The output is:
CPU real: 0.564083 CPU imag: 0.403896
GPU real: 0.564083 GPU imag: 0.403896