Skip to content

Instantly share code, notes, and snippets.

@bonfus
Last active January 4, 2018 08:35
Show Gist options
  • Save bonfus/acf7fa8746c67922b29826105769dbe4 to your computer and use it in GitHub Desktop.
Save bonfus/acf7fa8746c67922b29826105769dbe4 to your computer and use it in GitHub Desktop.
Mix CudaFortran and OpenACC
! compile with pgfortran -acc -Mcuda take1.f90
module addone_mod
use cudafor
implicit none
private
public :: addone
interface addone
module procedure addone_host,addone_dev
end interface
contains
subroutine addone_host(B,N)
integer, intent(in) :: N
real(8), intent(inout) :: B(N)
integer :: i
real(8) :: v
do i=1,N
B(i) = B(i)+1.d0
end do
end subroutine
subroutine addone_dev(B,N)
integer :: N
real(8),device, intent(inout) :: B(N)
integer :: i
real(8), device :: v
!$cuf kernel do
do i=1,N
B(i) = B(i)+1.d0
end do
print *, "device"
end subroutine
attributes(global) subroutine addone_kernel(B,N)
integer,value :: N
real(8) :: B(N)
integer :: i
i = ((blockIdx%x - 1) * blockDim%x) + &
threadIdx%x
if (i.le.N) then
B(i) = B(i)+1.d0
endif
end subroutine
end module
subroutine naprova(B,N)
integer,intent(in) :: N
real(8), intent(inout) :: B(N)
!$acc data present(B)
!$acc kernels
B(N)=-1.d0
!$acc end kernels
!$acc end data
end subroutine
program main
use cudafor
use addone_mod, ONLY : addone
use cublas, only : cublasDaxpy
implicit none
integer, parameter :: N = 1000
real(8) :: A(N),B(N)
real(8) :: alpha = 1.
integer :: i
!$acc enter data create(A,B)
!$acc data present(A,B)
!$acc parallel
A(:) = 1.0
!$acc end parallel
!$acc parallel
B(:) = 2.0 * A(:)
!$acc end parallel
! call to lib
call cublasDaxpy(N,alpha,A,1,B,1)
! call to cuf kernel
call addone(B,N)
!call to openacc sub
call naprova(B,N)
!$acc update self(B)
!$acc end data
!$acc exit data delete(A,B)
!HOST
print *,B(1:6),"\n...\n",B((N-5):N)
end program
module mdata
implicit none
real, allocatable :: a(:)
contains
subroutine mdata_init
allocate(a(100))
!$acc enter data create(a)
end subroutine mdata_init
subroutine mdata_finalize
!$acc exit data delete(a)
deallocate(a)
end subroutine mdata_finalize
end module mdata
program main
use mdata
implicit none
integer :: i
real :: factor = 3.0
call mdata_init()
!$acc data present(a)
!$acc parallel
a(:) = 1.0
!$cuf kernel do
do i=1,100
a(i) = a(i)*factor
end do
!$acc end parallel
!$acc update self(a)
!$acc end data
print *,a(1:6)
call mdata_finalize()
end program
!compile with pgfortran -g -acc -Mcuda=cc35,cuda8.0 prova2.f90
module whythis
contains
subroutine ciao(c,d)
use cudafor
implicit none
integer :: i
real, device, intent(inout) :: c(:)
real, device, intent(out) :: d(:)
!$cuf kernel do
do i=1,100
d(i) = 2.0*c(i)*c(i)/c(i)
end do
end subroutine
end module whythis
subroutine ocio(c,d)
use cudafor
implicit none
integer :: i
real, device, intent(inout) :: c(:)
real, device, intent(out) :: d(:)
!$cuf kernel do
do i=1,100
d(i) = 2.0*c(i)*c(i)/c(i)
end do
end subroutine
program main
use cudafor
use whythis
implicit none
integer :: i
real, allocatable :: c(:)
real, allocatable :: d(:)
real, allocatable, device :: e(:)
real :: factor = 3.0
allocate(c(100))
allocate(d(100))
allocate(e(100))
c=1.0
!$acc data copy(c,d)
!$acc kernels
do i=1,100
d(i) = 2.0*c(i)*c(i)/c(i)
end do
!$acc end kernels
call ciao(c,d)
! the following does not work!!
!call ocio(c,d)
!$acc update self(d)
!$acc end data
!$acc data present(e) copyin(c)
!$acc kernels
do i=1,100
e(i) = 2.0*c(i)*c(i)/c(i)
end do
!$acc end kernels
!$acc end data
! copy array back to host
c = e
print *, d(1:10)
print *, c(1:10)
deallocate(c,d,e)
end program
program main
use cudafor
implicit none
integer :: i
real, allocatable :: c(:)
real, allocatable :: d(:)
real :: factor = 3.0
!$acc declare create(c,d)
allocate(c(100))
allocate(d(100))
c=1.0
d=1.0
!$acc update device( c )
!$cuf kernel do
do i=1,100
d(i) = 2.0*c(i)
end do
!$acc update self( d )
print *, d(1:10)
deallocate(c,d)
end program
subroutine ciao(u,d,t,n)
implicit none
real, intent(out) :: u(n)
real, intent(in) :: d(n), t(n)
integer i,n
!$acc kernels present(u,d,t)
do i=1,100
u(i)=d(i)*t(i)
end do
!$acc end kernels
end subroutine
program ma
implicit none
integer :: n
real, allocatable :: a(:), b(:), c(:)
!$acc declare create(a,b,c)
n=100
allocate(a(n),b(n),c(n))
call ciao(a,b,c,n)
!$acc update self(a,b,c)
deallocate(a,b,c)
end program ma
! Cannot create interfaces with openacc declare?
! Compile with pgfortran -acc -ta=tesla:pinned,cc35,cuda8.0 -Mcuda=cc35,cuda8.0 take6.f90
module store
integer, allocatable :: ints(:)
!$acc declare create(ints)
end module
module add_mod
use cudafor
PUBLIC :: add
INTERFACE add
MODULE PROCEDURE add_cpu, add_gpu
END INTERFACE
CONTAINS
subroutine add_cpu(a,b,N)
implicit none
integer, intent(inout) :: a(N), b(N)
integer :: i, N
print *, "CPU call"
do i =1, N
a(i) = a(i) + b(i)
end do
end subroutine
subroutine add_gpu(a,b,N)
implicit none
integer, device, intent(inout) :: a(N), b(N)
integer :: i, N
print *, "GPU call"
!$cuf kernel do
do i =1, N
a(i) = a(i) + b(i)
end do
end subroutine
end module
program ma
use store
use add_mod
implicit none
allocate(ints(100))
ints = 1
call add(ints,ints,100)
deallocate(ints)
end program
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment