Last active
January 4, 2018 08:35
-
-
Save bonfus/acf7fa8746c67922b29826105769dbe4 to your computer and use it in GitHub Desktop.
Mix CudaFortran and OpenACC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! compile with pgfortran -acc -Mcuda take1.f90 | |
module addone_mod | |
use cudafor | |
implicit none | |
private | |
public :: addone | |
interface addone | |
module procedure addone_host,addone_dev | |
end interface | |
contains | |
subroutine addone_host(B,N) | |
integer, intent(in) :: N | |
real(8), intent(inout) :: B(N) | |
integer :: i | |
real(8) :: v | |
do i=1,N | |
B(i) = B(i)+1.d0 | |
end do | |
end subroutine | |
subroutine addone_dev(B,N) | |
integer :: N | |
real(8),device, intent(inout) :: B(N) | |
integer :: i | |
real(8), device :: v | |
!$cuf kernel do | |
do i=1,N | |
B(i) = B(i)+1.d0 | |
end do | |
print *, "device" | |
end subroutine | |
attributes(global) subroutine addone_kernel(B,N) | |
integer,value :: N | |
real(8) :: B(N) | |
integer :: i | |
i = ((blockIdx%x - 1) * blockDim%x) + & | |
threadIdx%x | |
if (i.le.N) then | |
B(i) = B(i)+1.d0 | |
endif | |
end subroutine | |
end module | |
subroutine naprova(B,N) | |
integer,intent(in) :: N | |
real(8), intent(inout) :: B(N) | |
!$acc data present(B) | |
!$acc kernels | |
B(N)=-1.d0 | |
!$acc end kernels | |
!$acc end data | |
end subroutine | |
program main | |
use cudafor | |
use addone_mod, ONLY : addone | |
use cublas, only : cublasDaxpy | |
implicit none | |
integer, parameter :: N = 1000 | |
real(8) :: A(N),B(N) | |
real(8) :: alpha = 1. | |
integer :: i | |
!$acc enter data create(A,B) | |
!$acc data present(A,B) | |
!$acc parallel | |
A(:) = 1.0 | |
!$acc end parallel | |
!$acc parallel | |
B(:) = 2.0 * A(:) | |
!$acc end parallel | |
! call to lib | |
call cublasDaxpy(N,alpha,A,1,B,1) | |
! call to cuf kernel | |
call addone(B,N) | |
!call to openacc sub | |
call naprova(B,N) | |
!$acc update self(B) | |
!$acc end data | |
!$acc exit data delete(A,B) | |
!HOST | |
print *,B(1:6),"\n...\n",B((N-5):N) | |
end program |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module mdata | |
implicit none | |
real, allocatable :: a(:) | |
contains | |
subroutine mdata_init | |
allocate(a(100)) | |
!$acc enter data create(a) | |
end subroutine mdata_init | |
subroutine mdata_finalize | |
!$acc exit data delete(a) | |
deallocate(a) | |
end subroutine mdata_finalize | |
end module mdata | |
program main | |
use mdata | |
implicit none | |
integer :: i | |
real :: factor = 3.0 | |
call mdata_init() | |
!$acc data present(a) | |
!$acc parallel | |
a(:) = 1.0 | |
!$cuf kernel do | |
do i=1,100 | |
a(i) = a(i)*factor | |
end do | |
!$acc end parallel | |
!$acc update self(a) | |
!$acc end data | |
print *,a(1:6) | |
call mdata_finalize() | |
end program |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!compile with pgfortran -g -acc -Mcuda=cc35,cuda8.0 prova2.f90 | |
module whythis | |
contains | |
subroutine ciao(c,d) | |
use cudafor | |
implicit none | |
integer :: i | |
real, device, intent(inout) :: c(:) | |
real, device, intent(out) :: d(:) | |
!$cuf kernel do | |
do i=1,100 | |
d(i) = 2.0*c(i)*c(i)/c(i) | |
end do | |
end subroutine | |
end module whythis | |
subroutine ocio(c,d) | |
use cudafor | |
implicit none | |
integer :: i | |
real, device, intent(inout) :: c(:) | |
real, device, intent(out) :: d(:) | |
!$cuf kernel do | |
do i=1,100 | |
d(i) = 2.0*c(i)*c(i)/c(i) | |
end do | |
end subroutine | |
program main | |
use cudafor | |
use whythis | |
implicit none | |
integer :: i | |
real, allocatable :: c(:) | |
real, allocatable :: d(:) | |
real, allocatable, device :: e(:) | |
real :: factor = 3.0 | |
allocate(c(100)) | |
allocate(d(100)) | |
allocate(e(100)) | |
c=1.0 | |
!$acc data copy(c,d) | |
!$acc kernels | |
do i=1,100 | |
d(i) = 2.0*c(i)*c(i)/c(i) | |
end do | |
!$acc end kernels | |
call ciao(c,d) | |
! the following does not work!! | |
!call ocio(c,d) | |
!$acc update self(d) | |
!$acc end data | |
!$acc data present(e) copyin(c) | |
!$acc kernels | |
do i=1,100 | |
e(i) = 2.0*c(i)*c(i)/c(i) | |
end do | |
!$acc end kernels | |
!$acc end data | |
! copy array back to host | |
c = e | |
print *, d(1:10) | |
print *, c(1:10) | |
deallocate(c,d,e) | |
end program |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
program main | |
use cudafor | |
implicit none | |
integer :: i | |
real, allocatable :: c(:) | |
real, allocatable :: d(:) | |
real :: factor = 3.0 | |
!$acc declare create(c,d) | |
allocate(c(100)) | |
allocate(d(100)) | |
c=1.0 | |
d=1.0 | |
!$acc update device( c ) | |
!$cuf kernel do | |
do i=1,100 | |
d(i) = 2.0*c(i) | |
end do | |
!$acc update self( d ) | |
print *, d(1:10) | |
deallocate(c,d) | |
end program |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
subroutine ciao(u,d,t,n) | |
implicit none | |
real, intent(out) :: u(n) | |
real, intent(in) :: d(n), t(n) | |
integer i,n | |
!$acc kernels present(u,d,t) | |
do i=1,100 | |
u(i)=d(i)*t(i) | |
end do | |
!$acc end kernels | |
end subroutine | |
program ma | |
implicit none | |
integer :: n | |
real, allocatable :: a(:), b(:), c(:) | |
!$acc declare create(a,b,c) | |
n=100 | |
allocate(a(n),b(n),c(n)) | |
call ciao(a,b,c,n) | |
!$acc update self(a,b,c) | |
deallocate(a,b,c) | |
end program ma |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! Cannot create interfaces with openacc declare? | |
! Compile with pgfortran -acc -ta=tesla:pinned,cc35,cuda8.0 -Mcuda=cc35,cuda8.0 take6.f90 | |
module store | |
integer, allocatable :: ints(:) | |
!$acc declare create(ints) | |
end module | |
module add_mod | |
use cudafor | |
PUBLIC :: add | |
INTERFACE add | |
MODULE PROCEDURE add_cpu, add_gpu | |
END INTERFACE | |
CONTAINS | |
subroutine add_cpu(a,b,N) | |
implicit none | |
integer, intent(inout) :: a(N), b(N) | |
integer :: i, N | |
print *, "CPU call" | |
do i =1, N | |
a(i) = a(i) + b(i) | |
end do | |
end subroutine | |
subroutine add_gpu(a,b,N) | |
implicit none | |
integer, device, intent(inout) :: a(N), b(N) | |
integer :: i, N | |
print *, "GPU call" | |
!$cuf kernel do | |
do i =1, N | |
a(i) = a(i) + b(i) | |
end do | |
end subroutine | |
end module | |
program ma | |
use store | |
use add_mod | |
implicit none | |
allocate(ints(100)) | |
ints = 1 | |
call add(ints,ints,100) | |
deallocate(ints) | |
end program |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment