pozdneev/ge-partitioned.f90

## ge-partitioned.f90
! Gaussian partitioned elimination without pivoting using straightforward
! formulas and BLAS routines
!!---------------------------------------------------------------------------
! gfortran -ffree-form -Wall -Wextra -lblas -fopenmp
!!---------------------------------------------------------------------------
!
! [1] J. Demmel "Numerical Linear Algebra"
! [2] N. J. Nigham "Gaussian Elimination"
!
!   ELIMINATION
!   for k = 1: n-1
!       for i = k+1: n
!           a(i, k) /= a(k, k)
!       for i = k+1: n
!           for j = k+1: n
!               a(i, j) -= a(i, k) * a(k, j)
!           end
!       end
!   end
!
!   BACKSUBSTITUTION  -  L U y = f  =>  L x = f  =>  x = L \ f  =>  y = U \ x
!   for i = 1: n
!       x(i) = f(i)
!       for j = 1: i-1
!           x(i) -= a(i, j) * x(j)
!       end
!   end
!
!   for i = n: 1
!       y(i) = x(i)
!       for j = n: i+1
!           y(i) -= a(i, j) * y(j)
!       end
!       y(i) /= a(i, i)
!   end
!!---------------------------------------------------------------------------
program ge
!!---------------------------------------------------------------------------
    use omp_lib, only: omp_get_wtime

    implicit none

    ! Matrix of coefficients; the one is filled by random_number()
    real, dimension(:, :), allocatable :: A

    ! "Analytical" solution; the one is filled by random_number()
    real, dimension(:), allocatable :: u

    ! Right-hand side (RHS); the one is calculated as f = A * u
    ! Numerical solution (NS) of the equation A y = f
    ! RHS is overwritten by NS
    real, dimension(:), allocatable :: y

    ! Size of block
    integer, parameter :: b = 4

    ! Size of matrix; must be a multiple of b
    integer, parameter :: n = b * 3

    ! Time marks
    real(kind(0.d0)) :: elimination_start, elimination_finish
    real(kind(0.d0)) :: backsubstition_start, backsubstition_finish

    ! Allocate memory
    allocate(A(1: n, 1: n))
    allocate(u(1: n))
    allocate(y(1: n))

    ! Algorithm uses straightforward formulas
    call Generate_Data()
    call Straightforward_Elimination()
    call Straightforward_Backsubstition()
    call Print_Norms()
    call Print_Times()

    ! Algorithm uses BLAS
    call Generate_Data()
    call BLAS_Elimination()
    call BLAS_Backsubstition()
    call Print_Norms()
    call Print_Times()

    ! Free memory
    deallocate(A)
    deallocate(u)
    deallocate(y)
!!---------------------------------------------------------------------------
contains
!!---------------------------------------------------------------------------
subroutine Print_Norms()
    write (*, *) "Norms:", maxval(abs(u)), maxval(abs(y - u))
end subroutine Print_Norms
!!---------------------------------------------------------------------------
subroutine Print_Times()
    write (*, *) "Times:", &
            elimination_finish - elimination_start, &
            backsubstition_finish - backsubstition_start
end subroutine Print_Times
!!---------------------------------------------------------------------------
! This version is a simplified modification of
! http://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html
subroutine Init_Random_Seed()
    integer :: i, n
    integer, dimension(:), allocatable :: seed

    call random_seed(size = n)
    allocate(seed(n))

    seed = 37 * (/ (i - 1, i = 1, n) /)
    call random_seed(put = seed)

    deallocate(seed)
end subroutine Init_Random_Seed
!!---------------------------------------------------------------------------
subroutine Generate_Data()
    call Init_Random_Seed()
    call random_number(A)
    call random_number(u)
    y = matmul(A, u)
end subroutine Generate_Data
!!---------------------------------------------------------------------------
subroutine Straightforward_Elimination()
    integer :: ib
    integer :: i, j, k
    integer :: p1b, p1e, p2b, p2e

    elimination_start = omp_get_wtime()

    do ib = 0, n/b - 1
        p1b = ib * b + 1
        p1e = (ib + 1) * b
        p2b = p1e + 1
        p2e = n

        ! L_{11} U_{11} = A_{11}
        do k = p1b, p1e
            do i = k+1, p1e
                a(i, k) = a(i, k) / a(k, k)
            end do

            do j = k+1, p1e
                do i = k+1, p1e
                    a(i, j) = a(i, j) - a(i, k) * a(k, j)
                end do
            end do
        end do

        ! L_{21} = A_{21} U_{11}^{-1}
        do k = p1b, p1e
            do i = p2b, p2e
                do j = p1b, k-1
                    a(i, k) = a(i, k) - a(i, j) * a(j, k)
                end do

                a(i, k) = a(i, k) / a(k, k)
            end do
        end do

        ! U_{12} = L_{11} \ A_{12}
        do j = p2b, p2e
            do k = p1b, p1e
                do i = p1b, k-1
                    a(k, j) = a(k, j) - a(k, i) * a(i, j)
                end do
            end do
        end do

        ! A_{22} -= L_{21} U_{12}
        do j = p2b, p2e
            do k = p1b, p1e
                do i = p2b, p2e
                    a(i, j) = a(i, j) - a(i, k) * a(k, j)
                end do
            end do
        end do
    end do

    elimination_finish = omp_get_wtime()
end subroutine Straightforward_Elimination
!!---------------------------------------------------------------------------
subroutine BLAS_Elimination()
    integer :: ib
    integer :: k
    integer :: p1b, p1e, p2b, p2e

    elimination_start = omp_get_wtime()

    do ib = 0, n/b - 1
        p1b = ib * b + 1
        p1e = (ib + 1) * b
        p2b = p1e + 1
        p2e = n

        ! L_{11} U_{11} = A_{11}
        do k = p1b, p1e
            ! x = a*x
            call sscal(p1e-k, 1.0 / a(k, k), a(k+1, k), 1)

            ! A := alpha*x*y'+ A
            call sger(p1e-k, p1e-k, -1.0, &
                    a(k+1, k), 1, &
                    a(k, k+1), n, &
                    a(k+1, k+1), n)
        end do

        ! L_{21} U_{11} = A_{21}  =>  L_{21} = A_{21} U_{11}^{-1}
        ! X*op(A) = alpha*B
        call strsm('R', 'U', 'N', 'N', p2e-p2b+1, b, 1.0, &
                a(p1b, p1b), n, a(p2b, p1b), n)

        ! L_{11} U_{12} = A_{12}  =>  U_{12} = L_{11} \ A_{12}
        ! op(A)*X = alpha*B
        call strsm('L', 'L', 'N', 'U', b, p2e-p2b+1, 1.0, &
                a(p1b, p1b), n, a(p1b, p2b), n)

        ! A_{22} -= L_{21} U_{12}
        call sgemm('N', 'N', p2e-p2b+1, p2e-p2b+1, b, -1.0, &
                a(p2b, p1b), n, a(p1b, p2b), n, 1.0, a(p2b, p2b), n)
    end do

    elimination_finish = omp_get_wtime()
end subroutine BLAS_Elimination
!!---------------------------------------------------------------------------
subroutine Straightforward_Backsubstition()
    integer :: i, j

    backsubstition_start = omp_get_wtime()

    ! L x = f  =>  x = L \ f
    do i = 1, n
        do j = 1, i-1
            y(i) = y(i) - a(i, j) * y(j)
        end do
    end do

    ! U y = x  =>  y = U \ x
    do i = n, 1, -1
        do j = i+1, n
            y(i) = y(i) - a(i, j) * y(j)
        end do

        y(i) = y(i) / a(i, i)
    end do

    backsubstition_finish = omp_get_wtime()
end subroutine Straightforward_Backsubstition
!!---------------------------------------------------------------------------
subroutine BLAS_Backsubstition()
    backsubstition_start = omp_get_wtime()

    ! L x = f  =>  x = L \ f
    ! op(A)*X = alpha*B
    !call strsm('L', 'L', 'N', 'U', n, 1, 1.0, a, n, y, n)

    ! A * x = b
    call strsv('L', 'N', 'U', n, a, n, y, 1)

    ! U y = x  =>  y = U \ x

    ! op(A)*X = alpha*B
    !call strsm('L', 'U', 'N', 'N', n, 1, 1.0, a, n, y, n)

    ! A * x = b
    call strsv('U', 'N', 'N', n, a, n, y, 1)

    backsubstition_finish = omp_get_wtime()
end subroutine BLAS_Backsubstition
!!---------------------------------------------------------------------------
end program ge
!!---------------------------------------------------------------------------
	! Gaussian partitioned elimination without pivoting using straightforward
	! formulas and BLAS routines
	!!---------------------------------------------------------------------------
	! gfortran -ffree-form -Wall -Wextra -lblas -fopenmp
	!!---------------------------------------------------------------------------
	!
	! [1] J. Demmel "Numerical Linear Algebra"
	! [2] N. J. Nigham "Gaussian Elimination"
	!
	! ELIMINATION
	! for k = 1: n-1
	! for i = k+1: n
	! a(i, k) /= a(k, k)
	! for i = k+1: n
	! for j = k+1: n
	! a(i, j) -= a(i, k) * a(k, j)
	! end
	! end
	! end
	!
	! BACKSUBSTITUTION - L U y = f => L x = f => x = L \ f => y = U \ x
	! for i = 1: n
	! x(i) = f(i)
	! for j = 1: i-1
	! x(i) -= a(i, j) * x(j)
	! end
	! end
	!
	! for i = n: 1
	! y(i) = x(i)
	! for j = n: i+1
	! y(i) -= a(i, j) * y(j)
	! end
	! y(i) /= a(i, i)
	! end
	!!---------------------------------------------------------------------------
	program ge
	!!---------------------------------------------------------------------------
	use omp_lib, only: omp_get_wtime

	implicit none

	! Matrix of coefficients; the one is filled by random_number()
	real, dimension(:, :), allocatable :: A

	! "Analytical" solution; the one is filled by random_number()
	real, dimension(:), allocatable :: u

	! Right-hand side (RHS); the one is calculated as f = A * u
	! Numerical solution (NS) of the equation A y = f
	! RHS is overwritten by NS
	real, dimension(:), allocatable :: y

	! Size of block
	integer, parameter :: b = 4

	! Size of matrix; must be a multiple of b
	integer, parameter :: n = b * 3

	! Time marks
	real(kind(0.d0)) :: elimination_start, elimination_finish
	real(kind(0.d0)) :: backsubstition_start, backsubstition_finish

	! Allocate memory
	allocate(A(1: n, 1: n))
	allocate(u(1: n))
	allocate(y(1: n))

	! Algorithm uses straightforward formulas
	call Generate_Data()
	call Straightforward_Elimination()
	call Straightforward_Backsubstition()
	call Print_Norms()
	call Print_Times()

	! Algorithm uses BLAS
	call Generate_Data()
	call BLAS_Elimination()
	call BLAS_Backsubstition()
	call Print_Norms()
	call Print_Times()

	! Free memory
	deallocate(A)
	deallocate(u)
	deallocate(y)
	!!---------------------------------------------------------------------------
	contains
	!!---------------------------------------------------------------------------
	subroutine Print_Norms()
	write (, ) "Norms:", maxval(abs(u)), maxval(abs(y - u))
	end subroutine Print_Norms
	!!---------------------------------------------------------------------------
	subroutine Print_Times()
	write (, ) "Times:", &
	elimination_finish - elimination_start, &
	backsubstition_finish - backsubstition_start
	end subroutine Print_Times
	!!---------------------------------------------------------------------------
	! This version is a simplified modification of
	! http://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html
	subroutine Init_Random_Seed()
	integer :: i, n
	integer, dimension(:), allocatable :: seed

	call random_seed(size = n)
	allocate(seed(n))

	seed = 37 * (/ (i - 1, i = 1, n) /)
	call random_seed(put = seed)

	deallocate(seed)
	end subroutine Init_Random_Seed
	!!---------------------------------------------------------------------------
	subroutine Generate_Data()
	call Init_Random_Seed()
	call random_number(A)
	call random_number(u)
	y = matmul(A, u)
	end subroutine Generate_Data
	!!---------------------------------------------------------------------------
	subroutine Straightforward_Elimination()
	integer :: ib
	integer :: i, j, k
	integer :: p1b, p1e, p2b, p2e

	elimination_start = omp_get_wtime()

	do ib = 0, n/b - 1
	p1b = ib * b + 1
	p1e = (ib + 1) * b
	p2b = p1e + 1
	p2e = n

	! L_{11} U_{11} = A_{11}
	do k = p1b, p1e
	do i = k+1, p1e
	a(i, k) = a(i, k) / a(k, k)
	end do

	do j = k+1, p1e
	do i = k+1, p1e
	a(i, j) = a(i, j) - a(i, k) * a(k, j)
	end do
	end do
	end do

	! L_{21} = A_{21} U_{11}^{-1}
	do k = p1b, p1e
	do i = p2b, p2e
	do j = p1b, k-1
	a(i, k) = a(i, k) - a(i, j) * a(j, k)
	end do

	a(i, k) = a(i, k) / a(k, k)
	end do
	end do

	! U_{12} = L_{11} \ A_{12}
	do j = p2b, p2e
	do k = p1b, p1e
	do i = p1b, k-1
	a(k, j) = a(k, j) - a(k, i) * a(i, j)
	end do
	end do
	end do

	! A_{22} -= L_{21} U_{12}
	do j = p2b, p2e
	do k = p1b, p1e
	do i = p2b, p2e
	a(i, j) = a(i, j) - a(i, k) * a(k, j)
	end do
	end do
	end do
	end do

	elimination_finish = omp_get_wtime()
	end subroutine Straightforward_Elimination
	!!---------------------------------------------------------------------------
	subroutine BLAS_Elimination()
	integer :: ib
	integer :: k
	integer :: p1b, p1e, p2b, p2e

	elimination_start = omp_get_wtime()

	do ib = 0, n/b - 1
	p1b = ib * b + 1
	p1e = (ib + 1) * b
	p2b = p1e + 1
	p2e = n

	! L_{11} U_{11} = A_{11}
	do k = p1b, p1e
	! x = a*x
	call sscal(p1e-k, 1.0 / a(k, k), a(k+1, k), 1)

	! A := alphaxy'+ A
	call sger(p1e-k, p1e-k, -1.0, &
	a(k+1, k), 1, &
	a(k, k+1), n, &
	a(k+1, k+1), n)
	end do

	! L_{21} U_{11} = A_{21} => L_{21} = A_{21} U_{11}^{-1}
	! Xop(A) = alphaB
	call strsm('R', 'U', 'N', 'N', p2e-p2b+1, b, 1.0, &
	a(p1b, p1b), n, a(p2b, p1b), n)

	! L_{11} U_{12} = A_{12} => U_{12} = L_{11} \ A_{12}
	! op(A)X = alphaB
	call strsm('L', 'L', 'N', 'U', b, p2e-p2b+1, 1.0, &
	a(p1b, p1b), n, a(p1b, p2b), n)

	! A_{22} -= L_{21} U_{12}
	call sgemm('N', 'N', p2e-p2b+1, p2e-p2b+1, b, -1.0, &
	a(p2b, p1b), n, a(p1b, p2b), n, 1.0, a(p2b, p2b), n)
	end do

	elimination_finish = omp_get_wtime()
	end subroutine BLAS_Elimination
	!!---------------------------------------------------------------------------
	subroutine Straightforward_Backsubstition()
	integer :: i, j

	backsubstition_start = omp_get_wtime()

	! L x = f => x = L \ f
	do i = 1, n
	do j = 1, i-1
	y(i) = y(i) - a(i, j) * y(j)
	end do
	end do

	! U y = x => y = U \ x
	do i = n, 1, -1
	do j = i+1, n
	y(i) = y(i) - a(i, j) * y(j)
	end do

	y(i) = y(i) / a(i, i)
	end do

	backsubstition_finish = omp_get_wtime()
	end subroutine Straightforward_Backsubstition
	!!---------------------------------------------------------------------------
	subroutine BLAS_Backsubstition()
	backsubstition_start = omp_get_wtime()

	! L x = f => x = L \ f
	! op(A)X = alphaB
	!call strsm('L', 'L', 'N', 'U', n, 1, 1.0, a, n, y, n)

	! A * x = b
	call strsv('L', 'N', 'U', n, a, n, y, 1)

	! U y = x => y = U \ x

	! op(A)X = alphaB
	!call strsm('L', 'U', 'N', 'N', n, 1, 1.0, a, n, y, n)

	! A * x = b
	call strsv('U', 'N', 'N', n, a, n, y, 1)

	backsubstition_finish = omp_get_wtime()
	end subroutine BLAS_Backsubstition
	!!---------------------------------------------------------------------------
	end program ge
	!!---------------------------------------------------------------------------