SELF_GPUBLAS.f90 Source File


Contents

Source Code


Source Code

module SELF_GPUBLAS

  use SELF_GPU_enums
  use SELF_Constants
  use iso_c_binding
  use iso_fortran_env

  implicit none

  interface hipblasCreate
#ifdef HAVE_CUDA
    function hipblasCreate_(handle) bind(c,name="cublasCreate_v2")
#else
      function hipblasCreate_(handle) bind(c,name="hipblasCreate")
#endif
        use iso_c_binding
        use SELF_GPU_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCreate_
        type(c_ptr) :: handle
      endfunction
      endinterface

      interface hipblasDestroy
#ifdef HAVE_CUDA
        function hipblasDestroy_(handle) bind(c,name="cublasDestroy_v2")
#else
          function hipblasDestroy_(handle) bind(c,name="hipblasDestroy")
#endif
            use iso_c_binding
            use SELF_GPU_enums
            implicit none
            integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDestroy_
            type(c_ptr),value :: handle
          endfunction
          endinterface

          interface hipblasSgemm
#ifdef HAVE_CUDA
            function hipblasSgemm_(handle,transa,transb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc) bind(c,name="cublasSgemm_v2")
#else
              function hipblasSgemm_(handle,transa,transb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc) bind(c,name="hipblasSgemm")
#endif
                use iso_c_binding
                use SELF_GPU_enums
                implicit none
                integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemm_
                type(c_ptr),value :: handle
                integer(kind(HIPBLAS_OP_N)),value :: transa
                integer(kind(HIPBLAS_OP_N)),value :: transb
                integer(c_int),value :: m
                integer(c_int),value :: n
                integer(c_int),value :: k
                real(c_float) :: alpha
                type(c_ptr),value :: A
                integer(c_int),value :: lda
                type(c_ptr),value :: B
                integer(c_int),value :: ldb
                real(c_float) :: beta
                type(c_ptr),value :: C
                integer(c_int),value :: ldc
              endfunction
              endinterface

              interface hipblasDgemm
#ifdef HAVE_CUDA
                function hipblasDgemm_(handle,transa,transb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc) bind(c,name="cublasDgemm_v2")
#else
                  function hipblasDgemm_(handle,transa,transb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc) bind(c,name="hipblasDgemm")
#endif
                    use iso_c_binding
                    use SELF_GPU_enums
                    implicit none
                    integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemm_
                    type(c_ptr),value :: handle
                    integer(kind(HIPBLAS_OP_N)),value :: transa
                    integer(kind(HIPBLAS_OP_N)),value :: transb
                    integer(c_int),value :: m
                    integer(c_int),value :: n
                    integer(c_int),value :: k
                    real(c_double) :: alpha
                    type(c_ptr),value :: A
                    integer(c_int),value :: lda
                    type(c_ptr),value :: B
                    integer(c_int),value :: ldb
                    real(c_double) :: beta
                    type(c_ptr),value :: C
                    integer(c_int),value :: ldc
                  endfunction
                  endinterface

                  interface hipblasSgemvStridedBatched
#ifdef HAVE_CUDA
                    function hipblasSgemvStridedBatched_(handle,trans,m,n,alpha,A,lda,strideA,x, &
                                                         incx,stridex,beta,y,incy,stridey,batchCount) &
                      bind(c,name="cublasSgemvStridedBatched")
#else
                      function hipblasSgemvStridedBatched_(handle,trans,m,n,alpha,A,lda,strideA,x, &
                                                           incx,stridex,beta,y,incy,stridey,batchCount) &
                        bind(c,name="hipblasSgemvStridedBatched")
#endif
                        use iso_c_binding
                        use SELF_GPU_enums
                        implicit none
                        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemvStridedBatched_
                        type(c_ptr),value :: handle
                        integer(kind(HIPBLAS_OP_N)),value :: trans
                        integer(c_int),value :: m
                        integer(c_int),value :: n
                        real(c_float) :: alpha
                        type(c_ptr),value :: A
                        integer(c_int),value :: lda
                        integer(c_int64_t),value :: strideA
                        type(c_ptr),value :: x
                        integer(c_int),value :: incx
                        integer(c_int64_t),value :: stridex
                        real(c_float) :: beta
                        type(c_ptr),value :: y
                        integer(c_int),value :: incy
                        integer(c_int64_t),value :: stridey
                        integer(c_int),value :: batchCount
                      endfunction

                      endinterface

                      interface hipblasDgemvStridedBatched
#ifdef HAVE_CUDA
                        function hipblasDgemvStridedBatched_(handle,trans,m,n,alpha,A,lda,strideA,x, &
                                                             incx,stridex,beta,y,incy,stridey,batchCount) &
                          bind(c,name="cublasDgemvStridedBatched")
#else
                          function hipblasDgemvStridedBatched_(handle,trans,m,n,alpha,A,lda,strideA,x, &
                                                               incx,stridex,beta,y,incy,stridey,batchCount) &
                            bind(c,name="hipblasDgemvStridedBatched")
#endif
                            use iso_c_binding
                            use SELF_GPU_enums
                            implicit none
                            integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemvStridedBatched_
                            type(c_ptr),value :: handle
                            integer(kind(HIPBLAS_OP_N)),value :: trans
                            integer(c_int),value :: m
                            integer(c_int),value :: n
                            real(c_double) :: alpha
                            type(c_ptr),value :: A
                            integer(c_int),value :: lda
                            integer(c_int64_t),value :: strideA
                            type(c_ptr),value :: x
                            integer(c_int),value :: incx
                            integer(c_int64_t),value :: stridex
                            real(c_double) :: beta
                            type(c_ptr),value :: y
                            integer(c_int),value :: incy
                            integer(c_int64_t),value :: stridey
                            integer(c_int),value :: batchCount
                          endfunction
                          endinterface

                          contains

#ifdef DOUBLE_PRECISION
#define hipblasgemm hipblasDgemm
#define hipblasgemvStridedBatched hipblasDgemvStridedBatched
#else
#define hipblasgemm hipblasSgemm
#define hipblasgemvStridedBatched hipblasSgemvStridedBatched
#endif

                          subroutine hipblasCheck(hipblasError_t)
                            use SELF_GPU_enums
                            implicit none
                            integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasError_t

                            if(hipblasError_t /= HIPBLAS_STATUS_SUCCESS) then
                              write(*,*) "GPUBLAS ERROR: Error code = ",hipblasError_t
                              call exit(hipblasError_t)
                            endif
                          endsubroutine hipblasCheck

                          subroutine self_blas_matrixop_1d(A,f,Af,opArows,opAcols,bcols,handle)
                            type(c_ptr),intent(in) :: A
                            real(prec),pointer,intent(in) :: f(:,:,:)
                            real(prec),pointer,intent(inout) :: Af(:,:,:)
                            integer,intent(in) :: opArows,opAcols,bcols
                            type(c_ptr),intent(inout) :: handle
                            ! Local
                            integer(c_int) :: m
                            integer(c_int) :: n
                            integer(c_int) :: k
                            real(c_prec) :: alpha
                            integer(c_int) :: lda
                            integer(c_int) :: ldb
                            integer(c_int) :: ldc
                            real(c_prec) :: beta

                            m = opArows ! number of rows of A^T
                            n = bcols ! number of columns of B
                            k = opAcols ! number of columns of A^T
                            alpha = 1.0_c_prec
                            lda = k ! leading dimension of A (matrix)
                            ldb = k ! leading dimension of B (f)
                            ldc = m ! leading dimension of C (Af)
                            beta = 0.0_c_prec

                            call hipblasCheck(hipblasgemm(handle, &
                                                          HIPBLAS_OP_T,HIPBLAS_OP_N, &
                                                          m,n,k,alpha, &
                                                          A,lda, &
                                                          c_loc(f),ldb, &
                                                          beta, &
                                                          c_loc(Af),ldc))

                          endsubroutine self_blas_matrixop_1d

                          subroutine self_blas_matrixop_dim1_2d(A,f,Af,controldegree,targetdegree,nvars,nelems,handle)
                            type(c_ptr),intent(in) :: A
                            real(prec),pointer,intent(in) :: f(:,:,:,:)
                            real(prec),pointer,intent(inout) :: Af(:,:,:,:)
                            integer,intent(in) :: controldegree,targetdegree,nvars,nelems
                            type(c_ptr),intent(inout) :: handle
                            ! Local
                            integer(c_int) :: m
                            integer(c_int) :: n
                            integer(c_int) :: k
                            real(c_prec) :: alpha
                            integer(c_int) :: lda
                            integer(c_int) :: ldb
                            integer(c_int) :: ldc
                            real(c_prec) :: beta

                            m = targetdegree+1 ! number of rows of A^T
                            n = nvars*nelems*(controldegree+1) ! number of columns of B
                            k = controldegree+1 ! number of columns of A^T
                            alpha = 1.0_c_prec
                            lda = k ! leading dimension of A (interpolation/derivative matrix)
                            ldb = k ! leading dimension of B (f)
                            ldc = m ! leading dimension of C (fTarget)
                            beta = 0.0_c_prec

                            ! First pass interpolates in the first quadrature dimension
                            call hipblasCheck(hipblasgemm(handle, &
                                                          HIPBLAS_OP_T,HIPBLAS_OP_N, &
                                                          m,n,k,alpha, &
                                                          A,lda, &
                                                          c_loc(f),ldb,beta, &
                                                          c_loc(Af),ldc))

                          endsubroutine self_blas_matrixop_dim1_2d

                          subroutine self_blas_matrixop_dim2_2d(A,f,Af,beta,controldegree,targetdegree,nvars,nelems,handle)
                            type(c_ptr),intent(in) :: A
                            real(prec),pointer,intent(in) :: f(:,:,:,:)
                            real(prec),pointer,intent(inout) :: Af(:,:,:,:)
                            real(c_prec),intent(in) :: beta
                            integer,intent(in) :: controldegree,targetdegree,nvars,nelems
                            type(c_ptr),intent(inout) :: handle
                            ! Local
                            integer(c_int) :: m
                            integer(c_int) :: n
                            real(c_prec) :: alpha
                            integer(c_int) :: lda

                            integer :: i
                            integer(c_int64_t) :: strideA
                            integer(c_int) :: incx
                            integer(c_int64_t) :: stridex
                            integer(c_int) :: incy
                            integer(c_int64_t) :: stridey
                            integer(c_int) :: batchCount

                            m = controldegree+1 ! number of rows of A
                            n = targetdegree+1 ! number of columns of A
                            alpha = 1.0_c_prec
                            lda = m ! leading dimension of A
                            strideA = 0 ! stride for the batches of A (no stride)
                            incx = targetdegree+1 !
                            stridex = (controldegree+1)*(targetdegree+1)
                            incy = targetdegree+1
                            stridey = (targetdegree+1)*(targetdegree+1)
                            batchCount = nvars*nelems
                            do i = 0,targetdegree
                              call hipblasCheck(hipblasgemvStridedBatched(handle, &
                                                                          HIPBLAS_OP_T, &
                                                                          m,n,alpha, &
                                                                          A,lda,strideA, &
                                                                          c_loc(f(1+i,1,1,1)),incx,stridex,beta, &
                                                                          c_loc(Af(1+i,1,1,1)),incy,stridey,batchCount))
                            enddo

                          endsubroutine self_blas_matrixop_dim2_2d

                          subroutine self_blas_matrixop_dim1_3d(A,f,Af,controldegree,targetdegree,nvars,nelems,handle)
                            type(c_ptr),intent(in) :: A
                            real(prec),pointer,intent(in) :: f(:,:,:,:,:)
                            real(prec),pointer,intent(inout) :: Af(:,:,:,:,:)
                            integer,intent(in) :: controldegree,targetdegree,nvars,nelems
                            type(c_ptr),intent(inout) :: handle
                            ! Local
                            integer(c_int) :: m
                            integer(c_int) :: n
                            integer(c_int) :: k
                            real(c_prec) :: alpha
                            integer(c_int) :: lda
                            integer(c_int) :: ldb
                            integer(c_int) :: ldc
                            real(c_prec) :: beta

                            m = targetdegree+1 ! number of rows of A^T
                            n = nvars*nelems*(controldegree+1)*(controldegree+1) ! number of columns of B
                            k = controldegree+1 ! number of columns of A^T
                            alpha = 1.0_c_prec
                            lda = k ! leading dimension of A (interoplation matrix)
                            ldb = k ! leading dimension of B (f)
                            ldc = m ! leading dimension of C (fTarget)
                            beta = 0.0_c_prec

                            ! First pass interpolates in the first quadrature dimension
                            call hipblasCheck(hipblasgemm(handle, &
                                                          HIPBLAS_OP_T,HIPBLAS_OP_N, &
                                                          m,n,k,alpha, &
                                                          A,lda, &
                                                          c_loc(f),ldb,beta, &
                                                          c_loc(Af),ldc))

                          endsubroutine self_blas_matrixop_dim1_3d

                          subroutine self_blas_matrixop_dim2_3d(A,f,Af,beta,controldegree,targetdegree,nvars,nelems,handle)
                            type(c_ptr),intent(in) :: A
                            real(prec),pointer,intent(in) :: f(:,:,:,:,:)
                            real(prec),pointer,intent(inout) :: Af(:,:,:,:,:)
                            real(c_prec),intent(in) :: beta
                            integer,intent(in) :: controldegree,targetdegree,nvars,nelems
                            type(c_ptr),intent(inout) :: handle
                            ! Local
                            integer(c_int) :: m
                            integer(c_int) :: n
                            real(c_prec) :: alpha
                            integer(c_int) :: lda

                            integer :: i
                            integer(c_int64_t) :: strideA
                            integer(c_int) :: incx
                            integer(c_int64_t) :: stridex
                            integer(c_int) :: incy
                            integer(c_int64_t) :: stridey
                            integer(c_int) :: batchCount

                            m = controldegree+1 ! number of rows of A
                            n = targetdegree+1 ! number of columns of A
                            alpha = 1.0_c_prec
                            lda = m ! leading dimension of A
                            strideA = 0 ! stride for the batches of A (no stride)
                            incx = targetdegree+1 !
                            stridex = (controldegree+1)*(targetdegree+1)
                            !beta = 0.0_c_prec
                            incy = targetdegree+1
                            stridey = (targetdegree+1)*(targetdegree+1)
                            batchCount = (controldegree+1)*nvars*nelems
                            do i = 0,targetdegree
                              call hipblasCheck(hipblasgemvStridedBatched(handle, &
                                                                          HIPBLAS_OP_T, &
                                                                          m,n,alpha, &
                                                                          A,lda,strideA, &
                                                                          c_loc(f(1+i,1,1,1,1)),incx,stridex,beta, &
                                                                          c_loc(Af(1+i,1,1,1,1)),incy,stridey,batchCount))
                            enddo

                          endsubroutine self_blas_matrixop_dim2_3d

                          subroutine self_blas_matrixop_dim3_3d(A,f,Af,beta,controldegree,targetdegree,nvars,nelems,handle)
                            type(c_ptr),intent(in) :: A
                            real(prec),pointer,intent(in) :: f(:,:,:,:,:)
                            real(prec),pointer,intent(inout) :: Af(:,:,:,:,:)
                            real(c_prec),intent(in) :: beta
                            integer,intent(in) :: controldegree,targetdegree,nvars,nelems
                            type(c_ptr),intent(inout) :: handle
                            ! Local
                            integer(c_int) :: m
                            integer(c_int) :: n
                            real(c_prec) :: alpha
                            integer(c_int) :: lda

                            integer :: i,j
                            integer(c_int64_t) :: strideA
                            integer(c_int) :: incx
                            integer(c_int64_t) :: stridex
                            integer(c_int) :: incy
                            integer(c_int64_t) :: stridey
                            integer(c_int) :: batchCount

                            m = controldegree+1 ! number of rows of A
                            n = targetdegree+1 ! number of columns of A
                            alpha = 1.0_c_prec
                            lda = m ! leading dimension of A
                            strideA = 0 ! stride for the batches of A (no stride)
                            incx = (targetdegree+1)*(targetdegree+1) !
                            stridex = (controldegree+1)*(targetdegree+1)*(targetdegree+1)
                            incy = (targetdegree+1)*(targetdegree+1)
                            stridey = (targetdegree+1)*(targetdegree+1)*(targetdegree+1)
                            batchCount = nvars*nelems
                            do j = 0,targetdegree
                              do i = 0,targetdegree
                                call hipblasCheck(hipblasgemvStridedBatched(handle, &
                                                                            HIPBLAS_OP_T, &
                                                                            m,n,alpha, &
                                                                            A,lda,strideA, &
                                                                            c_loc(f(1+i,1+j,1,1,1)),incx,stridex,beta, &
                                                                            c_loc(Af(1+i,1+j,1,1,1)),incy,stridey,batchCount))
                              enddo
                            enddo

                          endsubroutine self_blas_matrixop_dim3_3d
                          endmodule SELF_GPUBLAS