SELF_ECAdvection3D.cpp

Source Code

#include "SELF_GPU_Macros.h"
#include <math.h>

// ============================================================
// Prescribed-zero boundary state for the tracer.
//
// "No-normal-flow" only has physical meaning when velocity is a
// prognostic variable that can be reflected at the wall. For a
// passive tracer with externally-prescribed velocity the correct
// external state is identically zero on the boundary, which combined
// with the LLF (= upwind for linear advection) Riemann flux gives
// zero injection on inflow faces and upwind sL on outflow faces.
//
// 3D: 6 sides, boundary arrays indexed (i,j,side,iel,ivar)
// ============================================================

// BC kernel for 3D EC Advection: sets extBoundary = 0 on filtered faces.
// (boundary arg is unused; kept for ABI compatibility with the wrapper.)
__global__ void hbc3d_mirror_ecadvection3d_kernel(
    real *extBoundary, real *boundary,
    int *elements, int *sides,
    int nBoundaries, int N, int nel, int nvar)
{
  uint32_t idof = threadIdx.x + blockIdx.x*blockDim.x;
  uint32_t dofs_per_face = (N+1)*(N+1);
  uint32_t total_dofs = nBoundaries * dofs_per_face;

  if (idof < total_dofs) {
    uint32_t i  = idof % (N+1);
    uint32_t j  = (idof / (N+1)) % (N+1);
    uint32_t n  = idof / dofs_per_face;
    uint32_t e1 = elements[n] - 1;
    uint32_t s1 = sides[n] - 1;
    uint32_t ivar = blockIdx.y;
    extBoundary[SCB_3D_INDEX(i,j,s1,e1,ivar,N,nel)] = (real)0;
    (void)boundary;
  }
}

extern "C"
{
  void hbc3d_mirror_ecadvection3d_gpu(
      real *extBoundary, real *boundary,
      int *elements, int *sides,
      int nBoundaries, int N, int nel, int nvar)
  {
    int dofs_per_face = (N+1)*(N+1);
    int total_dofs = nBoundaries * dofs_per_face;
    int threads_per_block = 256;
    int nblocks_x = total_dofs/threads_per_block + 1;
    hbc3d_mirror_ecadvection3d_kernel<<<dim3(nblocks_x,nvar,1),
      dim3(threads_per_block,1,1), 0, 0>>>(extBoundary, boundary,
        elements, sides, nBoundaries, N, nel, nvar);
  }
}

// ============================================================
// LLF boundary flux for 3-D linear advection
//   flux = 0.5*(un*(sL+sR) - lambda*(sR-sL)) * nScale
//   un = u*nx + v*ny + w*nz,  lambda = |un|
//
// Per-face lambda matches upwind (Godunov) for linear advection: at
// tangential faces (u.n=0) lam=0 so flux=0, avoiding spurious dissipation
// on element interfaces whose normal is perpendicular to the velocity.
// ============================================================

__global__ void boundaryflux_ecadvection3d_gpukernel(
    real *fb, real *fextb, real *nhat, real *nscale, real *flux,
    real u, real v, real w, int N, int nel, int nvar)
{
  uint32_t idof = threadIdx.x + blockIdx.x*blockDim.x;
  uint32_t ndof = (N+1)*(N+1)*6*nel;

  if (idof < ndof) {
    uint32_t i   = idof % (N+1);
    uint32_t j   = (idof/(N+1)) % (N+1);
    uint32_t s1  = (idof/(N+1)/(N+1)) % 6;
    uint32_t iel = idof/(N+1)/(N+1)/6;
    uint32_t ivar = blockIdx.y;

    real nx   = nhat[VEB_3D_INDEX(i,j,s1,iel,0,0,N,nel,1)];
    real ny   = nhat[VEB_3D_INDEX(i,j,s1,iel,0,1,N,nel,1)];
    real nz   = nhat[VEB_3D_INDEX(i,j,s1,iel,0,2,N,nel,1)];
    real un   = u*nx + v*ny + w*nz;
    real lam  = fabs(un);
    real nmag = nscale[SCB_3D_INDEX(i,j,s1,iel,0,N,nel)];

    real sL = fb[idof + ivar*ndof];
    real sR = fextb[idof + ivar*ndof];

    flux[idof + ivar*ndof] = 0.5*(un*(sL+sR) - lam*(sR-sL)) * nmag;
  }
}

extern "C"
{
  void boundaryflux_ecadvection3d_gpu(
      real *fb, real *fextb, real *nhat, real *nscale, real *flux,
      real u, real v, real w, int N, int nel, int nvar)
  {
    int ndof = (N+1)*(N+1)*6*nel;
    int threads_per_block = 256;
    int nblocks_x = ndof/threads_per_block + 1;
    boundaryflux_ecadvection3d_gpukernel<<<dim3(nblocks_x,nvar,1),
      dim3(threads_per_block,1,1), 0, 0>>>(fb, fextb, nhat, nscale, flux, u, v, w, N, nel, nvar);
  }
}

// ============================================================
// TwoPointFluxMethod for EC linear advection on a curvilinear 3-D mesh
//
// Computes contravariant two-point fluxes:
//   Fc^r(nn,i,j,k) = avg(Ja^r . a) * (s(node_L) + s(node_R)) / 2
//
// f    : output, TPV_3D_INDEX layout
// s    : solution interior, SC_3D_INDEX layout
// dsdx : metric terms, TE_3D_INDEX layout (nVar=1)
//        dsdx[iq + nq*(iel + nel*(d + 3*r))]  for physical dir d, comp dir r
// ============================================================

template <int blockSize, int matSize>
__global__ void __launch_bounds__(512) twopointfluxmethod_ecadvection3d_gpukernel(
    real *f, real *s, real *dsdx, real u, real v, real w, int nq, int N, int nvar)
{
  uint32_t iq = threadIdx.x;
  if (iq < nq) {
    uint32_t iel  = blockIdx.x;
    uint32_t nel  = gridDim.x;
    uint32_t ivar = blockIdx.y;
    uint32_t ii   = iq % (N+1);
    uint32_t jj   = (iq/(N+1)) % (N+1);
    uint32_t kk   = iq/(N+1)/(N+1);

    real s_ijk = s[iq + nq*(iel + nel*ivar)];
    int nq4 = nq*(N+1);  // stride between idir slices in TPV_3D layout

    for (int nn = 0; nn < N+1; nn++) {
      // xi^1: pair (i,j,k)-(nn,j,k)
      uint32_t iq_nn = nn + (N+1)*(jj + (N+1)*kk);
      real savg1 = 0.5*(s_ijk + s[iq_nn + nq*(iel + nel*ivar)]);
      real uc1 = 0.5*(dsdx[iq + nq*(iel + nel*0)] + dsdx[iq_nn + nq*(iel + nel*0)])*u
               + 0.5*(dsdx[iq + nq*(iel + nel*1)] + dsdx[iq_nn + nq*(iel + nel*1)])*v
               + 0.5*(dsdx[iq + nq*(iel + nel*2)] + dsdx[iq_nn + nq*(iel + nel*2)])*w;
      f[nn + (N+1)*iq + nq4*(iel + nel*ivar)] = uc1 * savg1;

      // xi^2: pair (i,j,k)-(i,nn,k)
      uint32_t iq_nn2 = ii + (N+1)*(nn + (N+1)*kk);
      real savg2 = 0.5*(s_ijk + s[iq_nn2 + nq*(iel + nel*ivar)]);
      real uc2 = 0.5*(dsdx[iq + nq*(iel + nel*3)] + dsdx[iq_nn2 + nq*(iel + nel*3)])*u
               + 0.5*(dsdx[iq + nq*(iel + nel*4)] + dsdx[iq_nn2 + nq*(iel + nel*4)])*v
               + 0.5*(dsdx[iq + nq*(iel + nel*5)] + dsdx[iq_nn2 + nq*(iel + nel*5)])*w;
      f[nn + (N+1)*iq + nq4*(iel + nel*(ivar + nvar))] = uc2 * savg2;

      // xi^3: pair (i,j,k)-(i,j,nn)
      uint32_t iq_nn3 = ii + (N+1)*(jj + (N+1)*nn);
      real savg3 = 0.5*(s_ijk + s[iq_nn3 + nq*(iel + nel*ivar)]);
      real uc3 = 0.5*(dsdx[iq + nq*(iel + nel*6)] + dsdx[iq_nn3 + nq*(iel + nel*6)])*u
               + 0.5*(dsdx[iq + nq*(iel + nel*7)] + dsdx[iq_nn3 + nq*(iel + nel*7)])*v
               + 0.5*(dsdx[iq + nq*(iel + nel*8)] + dsdx[iq_nn3 + nq*(iel + nel*8)])*w;
      f[nn + (N+1)*iq + nq4*(iel + nel*(ivar + 2*nvar))] = uc3 * savg3;
    }
  }
}

extern "C"
{
  void twopointfluxmethod_ecadvection3d_gpu(
      real *f, real *s, real *dsdx, real u, real v, real w, int N, int nvar, int nel)
  {
    int nq = (N+1)*(N+1)*(N+1);
    if (N < 4) {
      twopointfluxmethod_ecadvection3d_gpukernel<64,16><<<dim3(nel,nvar,1),
        dim3(64,1,1), 0, 0>>>(f, s, dsdx, u, v, w, nq, N, nvar);
    } else if (N >= 4 && N < 8) {
      twopointfluxmethod_ecadvection3d_gpukernel<512,64><<<dim3(nel,nvar,1),
        dim3(512,1,1), 0, 0>>>(f, s, dsdx, u, v, w, nq, N, nvar);
    }
  }
}
SELF_ECAdvection3D.cpp Source File

Contents

Source Code

Contents

Source Code

Source Code