SELF_ECAdvection2D.cpp Source File


Contents


Source Code

#include "SELF_GPU_Macros.h"
#include <math.h>

// ============================================================
// Mirror boundary condition: extBoundary = boundary at domain faces
// ============================================================

// Mirror BC kernel for 2D EC Advection
// Sets extBoundary = boundary on pre-filtered boundary faces
__global__ void hbc2d_mirror_ecadvection2d_kernel(
    real *extBoundary, real *boundary,
    int *elements, int *sides,
    int nBoundaries, int N, int nel, int nvar)
{
  uint32_t idof = threadIdx.x + blockIdx.x*blockDim.x;
  uint32_t total_dofs = nBoundaries * (N+1);

  if (idof < total_dofs) {
    uint32_t i  = idof % (N+1);
    uint32_t n  = idof / (N+1);
    uint32_t e1 = elements[n] - 1;
    uint32_t s1 = sides[n] - 1;
    uint32_t ivar = blockIdx.y;
    extBoundary[SCB_2D_INDEX(i,s1,e1,ivar,N,nel)] =
      boundary[SCB_2D_INDEX(i,s1,e1,ivar,N,nel)];
  }
}

extern "C"
{
  void hbc2d_mirror_ecadvection2d_gpu(
      real *extBoundary, real *boundary,
      int *elements, int *sides,
      int nBoundaries, int N, int nel, int nvar)
  {
    int total_dofs = nBoundaries * (N+1);
    int threads_per_block = 256;
    int nblocks_x = total_dofs/threads_per_block + 1;
    hbc2d_mirror_ecadvection2d_kernel<<<dim3(nblocks_x,nvar,1),
      dim3(threads_per_block,1,1), 0, 0>>>(extBoundary, boundary,
        elements, sides, nBoundaries, N, nel, nvar);
  }
}

// ============================================================
// LLF boundary flux for linear advection
//   flux = 0.5*(un*(sL+sR) - lambda*(sR-sL)) * nScale
//   un = u*nx + v*ny,  lambda = sqrt(u^2+v^2)
// ============================================================

__global__ void boundaryflux_ecadvection2d_gpukernel(
    real *fb, real *fextb, real *nhat, real *nscale, real *flux,
    real u, real v, real lam, int N, int nel, int nvar)
{
  uint32_t idof = threadIdx.x + blockIdx.x*blockDim.x;
  uint32_t ndof = (N+1)*4*nel;

  if (idof < ndof) {
    uint32_t i   = idof % (N+1);
    uint32_t j   = (idof/(N+1)) % 4;
    uint32_t iel = idof/(N+1)/4;
    uint32_t ivar = blockIdx.y;

    real nx   = nhat[VEB_2D_INDEX(i,j,iel,0,0,N,nel,1)];
    real ny   = nhat[VEB_2D_INDEX(i,j,iel,0,1,N,nel,1)];
    real un   = u*nx + v*ny;
    real nmag = nscale[SCB_2D_INDEX(i,j,iel,0,N,nel)];

    real sL = fb[idof + ivar*ndof];
    real sR = fextb[idof + ivar*ndof];

    flux[idof + ivar*ndof] = 0.5*(un*(sL+sR) - lam*(sR-sL)) * nmag;
  }
}

extern "C"
{
  void boundaryflux_ecadvection2d_gpu(
      real *fb, real *fextb, real *nhat, real *nscale, real *flux,
      real u, real v, int N, int nel, int nvar)
  {
    real lam = sqrt(u*u + v*v);
    int ndof = (N+1)*4*nel;
    int threads_per_block = 256;
    int nblocks_x = ndof/threads_per_block + 1;
    boundaryflux_ecadvection2d_gpukernel<<<dim3(nblocks_x,nvar,1),
      dim3(threads_per_block,1,1), 0, 0>>>(fb, fextb, nhat, nscale, flux, u, v, lam, N, nel, nvar);
  }
}

// ============================================================
// TwoPointFluxMethod for EC linear advection on a curvilinear mesh
//
// Computes contravariant two-point fluxes:
//   Fc^r(nn,i,j) = avg(Ja^r . a) * (s(node_L) + s(node_R)) / 2
//
// where a = (u,v) is the constant advection velocity and the metric
// averaging uses the correct partner per direction.
//
// f   : output, TPV_2D_INDEX layout
// s   : solution interior, SC_2D_INDEX layout
// dsdx: metric terms, TE_2D_INDEX layout (nVar=1 for geometry)
// ============================================================

template <int blockSize>
__global__ void __launch_bounds__(256) twopointfluxmethod_ecadvection2d_gpukernel(
    real *f, real *s, real *dsdx, real u, real v, int nq, int N, int nvar)
{
  uint32_t iq = threadIdx.x;
  if (iq < nq) {
    uint32_t iel  = blockIdx.x;
    uint32_t nel  = gridDim.x;
    uint32_t ivar = blockIdx.y;
    uint32_t i    = iq % (N+1);
    uint32_t j    = iq / (N+1);

    real s_ij = s[iq + nq*(iel + nel*ivar)];
    int nq3 = nq*(N+1);  // stride between idir slices in TPV layout

    for (int nn = 0; nn < N+1; nn++) {
      uint32_t iq_nn_j = nn + (N+1)*j;    // node (nn,j) for xi^1
      uint32_t iq_i_nn = i  + (N+1)*nn;   // node (i,nn) for xi^2

      // xi^1: pair (i,j)-(nn,j), project onto avg(Ja^1)
      real s_nn_j = s[iq_nn_j + nq*(iel + nel*ivar)];
      real savg1 = 0.5*(s_ij + s_nn_j);
      real un_contra1 = 0.5*(dsdx[iq + nq*(iel + nel*0)] + dsdx[iq_nn_j + nq*(iel + nel*0)])*u
                      + 0.5*(dsdx[iq + nq*(iel + nel*1)] + dsdx[iq_nn_j + nq*(iel + nel*1)])*v;
      f[nn + (N+1)*iq + nq3*(iel + nel*ivar)] = un_contra1 * savg1;

      // xi^2: pair (i,j)-(i,nn), project onto avg(Ja^2)
      real s_i_nn = s[iq_i_nn + nq*(iel + nel*ivar)];
      real savg2 = 0.5*(s_ij + s_i_nn);
      real un_contra2 = 0.5*(dsdx[iq + nq*(iel + nel*2)] + dsdx[iq_i_nn + nq*(iel + nel*2)])*u
                      + 0.5*(dsdx[iq + nq*(iel + nel*3)] + dsdx[iq_i_nn + nq*(iel + nel*3)])*v;
      f[nn + (N+1)*iq + nq3*(iel + nel*(ivar + nvar))] = un_contra2 * savg2;
    }
  }
}

extern "C"
{
  void twopointfluxmethod_ecadvection2d_gpu(
      real *f, real *s, real *dsdx, real u, real v, int N, int nvar, int nel)
  {
    int nq = (N+1)*(N+1);
    if (N <= 7) {
      twopointfluxmethod_ecadvection2d_gpukernel<64><<<dim3(nel,nvar,1),
        dim3(64,1,1), 0, 0>>>(f, s, dsdx, u, v, nq, N, nvar);
    } else {
      twopointfluxmethod_ecadvection2d_gpukernel<256><<<dim3(nel,nvar,1),
        dim3(256,1,1), 0, 0>>>(f, s, dsdx, u, v, nq, N, nvar);
    }
  }
}