_static/doxyhtml/_mass_matrices_deposition_8_h_source.html

/* Copyright 2019 Axel Huebl, David Grote, Maxence Thevenet

 * Remi Lehe, Weiqun Zhang, Michael Rowan

 *

 * This file is part of WarpX.

 *

 * License: BSD-3-Clause-LBNL

 */

#ifndef WARPX_MASS_MATRICES_DEPOSITION_H_

#define WARPX_MASS_MATRICES_DEPOSITION_H_


#include "Particles/Deposition/SharedDepositionUtils.H"

#include "Particles/Pusher/GetAndSetPosition.H"

#include "Particles/Pusher/UpdatePosition.H"

#include "Particles/Gather/FieldGather.H"

#include "Particles/Gather/GetExternalFields.H"

#include "Particles/ShapeFactors.H"

#include "Utils/TextMsg.H"

#include "Utils/WarpXAlgorithmSelection.H"

#include "Utils/WarpXConst.H"

#ifdef WARPX_DIM_RZ

#   include "Utils/WarpX_Complex.H"

#endif


#include <AMReX.H>

#include <AMReX_Arena.H>

#include <AMReX_Array4.H>

#include <AMReX_Dim3.H>

#include <AMReX_REAL.H>


#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)

#endif

/**
 * \brief Compute the nine mass-matrix kernel coefficients of a single particle.
 *
 * The magnetic field on the particle is first scaled to
 *   b = alpha*B,  alpha = (qs/ms)*(dt/2)/gamma_bar,
 *   gamma_bar = sqrt(1 + (upx^2 + upy^2 + upz^2)/c^2),
 * and the coefficients are then
 *   f_ij = alpha*rhop/(1 + |b|^2) * (delta_ij + b_i*b_j + eps_ijk*b_k).
 *
 * In RZ and RCYLINDER builds the resulting 3x3 matrix is additionally rotated
 * from Cartesian (x,y,z) to cylindrical (r,theta,z) components using costh and
 * sinth. The output arguments keep their Cartesian names in that case.
 *
 * \param[in]  qs             species charge
 * \param[in]  ms             species mass
 * \param[in]  dt             time step (half of it enters alpha)
 * \param[in]  rhop           prefactor multiplying every coefficient
 * \param[in]  costh,sinth    (RZ/RCYLINDER only) cosine and sine of the rotation angle
 * \param[in]  upx,upy,upz    particle momentum components used to evaluate gamma_bar
 * \param[in]  Bpx,Bpy,Bpz    Cartesian magnetic field components on the particle
 * \param[out] fpxx,fpxy,fpxz first row of the kernel matrix
 * \param[out] fpyx,fpyy,fpyz second row of the kernel matrix
 * \param[out] fpzx,fpzy,fpzz third row of the kernel matrix
 */
AMREX_GPU_HOST_DEVICE AMREX_INLINE
void setMassMatricesKernels (const amrex::ParticleReal qs,
                             const amrex::ParticleReal ms,
                             const amrex::ParticleReal dt,
                             const amrex::ParticleReal rhop,
#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
                             const amrex::ParticleReal costh,
                             const amrex::ParticleReal sinth,
#endif
                             const amrex::ParticleReal upx,
                             const amrex::ParticleReal upy,
                             const amrex::ParticleReal upz,
                             const amrex::ParticleReal Bpx,
                             const amrex::ParticleReal Bpy,
                             const amrex::ParticleReal Bpz,
                             amrex::ParticleReal& fpxx,
                             amrex::ParticleReal& fpxy,
                             amrex::ParticleReal& fpxz,
                             amrex::ParticleReal& fpyx,
                             amrex::ParticleReal& fpyy,
                             amrex::ParticleReal& fpyz,
                             amrex::ParticleReal& fpzx,
                             amrex::ParticleReal& fpzy,
                             amrex::ParticleReal& fpzz)
{
    using namespace amrex::literals;

    constexpr auto inv_c2 = PhysConst::inv_c2_v<amrex::ParticleReal>;

    // Convert Cartesian B on particle to normalized cyclotron units with dt/2.0
    const amrex::ParticleReal gamma_bar = std::sqrt(1._prt + (upx*upx + upy*upy + upz*upz)*inv_c2);
    const amrex::ParticleReal alpha = qs/ms*0.5_prt*dt/gamma_bar;
    const amrex::ParticleReal bpx = alpha*Bpx;
    const amrex::ParticleReal bpy = alpha*Bpy;
    const amrex::ParticleReal bpz = alpha*Bpz;

    const amrex::ParticleReal bpsq = bpx*bpx + bpy*bpy + bpz*bpz;
    const amrex::ParticleReal arogp = alpha*rhop/(1.0_prt + bpsq);

    // Compute Mass Matrix kernels (non-relativistic for now).
    // All literals are ParticleReal (_prt) so that the nine coefficients are
    // evaluated in the same precision as their inputs and outputs.
    fpxx = arogp*(bpx*bpx + 1.0_prt);
    fpxy = arogp*(bpx*bpy + bpz);
    fpxz = arogp*(bpx*bpz - bpy);

    fpyx = arogp*(bpy*bpx - bpz);
    fpyy = arogp*(bpy*bpy + 1.0_prt);
    fpyz = arogp*(bpy*bpz + bpx);

    fpzx = arogp*(bpz*bpx + bpy);
    fpzy = arogp*(bpz*bpy - bpx);
    fpzz = arogp*(bpz*bpz + 1.0_prt);

#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)

    // Rotate the kernel matrix from Cartesian to cylindrical components:
    // [fprr fprt fprz] = [ costh sinth 0][fpxx fpxy fpxz][costh -sinth 0]
    // [fptr fptt fptz] = [-sinth costh 0][fpyx fpyy fpyz][sinth  costh 0]
    // [fpzr fpzt fpzz] = [ 0     0     1][fpzx fpzy fpzz][0      0     1]
    const amrex::ParticleReal c2 = costh*costh;
    const amrex::ParticleReal s2 = sinth*sinth;
    const amrex::ParticleReal cs = costh*sinth;
    const amrex::ParticleReal fprr = c2*fpxx + cs*(fpxy + fpyx) + s2*fpyy;
    const amrex::ParticleReal fprt = cs*(fpyy - fpxx) + c2*fpxy - s2*fpyx;
    const amrex::ParticleReal fprz = costh*fpxz + sinth*fpyz;
    const amrex::ParticleReal fptr = cs*(fpyy - fpxx) - s2*fpxy + c2*fpyx;
    const amrex::ParticleReal fptt = s2*fpxx + c2*fpyy - cs*(fpxy + fpyx);
    const amrex::ParticleReal fptz = -sinth*fpxz + costh*fpyz;
    const amrex::ParticleReal fpzr =  costh*fpzx + sinth*fpzy;
    const amrex::ParticleReal fpzt = -sinth*fpzx + costh*fpzy;

    // The outputs keep their Cartesian names but now hold the cylindrical
    // (r, theta, z) components; fpzz is unchanged by the rotation.
    fpxx = fprr;
    fpxy = fprt;
    fpxz = fprz;
    fpyx = fptr;
    fpyy = fptt;
    fpyz = fptz;
    fpzx = fpzr;
    fpzy = fpzt;
#endif

}


/**
 * \brief Deposit the mass-matrix kernels of one particle and, optionally, its
 *        current density onto the grid, weighting with shape factors of order
 *        depos_order. All grid updates use amrex::Gpu::Atomic::AddNoRet.
 *
 * The geometry-specific code paths are selected at compile time through the
 * WARPX_DIM_* macros (1D_Z, RCYLINDER/RSPHERE, XZ/RZ, 3D).
 *
 * \tparam depos_order        order of the shape factor; depos_order+1 grid points
 *                            are touched per direction
 * \tparam full_mass_matrices if true, all nine matrices are deposited with one
 *                            component per pair of stencil points (nothing is
 *                            deposited in 3D, where this is not yet implemented);
 *                            if false, only component 0 of Sxx, Syy and Szz is deposited
 * \tparam deposit_J          if true, the current is also added to jx_arr, jy_arr, jz_arr
 *
 * \param xp,yp,zp            Cartesian particle position at which the shape factors are
 *                            evaluated (the radius is computed from it in RZ, RCYLINDER
 *                            and RSPHERE builds)
 * \param wq_invvol           factor multiplying the velocity to form the deposited current
 * \param vx,vy,vz            Cartesian velocity components (rotated to cylindrical or
 *                            spherical components in the corresponding builds)
 * \param fpxx ... fpzz       mass-matrix kernel coefficients deposited into the
 *                            corresponding S arrays
 * \param jx_arr,jy_arr,jz_arr current arrays, written only when deposit_J is true
 * \param Sxx_arr ... Szz_arr mass-matrix arrays
 * \param jx_type,jy_type,jz_type centering (NODE or CELL) of each current component
 *                            in each grid direction
 * \param dinv                factors converting a physical offset from xyzmin into grid
 *                            units (presumably the inverse cell sizes)
 * \param xyzmin              origin subtracted from the particle position
 * \param lo                  index offset added to every array index
 */
template <int depos_order, bool full_mass_matrices, bool deposit_J>

AMREX_GPU_HOST_DEVICE AMREX_INLINE


void doDirectJandSigmaDepositionKernel ([[maybe_unused]] const amrex::ParticleReal xp,

                                        [[maybe_unused]] const amrex::ParticleReal yp,

                                        [[maybe_unused]] const amrex::ParticleReal zp,

                                        const amrex::Real wq_invvol,

                                        const amrex::ParticleReal vx,

                                        [[maybe_unused]] const amrex::ParticleReal vy,

                                        const amrex::ParticleReal vz,

                                        const amrex::ParticleReal fpxx,

                                        [[maybe_unused]] const amrex::ParticleReal fpxy,

                                        [[maybe_unused]] const amrex::ParticleReal fpxz,

                                        [[maybe_unused]] const amrex::ParticleReal fpyx,

                                        const amrex::ParticleReal fpyy,

                                        [[maybe_unused]] const amrex::ParticleReal fpyz,

                                        [[maybe_unused]] const amrex::ParticleReal fpzx,

                                        [[maybe_unused]] const amrex::ParticleReal fpzy,

                                        const amrex::ParticleReal fpzz,

                                        amrex::Array4<amrex::Real> const& jx_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& jy_arr,

                                        amrex::Array4<amrex::Real> const& jz_arr,

                                        amrex::Array4<amrex::Real> const& Sxx_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxy_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxz_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Syx_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Syy_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Syz_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Szx_arr,

                                        [[maybe_unused]] amrex::Array4<amrex::Real> const& Szy_arr,

                                        amrex::Array4<amrex::Real> const& Szz_arr,

                                        const amrex::IntVect& jx_type,

                                        const amrex::IntVect& jy_type,

                                        const amrex::IntVect& jz_type,

                                        const amrex::XDim3& dinv,

                                        const amrex::XDim3& xyzmin,

                                        const amrex::Dim3 lo)

{

    using namespace amrex::literals;


    constexpr int NODE = amrex::IndexType::NODE;

    constexpr int CELL = amrex::IndexType::CELL;


    // MassMatrices index shift parameter: set to 1 in a direction when the
    // node-centered and cell-centered stencils start at the same grid index

    amrex::IntVect shift = amrex::IntVect::TheZeroVector();


#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)

    // In RZ and RCYLINDER, wqx is actually wqr, and wqy is wqtheta

    // Convert to cylindrical at the mid point

    const amrex::Real rpmid = std::sqrt(xp*xp + yp*yp);

    // On the axis (rpmid == 0) the angle is undefined; theta = 0 is used
    const amrex::Real costheta = (rpmid > 0._rt ? xp/rpmid : 1._rt);

    const amrex::Real sintheta = (rpmid > 0._rt ? yp/rpmid : 0._rt);

    const amrex::Real wqx = wq_invvol*(+vx*costheta + vy*sintheta);

    const amrex::Real wqy = wq_invvol*(-vx*sintheta + vy*costheta);

    const amrex::Real wqz = wq_invvol*vz;

#elif defined(WARPX_DIM_RSPHERE)

    // Convert to spherical at the mid point
    // (rpxymid is the distance from the z axis, rpmid the distance from the origin)

    const amrex::Real rpxymid = std::sqrt(xp*xp + yp*yp);

    const amrex::Real rpmid = std::sqrt(xp*xp + yp*yp + zp*zp);

    const amrex::Real costheta = (rpxymid > 0._rt ? xp/rpxymid : 1._rt);

    const amrex::Real sintheta = (rpxymid > 0._rt ? yp/rpxymid : 0._rt);

    const amrex::Real cosphi = (rpmid > 0._rt ? rpxymid/rpmid : 1._rt);

    const amrex::Real sinphi = (rpmid > 0._rt ? zp/rpmid : 0._rt);

    // convert from Cartesian to spherical

    const amrex::Real wqx = wq_invvol*(+vx*costheta*cosphi + vy*sintheta*cosphi + vz*sinphi);

    const amrex::Real wqy = wq_invvol*(-vx*sintheta + vy*costheta);

    const amrex::Real wqz = wq_invvol*(-vx*costheta*sinphi - vy*sintheta*sinphi + vz*cosphi);

#else

    const amrex::Real wqx = wq_invvol*vx;

    const amrex::Real wqy = wq_invvol*vy;

    const amrex::Real wqz = wq_invvol*vz;

#endif


    // --- Compute shape factors

    Compute_shape_factor< depos_order > const compute_shape_factor;

#if !defined(WARPX_DIM_1D_Z)

    // x direction

    // Get particle position after 1/2 push back in position

    // Keep these double to avoid bug in single precision

#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)

    const double xmid = (rpmid - xyzmin.x)*dinv.x;

#else

    const double xmid = (xp - xyzmin.x)*dinv.x;

#endif


    // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current

    // sx_j[xyz] shape factor along x for the centering of each current

    // There are only two possible centerings, node or cell centered, so at most only two shape factor

    // arrays will be needed.

    // Keep these double to avoid bug in single precision

    double sx_node[depos_order + 1] = {0.};

    double sx_cell[depos_order + 1] = {0.};

    int j_node = 0;

    int j_cell = 0;

    // Each set of shape factors is only computed if at least one current component uses it
    if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {

        j_node = compute_shape_factor(sx_node, xmid);

    }

    if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {

        // Cell-centered stencil: evaluated at the position shifted by half a cell
        j_cell = compute_shape_factor(sx_cell, xmid - 0.5);

    }


    // Set the index shift parameter

    if (j_node == j_cell) { shift[0] = 1; }


    // Select, for each current component, the shape factors matching its centering in x
    amrex::Real sx_jx[depos_order + 1] = {0._rt};

    amrex::Real sx_jy[depos_order + 1] = {0._rt};

    amrex::Real sx_jz[depos_order + 1] = {0._rt};

    for (int ix = 0; ix <= depos_order; ix++)

    {

        sx_jx[ix] = ((jx_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));

        sx_jy[ix] = ((jy_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));

        sx_jz[ix] = ((jz_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));

    }


    int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);

    int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);

    int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);

#endif


#if defined(WARPX_DIM_3D)

    // y direction

    // Keep these double to avoid bug in single precision

    const double ymid = (yp - xyzmin.y)*dinv.y;

    double sy_node[depos_order + 1] = {0.};

    double sy_cell[depos_order + 1] = {0.};

    int k_node = 0;

    int k_cell = 0;

    if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {

        k_node = compute_shape_factor(sy_node, ymid);

    }

    if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {

        k_cell = compute_shape_factor(sy_cell, ymid - 0.5);

    }


    // Set the index shift parameter

    if (k_node == k_cell) { shift[1] = 1; }


    // Select, for each current component, the shape factors matching its centering in y
    amrex::Real sy_jx[depos_order + 1] = {0._rt};

    amrex::Real sy_jy[depos_order + 1] = {0._rt};

    amrex::Real sy_jz[depos_order + 1] = {0._rt};

    for (int iy = 0; iy <= depos_order; iy++)

    {

        sy_jx[iy] = ((jx_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));

        sy_jy[iy] = ((jy_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));

        sy_jz[iy] = ((jz_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));

    }

    int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);

    int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);

    int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);

#endif


#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)

    // z direction

    // Keep these double to avoid bug in single precision

    // Position of the z direction within the IntVect entries for this geometry
    constexpr int zdir = WARPX_ZINDEX;

    const double zmid = (zp - xyzmin.z)*dinv.z;

    double sz_node[depos_order + 1] = {0.};

    double sz_cell[depos_order + 1] = {0.};

    int l_node = 0;

    int l_cell = 0;

    if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {

        l_node = compute_shape_factor(sz_node, zmid);

    }

    if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {

        l_cell = compute_shape_factor(sz_cell, zmid - 0.5);

    }

    // Select, for each current component, the shape factors matching its centering in z
    amrex::Real sz_jx[depos_order + 1] = {0._rt};

    amrex::Real sz_jy[depos_order + 1] = {0._rt};

    amrex::Real sz_jz[depos_order + 1] = {0._rt};

    for (int iz = 0; iz <= depos_order; iz++)

    {

        sz_jx[iz] = ((jx_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));

        sz_jy[iz] = ((jy_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));

        sz_jz[iz] = ((jz_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));

    }

    int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);

    int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);

    int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);


    // Set the index shift parameter

    if (l_node==l_cell) { shift[zdir] = 1; }


#endif


    // Compute, for each pair of current components, the index offset needed when the two
    // components have different locations on the grid: the sum of the two centering flags
    // modulo 2, i.e. 1 when they differ and 0 when they agree (assuming flags are 0 or 1)

    amrex::IntVect offset_xy, offset_xz, offset_yz;

    for (int dir = 0; dir < AMREX_SPACEDIM; dir++) {

        offset_xy[dir] = (jx_type[dir] + jy_type[dir]) % 2;

        offset_xz[dir] = (jx_type[dir] + jz_type[dir]) % 2;

        offset_yz[dir] = (jy_type[dir] + jz_type[dir]) % 2;

    }


    // Deposit J and mass matrices

#if defined(WARPX_DIM_1D_Z)

    // 1D: z is the only grid direction and is stored in the first array index (hence lo.x)
    for (int iz = 0; iz <= depos_order; iz++){

        if constexpr (deposit_J) {

            amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+l_jx+iz, 0, 0, 0), sz_jx[iz]*wqx);

            amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+l_jy+iz, 0, 0, 0), sz_jy[iz]*wqy);

            amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+l_jz+iz, 0, 0, 0), sz_jz[iz]*wqz);

        }


        // Deposit mass matrices

        if constexpr (full_mass_matrices) {

            for (int aa = 0; aa <= depos_order; aa++){


                // Component index for the stencil separation (aa - iz), shifted by
                // depos_order so that it lies in [0, 2*depos_order]
                const int col_base = depos_order + aa - iz;

                int Nc = 0;


                // Reduced deposit for diagonal mass matrices
                // (only pairs with aa <= iz, i.e. components 0..depos_order)

                if (aa <= iz) {

                    Nc = col_base;

                    amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+l_jx+iz, 0, 0, Nc), sz_jx[iz]*sz_jx[aa]*fpxx);

                    amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+l_jy+iz, 0, 0, Nc), sz_jy[iz]*sz_jy[aa]*fpyy);

                    amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+l_jz+iz, 0, 0, Nc), sz_jz[iz]*sz_jz[aa]*fpzz);

                }


                // Deposit off-diagonal mass matrices for X-current

                Nc = col_base + shift[0]*offset_xy[0];

                amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(lo.x+l_jx+iz, 0, 0, Nc), sz_jx[iz]*sz_jy[aa]*fpxy);

                Nc = col_base + shift[0]*offset_xz[0];

                amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(lo.x+l_jx+iz, 0, 0, Nc), sz_jx[iz]*sz_jz[aa]*fpxz);


                // Deposit off-diagonal mass matrices for Y-current

                Nc = col_base + shift[0]*offset_xy[0];

                amrex::Gpu::Atomic::AddNoRet(&Syx_arr(lo.x+l_jy+iz, 0, 0, Nc), sz_jy[iz]*sz_jx[aa]*fpyx);

                Nc = col_base + shift[0]*offset_yz[0];

                amrex::Gpu::Atomic::AddNoRet(&Syz_arr(lo.x+l_jy+iz, 0, 0, Nc), sz_jy[iz]*sz_jz[aa]*fpyz);


                // Deposit off-diagonal mass matrices for Z-current

                Nc = col_base + 1 - shift[0]*offset_xz[0];

                amrex::Gpu::Atomic::AddNoRet(&Szx_arr(lo.x+l_jz+iz, 0, 0, Nc), sz_jz[iz]*sz_jx[aa]*fpzx);

                Nc = col_base + 1 - shift[0]*offset_yz[0];

                amrex::Gpu::Atomic::AddNoRet(&Szy_arr(lo.x+l_jz+iz, 0, 0, Nc), sz_jz[iz]*sz_jy[aa]*fpzy);

            }

        }

        else { // Deposit mass matrices for diagonal PC only

            amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+l_jx+iz, 0, 0, 0), sz_jx[iz]*fpxx);

            amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+l_jy+iz, 0, 0, 0), sz_jy[iz]*fpyy);

            amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+l_jz+iz, 0, 0, 0), sz_jz[iz]*fpzz);

        }

    }

#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)

    // 1D radial geometries: the radial direction is the only grid direction
    for (int ix = 0; ix <= depos_order; ix++){

        if constexpr (deposit_J) {

            amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+j_jx+ix, 0, 0, 0), sx_jx[ix]*wqx);

            amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+j_jy+ix, 0, 0, 0), sx_jy[ix]*wqy);

            amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+j_jz+ix, 0, 0, 0), sx_jz[ix]*wqz);

        }

        // Deposit mass matrices

        if constexpr (full_mass_matrices) {

            for (int aa = 0; aa <= depos_order; aa++) {


                // Component index for the stencil separation (aa - ix), shifted by
                // depos_order so that it lies in [0, 2*depos_order]
                int col_base = depos_order + aa - ix;

                int Nc;


                // Reduced deposit for diagonal mass matrices
                // (only pairs with aa <= ix, i.e. components 0..depos_order)

                if (aa <= ix) {

                    Nc = col_base;

                    amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+j_jx+ix, 0, 0, Nc), sx_jx[ix]*sx_jx[aa]*fpxx);

                    amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+j_jy+ix, 0, 0, Nc), sx_jy[ix]*sx_jy[aa]*fpyy);

                    amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+j_jz+ix, 0, 0, Nc), sx_jz[ix]*sx_jz[aa]*fpzz);

                }


                // Deposit off-diagonal mass matrices for X-current

                Nc = col_base + 1 - shift[0]*offset_xy[0];

                amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(lo.x+j_jx+ix, 0, 0, Nc), sx_jx[ix]*sx_jy[aa]*fpxy);

                Nc = col_base + 1 - shift[0]*offset_xz[0];

                amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(lo.x+j_jx+ix, 0, 0, Nc), sx_jx[ix]*sx_jz[aa]*fpxz);


                // Deposit off-diagonal mass matrices for Y-current

                Nc = col_base + shift[0]*offset_xy[0];

                amrex::Gpu::Atomic::AddNoRet(&Syx_arr(lo.x+j_jy+ix, 0, 0, Nc), sx_jy[ix]*sx_jx[aa]*fpyx);

                Nc = col_base + shift[0]*offset_yz[0];

                amrex::Gpu::Atomic::AddNoRet(&Syz_arr(lo.x+j_jy+ix, 0, 0, Nc), sx_jy[ix]*sx_jz[aa]*fpyz);


                // Deposit off-diagonal mass matrices for Z-current

                Nc = col_base + shift[0]*offset_xz[0];

                amrex::Gpu::Atomic::AddNoRet(&Szx_arr(lo.x+j_jz+ix, 0, 0, Nc), sx_jz[ix]*sx_jx[aa]*fpzx);

                Nc = col_base + shift[0]*offset_yz[0];

                amrex::Gpu::Atomic::AddNoRet(&Szy_arr(lo.x+j_jz+ix, 0, 0, Nc), sx_jz[ix]*sx_jy[aa]*fpzy);


            }

        }

        else { // Deposit mass matrices for diagonal PC only

            amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x + j_jx + ix, 0, 0, 0), sx_jx[ix]*fpxx);

            amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x + j_jy + ix, 0, 0, 0), sx_jy[ix]*fpyy);

            amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x + j_jz + ix, 0, 0, 0), sx_jz[ix]*fpzz);

        }

    }

#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)

    // Row stride of the flattened (column, row) component index for the diagonal
    // matrices; the off-diagonal matrices use this value plus the x centering offset
    const int base_offset = 1 + 2*depos_order;


    // 2D: x (or r) is the first array index and z the second (hence lo.y for z)
    for (int iz = 0; iz <= depos_order; iz++){


        for (int ix = 0; ix <= depos_order; ix++){


            const amrex::Real weight_Jx = sx_jx[ix]*sz_jx[iz];

            const amrex::Real weight_Jy = sx_jy[ix]*sz_jy[iz];

            const amrex::Real weight_Jz = sx_jz[ix]*sz_jz[iz];


            if constexpr (deposit_J) {

                amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0), weight_Jx*wqx);

                amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0), weight_Jy*wqy);

                amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0), weight_Jz*wqz);

            }


            // Deposit mass matrices

            if constexpr (full_mass_matrices) {


                const int Ncomp0 = 1 + 2*depos_order;


                // (aa, bb) index the second stencil point in x and z respectively
                for (int bb = 0; bb <= depos_order; bb++){


                    // Row (z) index for the separation (bb - iz), in [0, 2*depos_order]
                    const int row_base = depos_order + bb - iz;


                    for (int aa = 0; aa <= depos_order; aa++){

                        // Column (x) index for the separation (aa - ix), in [0, 2*depos_order]
                        const int col_base = depos_order + aa - ix;

                        const amrex::Real weight_Ex = sx_jx[aa]*sz_jx[bb];

                        const amrex::Real weight_Ey = sx_jy[aa]*sz_jy[bb];

                        const amrex::Real weight_Ez = sx_jz[aa]*sz_jz[bb];


                        int offset;

                        int Nc;


                        // Reduced deposit for diagonal mass matrices
                        // (only pairs with col_base + row_base <= Ncomp0 are deposited)

                        if (col_base <= Ncomp0 - row_base) {

                            offset = base_offset;

                            Nc = col_base + row_base*offset;

                            amrex::Gpu::Atomic::AddNoRet(

                                &Sxx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, Nc),

                                weight_Jx*weight_Ex*fpxx);

                            amrex::Gpu::Atomic::AddNoRet(

                                &Syy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, Nc),

                                weight_Jy*weight_Ey*fpyy);

                            amrex::Gpu::Atomic::AddNoRet(

                                &Szz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, Nc),

                                weight_Jz*weight_Ez*fpzz);

                        }


                        // Deposit off-diagonal mass matrices for X-current

                        offset = base_offset + offset_xy[0];

                        Nc =  col_base + 1 - shift[0]*offset_xy[0]

                           + (row_base + shift[1]*offset_xy[1])*offset;

                        amrex::Gpu::Atomic::AddNoRet(

                            &Sxy_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, Nc),

                            weight_Jx*weight_Ey*fpxy);

                        offset = base_offset + offset_xz[0];

                        Nc =  col_base + 1 - shift[0]*offset_xz[0]

                           + (row_base + shift[1]*offset_xz[1])*offset;

                        amrex::Gpu::Atomic::AddNoRet(

                            &Sxz_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, Nc),

                            weight_Jx*weight_Ez*fpxz);


                        // Deposit off-diagonal mass matrices for Y-current

                        offset = base_offset + offset_xy[0];

                        Nc =  col_base + shift[0]*offset_xy[0]

                           + (row_base + shift[1]*offset_xy[1])*offset;

                        amrex::Gpu::Atomic::AddNoRet(

                            &Syx_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, Nc),

                            weight_Jy*weight_Ex*fpyx);

                        offset = base_offset + offset_yz[0];

                        Nc =  col_base + shift[0]*offset_yz[0]

                           + (row_base + shift[1]*offset_yz[1])*offset;

                        amrex::Gpu::Atomic::AddNoRet(

                            &Syz_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, Nc),

                            weight_Jy*weight_Ez*fpyz);


                        // Deposit off-diagonal mass matrices for Z-current

                        offset = base_offset + offset_xz[0];

                        Nc =  col_base + shift[0]*offset_xz[0]

                           + (row_base + 1 - shift[1]*offset_xz[1])*offset;

                        amrex::Gpu::Atomic::AddNoRet(

                            &Szx_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, Nc),

                            weight_Jz*weight_Ex*fpzx);

                        offset = base_offset + offset_yz[0];

                        Nc =  col_base + shift[0]*offset_yz[0]

                           + (row_base + 1 - shift[1]*offset_yz[1])*offset;

                        amrex::Gpu::Atomic::AddNoRet(

                            &Szy_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, Nc),

                            weight_Jz*weight_Ey*fpzy);

                    }


                }


            }

            else { // Deposit mass matrices for diagonal PC only

                amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0), weight_Jy*fpyy);

                amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0), weight_Jx*fpxx);

                amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0), weight_Jz*fpzz);

            }

        }

    }

#elif defined(WARPX_DIM_3D)

    for (int iz = 0; iz <= depos_order; iz++){

        for (int iy = 0; iy <= depos_order; iy++){

            for (int ix = 0; ix <= depos_order; ix++){

                const amrex::Real weight_Jx = sx_jx[ix]*sy_jx[iy]*sz_jx[iz];

                const amrex::Real weight_Jy = sx_jy[ix]*sy_jy[iy]*sz_jy[iz];

                const amrex::Real weight_Jz = sx_jz[ix]*sy_jz[iy]*sz_jz[iz];


                if constexpr (deposit_J) {

                    amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz), weight_Jx*wqx);

                    amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz), weight_Jy*wqy);

                    amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz), weight_Jz*wqz);

                }


                // Deposit mass matrices

                if constexpr (full_mass_matrices) {

                    // Should not be here. Full mass matrices not yet implemented in 3D
                    // (this branch is intentionally empty: nothing is deposited)

                }

                else { // Deposit mass matrices for diagonal PC only

                    amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz, 0), weight_Jx*fpxx);

                    amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz, 0), weight_Jy*fpyy);

                    amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz, 0), weight_Jz*fpzz);

                }

            }

        }

    }

#endif

}


template <int depos_order, bool full_mass_matrices>


void doDirectSigmaDeposition (const GetParticlePosition<PIdx>& GetPosition,

                              [[maybe_unused]] const int* nsuborbits,

                              const amrex::ParticleReal* wp,

                              const amrex::ParticleReal* uxp_n,

                              const amrex::ParticleReal* uyp_n,

                              const amrex::ParticleReal* uzp_n,

                              const amrex::ParticleReal* uxp_nph,

                              const amrex::ParticleReal* uyp_nph,

                              const amrex::ParticleReal* uzp_nph,

                              amrex::Array4<amrex::Real> const& Sxx_arr,

                              amrex::Array4<amrex::Real> const& Sxy_arr,

                              amrex::Array4<amrex::Real> const& Sxz_arr,

                              amrex::Array4<amrex::Real> const& Syx_arr,

                              amrex::Array4<amrex::Real> const& Syy_arr,

                              amrex::Array4<amrex::Real> const& Syz_arr,

                              amrex::Array4<amrex::Real> const& Szx_arr,

                              amrex::Array4<amrex::Real> const& Szy_arr,

                              amrex::Array4<amrex::Real> const& Szz_arr,

                              const amrex::IntVect& jx_type,

                              const amrex::IntVect& jy_type,

                              const amrex::IntVect& jz_type,

                              GetExternalEBField const & getExternalEB,

                              const amrex::ParticleReal Bx_ext,

                              const amrex::ParticleReal By_ext,

                              const amrex::ParticleReal Bz_ext,

                              const amrex::Array4<amrex::Real const>& Bx_arr,

                              const amrex::Array4<amrex::Real const>& By_arr,

                              const amrex::Array4<amrex::Real const>& Bz_arr,

                              const amrex::IndexType Bx_type,

                              const amrex::IndexType By_type,

                              const amrex::IndexType Bz_type,

                              const long np_to_deposit,

                              const amrex::Real dt,

                              const amrex::XDim3& dinv,

                              const amrex::XDim3& xyzmin,

                              const amrex::Dim3 lo,

                              const amrex::Real qs,

                              const amrex::Real ms)

{

    using namespace amrex::literals;


    const amrex::Real invvol = dinv.x*dinv.y*dinv.z;


    enum exteb_flags : int { no_exteb, has_exteb };

    const int exteb_runtime_flag = getExternalEB.isNoOp() ? no_exteb : has_exteb;


    // Loop over particles and deposit mass matrices

    amrex::ParallelFor(

        amrex::TypeList<amrex::CompileTimeOptions<no_exteb, has_exteb>>{},

        {exteb_runtime_flag},

        np_to_deposit,

        [=] AMREX_GPU_DEVICE (long const ip, auto exteb_control) {


            // Skip mass matrix deposition for particles with suborbits.

            if (nsuborbits && nsuborbits[ip] > 1) { return; }


            amrex::ParticleReal xp_nph, yp_nph, zp_nph;

            GetPosition(ip, xp_nph, yp_nph, zp_nph);


            // Initialize B on particle to uniform external B

            amrex::ParticleReal Bxp = Bx_ext;

            amrex::ParticleReal Byp = By_ext;

            amrex::ParticleReal Bzp = Bz_ext;


            // Increment with externally applied B-field with time and spatial variation

            [[maybe_unused]] const auto& getExternalEB_tmp = getExternalEB;

            if constexpr (exteb_control == has_exteb) {

                amrex::ParticleReal Exp = 0._prt;

                amrex::ParticleReal Eyp = 0._prt;

                amrex::ParticleReal Ezp = 0._prt;

                getExternalEB(ip, Exp, Eyp, Ezp, Bxp, Byp, Bzp);

            }


            // Gather magnetic field from the grid

            doDirectGatherVectorField</*depos_order_perp=*/depos_order,/*depos_order_para=*/depos_order>(

                                    xp_nph, yp_nph, zp_nph,

                                    Bxp, Byp, Bzp,

                                    Bx_arr, By_arr, Bz_arr,

                                    Bx_type, By_type, Bz_type,

                                    dinv, xyzmin, lo, /*n_rz_azimuthal_modes=*/0 );


            // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1

            const amrex::ParticleReal gaminv = GetImplicitGammaInverse(uxp_n[ip], uyp_n[ip], uzp_n[ip],

                                                                       uxp_nph[ip], uyp_nph[ip], uzp_nph[ip]);


            // Compute current density kernels to deposit

            const amrex::Real wq_invvol = qs*wp[ip]*invvol;

            const amrex::Real rhop = wq_invvol*gaminv;

            const amrex::Real vx = uxp_nph[ip]*gaminv;

            const amrex::Real vy = uyp_nph[ip]*gaminv;

            const amrex::Real vz = uzp_nph[ip]*gaminv;


#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)

            amrex::Real const rp_mid = std::sqrt(xp_nph*xp_nph + yp_nph*yp_nph);

            amrex::Real const costh = (rp_mid > 0._rt ? xp_nph/rp_mid : 1._rt);

            amrex::Real const sinth = (rp_mid > 0._rt ? yp_nph/rp_mid : 0._rt);

#endif


            // Set the Mass Matrices kernels

            amrex::ParticleReal fpxx, fpxy, fpxz;

            amrex::ParticleReal fpyx, fpyy, fpyz;

            amrex::ParticleReal fpzx, fpzy, fpzz;

            setMassMatricesKernels(qs, ms, dt, rhop,

#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)

                                   costh, sinth,

#endif

                                   uxp_nph[ip], uyp_nph[ip], uzp_nph[ip],

                                   Bxp, Byp, Bzp,

                                   fpxx, fpxy, fpxz,

                                   fpyx, fpyy, fpyz,

                                   fpzx, fpzy, fpzz);


            // Pass dummy arrays for Jx, Jy, Jz (which will not be used)

            amrex::Array4<amrex::Real> const dummy_Jx{};

            amrex::Array4<amrex::Real> const dummy_Jy{};

            amrex::Array4<amrex::Real> const dummy_Jz{};


            doDirectJandSigmaDepositionKernel<depos_order,full_mass_matrices,/*deposit_J=*/false>(

                                                            xp_nph, yp_nph, zp_nph,

                                                            wq_invvol, vx, vy, vz,

                                                            fpxx, fpxy, fpxz,

                                                            fpyx, fpyy, fpyz,

                                                            fpzx, fpzy, fpzz,

                                                            dummy_Jx, dummy_Jy, dummy_Jz,

                                                            Sxx_arr, Sxy_arr, Sxz_arr,

                                                            Syx_arr, Syy_arr, Syz_arr,

                                                            Szx_arr, Szy_arr, Szz_arr,

                                                            jx_type, jy_type, jz_type,

                                                            dinv, xyzmin, lo );


        }

    );

}


template <int depos_order, bool full_mass_matrices, bool deposit_J>

AMREX_GPU_HOST_DEVICE AMREX_INLINE


void doVillasenorJandSigmaDepositionKernel ([[maybe_unused]] const amrex::ParticleReal xp_old,

                                            [[maybe_unused]] const amrex::ParticleReal yp_old,

                                            [[maybe_unused]] const amrex::ParticleReal zp_old,

                                            [[maybe_unused]] const amrex::ParticleReal xp_new,

                                            [[maybe_unused]] const amrex::ParticleReal yp_new,

                                            [[maybe_unused]] const amrex::ParticleReal zp_new,

                                            const amrex::ParticleReal wq_invvol,

                                            [[maybe_unused]] const amrex::ParticleReal uxp_mid,

                                            [[maybe_unused]] const amrex::ParticleReal uyp_mid,

                                            [[maybe_unused]] const amrex::ParticleReal uzp_mid,

                                            [[maybe_unused]] const amrex::ParticleReal gaminv,

                                            const amrex::ParticleReal fpxx,

                                            [[maybe_unused]] const amrex::ParticleReal fpxy,

                                            [[maybe_unused]] const amrex::ParticleReal fpxz,

                                            [[maybe_unused]] const amrex::ParticleReal fpyx,

                                            const amrex::ParticleReal fpyy,

                                            [[maybe_unused]] const amrex::ParticleReal fpyz,

                                            [[maybe_unused]] const amrex::ParticleReal fpzx,

                                            [[maybe_unused]] const amrex::ParticleReal fpzy,

                                            const amrex::ParticleReal fpzz,

                                            amrex::Array4<amrex::Real> const& Jx_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Jy_arr,

                                            amrex::Array4<amrex::Real> const& Jz_arr,

                                            [[maybe_unused]] int max_crossings,

                                            amrex::Array4<amrex::Real> const& Sxx_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxy_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxz_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Syx_arr,

                                            amrex::Array4<amrex::Real> const& Syy_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Syz_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Szx_arr,

                                            [[maybe_unused]] amrex::Array4<amrex::Real> const& Szy_arr,

                                            amrex::Array4<amrex::Real> const& Szz_arr,

                                            const amrex::Real dt,

                                            const amrex::XDim3& dinv,

                                            const amrex::XDim3& xyzmin,

                                            const amrex::GpuArray<amrex::GpuArray<double,2>, AMREX_SPACEDIM> & domain_double,

                                            const amrex::GpuArray<amrex::GpuArray<bool,2>, AMREX_SPACEDIM> & do_cropping,

                                            const amrex::Dim3 lo)

{


    using namespace amrex::literals;


#if (AMREX_SPACEDIM > 1)

    amrex::Real constexpr one_third = 1.0_rt / 3.0_rt;

    amrex::Real constexpr one_sixth = 1.0_rt / 6.0_rt;

#endif


    // computes current and old position in grid units

#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)

    amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);

    amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);


    // Keep these double to avoid bug in single precision

    double x_new = (rp_new - xyzmin.x)*dinv.x;

    double const x_old = (rp_old - xyzmin.x)*dinv.x;

    amrex::Real const vx = (rp_new - rp_old)/dt;

    amrex::Real const xp_mid = (xp_new + xp_old)*0.5_rt;

    amrex::Real const yp_mid = (yp_new + yp_old)*0.5_rt;

    amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;

    amrex::Real const costheta_mid = (rp_mid > 0._rt ? xp_mid/rp_mid : 1._rt);

    amrex::Real const sintheta_mid = (rp_mid > 0._rt ? yp_mid/rp_mid : 0._rt);

    amrex::Real const vy = (-uxp_mid*sintheta_mid + uyp_mid*costheta_mid)*gaminv;

#if defined(WARPX_DIM_RCYLINDER)

    amrex::Real const vz = uzp_mid*gaminv;

#endif

#elif defined(WARPX_DIM_RSPHERE)

    amrex::Real const xp_mid = (xp_new + xp_old)*0.5_rt;

    amrex::Real const yp_mid = (yp_new + yp_old)*0.5_rt;

    amrex::Real const zp_mid = (zp_new + zp_old)*0.5_rt;

    amrex::Real const rpxy_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);

    amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new + zp_new*zp_new);

    amrex::Real const rpxy_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);

    amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old + zp_old*zp_old);

    amrex::Real const rpxy_mid = (rpxy_new + rpxy_old)*0.5_rt;

    amrex::Real const rp_mid = (rp_new + rp_old)*0.5_rt;

    amrex::Real const costheta_mid = (rpxy_mid > 0._rt ? xp_mid/rpxy_mid : 1._rt);

    amrex::Real const sintheta_mid = (rpxy_mid > 0._rt ? yp_mid/rpxy_mid : 0._rt);

    amrex::Real const cosphi_mid = (rp_mid > 0._rt ? rpxy_mid/rp_mid : 1._rt);

    amrex::Real const sinphi_mid = (rp_mid > 0._rt ? zp_mid/rp_mid : 0._rt);


    // Keep these double to avoid bug in single precision

    double x_new = (rp_new - xyzmin.x)*dinv.x;

    double const x_old = (rp_old - xyzmin.x)*dinv.x;

    amrex::Real const vx = (rp_new - rp_old)/dt;

    amrex::Real const vy = (-uxp_mid*sintheta_mid + uyp_mid*costheta_mid)*gaminv;

    amrex::Real const vz = (-uxp_mid*costheta_mid*cosphi_mid - uyp_mid*sintheta_mid*cosphi_mid + uzp_mid*sinphi_mid)*gaminv;

#elif defined(WARPX_DIM_XZ)

    // Keep these double to avoid bug in single precision

    double x_new = (xp_new - xyzmin.x)*dinv.x;

    double const x_old = (xp_old - xyzmin.x)*dinv.x;

    amrex::Real const vx = (xp_new - xp_old)/dt;

    amrex::Real const vy = uyp_mid*gaminv;

#elif defined(WARPX_DIM_1D_Z)

    amrex::Real const vx = uxp_mid*gaminv;

    amrex::Real const vy = uyp_mid*gaminv;

#elif defined(WARPX_DIM_3D)

    // Keep these double to avoid bug in single precision

    double x_new = (xp_new - xyzmin.x)*dinv.x;

    double const x_old = (xp_old - xyzmin.x)*dinv.x;

    double y_new = (yp_new - xyzmin.y)*dinv.y;

    double const y_old = (yp_old - xyzmin.y)*dinv.y;

    amrex::Real const vx = (xp_new - xp_old)/dt;

    amrex::Real const vy = (yp_new - yp_old)/dt;

#endif


#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)

    // Keep these double to avoid bug in single precision

    double z_new = (zp_new - xyzmin.z)*dinv.z;

    double const z_old = (zp_old - xyzmin.z)*dinv.z;

    amrex::Real const vz = (zp_new - zp_old)/dt;

#endif


    // Define velocity kernels to deposit

    amrex::Real const wqx = wq_invvol*vx;

    amrex::Real const wqy = wq_invvol*vy;

    amrex::Real const wqz = wq_invvol*vz;


    // Compute total change in particle position (always do before cropping)

#if !defined(WARPX_DIM_1D_Z)

    const double dxp = x_new - x_old;

#endif

#if defined(WARPX_DIM_3D)

    const double dyp = y_new - y_old;

#endif

#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)

    const double dzp = z_new - z_old;

#endif


    // Crop particle orbits at absorbing domain boundaries

#if defined(WARPX_DIM_3D)

    ParticleUtils::crop_at_boundary(x_old, y_old, z_old, x_new, y_new, z_new,

                                    domain_double[0][0], domain_double[0][1], do_cropping[0]);

    ParticleUtils::crop_at_boundary(y_old, z_old, x_old, y_new, z_new, x_new,

                                    domain_double[1][0], domain_double[1][1], do_cropping[1]);

    ParticleUtils::crop_at_boundary(z_old, x_old, y_old, z_new, x_new, y_new,

                                    domain_double[2][0], domain_double[2][1], do_cropping[2]);

#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)

    ParticleUtils::crop_at_boundary(x_old, z_old, x_new, z_new,

                                    domain_double[0][0], domain_double[0][1], do_cropping[0]);

    ParticleUtils::crop_at_boundary(z_old, x_old, z_new, x_new,

                                    domain_double[1][0], domain_double[1][1], do_cropping[1]);

#elif defined(WARPX_DIM_1D_Z)

    ParticleUtils::crop_at_boundary(z_new, domain_double[0][0], domain_double[0][1], do_cropping[0]);

#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)

    ParticleUtils::crop_at_boundary(x_new, domain_double[0][0], domain_double[0][1], do_cropping[0]);

#endif


    // 1) Determine the number of segments.

    // 2) Loop over segments and deposit current.


    // cell crossings are defined at cell edges if depos_order is odd

    // cell crossings are defined at cell centers if depos_order is even


    int num_segments = 1;

    double shift = 0.0;

    if ( (depos_order % 2) == 0 ) { shift = 0.5; }


#if defined(WARPX_DIM_3D)


    // compute cell crossings in X-direction

    const auto i_old = static_cast<int>(x_old-shift);

    const auto i_new = static_cast<int>(x_new-shift);

    const int cell_crossings_x = std::abs(i_new-i_old);

    num_segments += cell_crossings_x;


    // compute cell crossings in Y-direction

    const auto j_old = static_cast<int>(y_old-shift);

    const auto j_new = static_cast<int>(y_new-shift);

    const int cell_crossings_y = std::abs(j_new-j_old);

    num_segments += cell_crossings_y;


    // compute cell crossings in Z-direction

    const auto k_old = static_cast<int>(z_old-shift);

    const auto k_new = static_cast<int>(z_new-shift);

    const int cell_crossings_z = std::abs(k_new-k_old);

    num_segments += cell_crossings_z;


    // Compute initial particle cell locations in each direction

    // used to find the position at cell crossings.

    // Keep these double to avoid bug in single precision

    const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);

    const auto dirY_sign = static_cast<double>(dyp < 0. ? -1. : 1.);

    const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);

    double Xcell = 0., Ycell = 0., Zcell = 0.;

    if (num_segments > 1) {

        Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);

        Ycell = static_cast<double>(j_old) + shift + 0.5*(1.-dirY_sign);

        Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);

    }


    // loop over the number of segments and deposit

    const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;

    const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;

    double dxp_seg, dyp_seg, dzp_seg;

    double x0_new, y0_new, z0_new;

    double x0_old = x_old;

    double y0_old = y_old;

    double z0_old = z_old;


    for (int ns = 0; ns < num_segments; ns++) {


        if (ns == num_segments-1) { // final segment


            x0_new = x_new;

            y0_new = y_new;

            z0_new = z_new;

            dxp_seg = x0_new - x0_old;

            dyp_seg = y0_new - y0_old;

            dzp_seg = z0_new - z0_old;


        }

        else {


            x0_new = Xcell + dirX_sign;

            y0_new = Ycell + dirY_sign;

            z0_new = Zcell + dirZ_sign;

            dxp_seg = x0_new - x0_old;

            dyp_seg = y0_new - y0_old;

            dzp_seg = z0_new - z0_old;


            if ( (dyp == 0. || std::abs(dxp_seg) < std::abs(dxp/dyp*dyp_seg))

              && (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) ) {

                Xcell = x0_new;

                dyp_seg = dyp/dxp*dxp_seg;

                dzp_seg = dzp/dxp*dxp_seg;

                y0_new = y0_old + dyp_seg;

                z0_new = z0_old + dzp_seg;

            }

            else if (dzp == 0. || std::abs(dyp_seg) < std::abs(dyp/dzp*dzp_seg)) {

                Ycell = y0_new;

                dxp_seg = dxp/dyp*dyp_seg;

                dzp_seg = dzp/dyp*dyp_seg;

                x0_new = x0_old + dxp_seg;

                z0_new = z0_old + dzp_seg;

            }

            else {

                Zcell = z0_new;

                dxp_seg = dxp/dzp*dzp_seg;

                dyp_seg = dyp/dzp*dzp_seg;

                x0_new = x0_old + dxp_seg;

                y0_new = y0_old + dyp_seg;

            }


        }


        // Compute the segment factors (each equal to dt_seg/dt for nonzero dxp, dyp, or dzp)

        const auto seg_factor_x = static_cast<amrex::Real>(dxp == 0. ? 1._rt : dxp_seg/dxp);

        const auto seg_factor_y = static_cast<amrex::Real>(dyp == 0. ? 1._rt : dyp_seg/dyp);

        const auto seg_factor_z = static_cast<amrex::Real>(dzp == 0. ? 1._rt : dzp_seg/dzp);


        // Compute cell-based weights using the average segment position

        // Keep these double to avoid bug in single precision

        double sx_cell[depos_order] = {0.};

        double sy_cell[depos_order] = {0.};

        double sz_cell[depos_order] = {0.};

        double const x0_bar = (x0_new + x0_old)/2.0;

        double const y0_bar = (y0_new + y0_old)/2.0;

        double const z0_bar = (z0_new + z0_old)/2.0;

        const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );

        const int j0_cell = compute_shape_factor_cell( sy_cell, y0_bar-0.5 );

        const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );


        if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights

            const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;

            double sx_old_cell[depos_order] = {0.};

            double sx_new_cell[depos_order] = {0.};

            double sy_old_cell[depos_order] = {0.};

            double sy_new_cell[depos_order] = {0.};

            double sz_old_cell[depos_order] = {0.};

            double sz_new_cell[depos_order] = {0.};

            const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );

            const int j0_cell_2 = compute_shape_factors_cell( sy_old_cell, sy_new_cell, y0_old-0.5, y0_new-0.5 );

            const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );

            amrex::ignore_unused(i0_cell_2, j0_cell_2, k0_cell_2);

            for (int m = 0; m < depos_order; m++) {

                sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;

                sy_cell[m] = (4.0*sy_cell[m] + sy_old_cell[m] + sy_new_cell[m])/6.0;

                sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;

            }

        }


        // Compute node-based weights using the old and new segment positions

        // Keep these double to avoid bug in single precision

        double sx_old_node[depos_order+1] = {0.};

        double sx_new_node[depos_order+1] = {0.};

        double sy_old_node[depos_order+1] = {0.};

        double sy_new_node[depos_order+1] = {0.};

        double sz_old_node[depos_order+1] = {0.};

        double sz_new_node[depos_order+1] = {0.};

        const int i0_node = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );

        const int j0_node = compute_shape_factors_node( sy_old_node, sy_new_node, y0_old, y0_new );

        const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );


        // deposit Jx and Sxx for this segment

        amrex::Real weight;

        for (int i = 0; i <= depos_order - 1; i++) {

            for (int j = 0; j <= depos_order; j++) {

                for (int k = 0; k <= depos_order; k++) {

                    weight = sx_cell[i]*( sy_old_node[j]*sz_old_node[k]*one_third

                                        + sy_old_node[j]*sz_new_node[k]*one_sixth

                                        + sy_new_node[j]*sz_old_node[k]*one_sixth

                                        + sy_new_node[j]*sz_new_node[k]*one_third )*seg_factor_x;

                    if constexpr (deposit_J) {

                        amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+j0_node+j, lo.z+k0_node+k), wqx*weight);

                    }

                    amrex::Gpu::Atomic::AddNoRet( &Sxx_arr(lo.x+i0_cell+i, lo.y+j0_node+j, lo.z+k0_node+k, 0), fpxx*weight*weight);

                }

            }

        }


        // deposit Jy and Syy for this segment

        for (int i = 0; i <= depos_order; i++) {

            for (int j = 0; j <= depos_order - 1; j++) {

                for (int k = 0; k <= depos_order; k++) {

                    weight = sy_cell[j]*( sx_old_node[i]*sz_old_node[k]*one_third

                                        + sx_old_node[i]*sz_new_node[k]*one_sixth

                                        + sx_new_node[i]*sz_old_node[k]*one_sixth

                                        + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;

                    if constexpr (deposit_J) {

                        amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+j0_cell+j, lo.z+k0_node+k), wqy*weight);

                    }

                    amrex::Gpu::Atomic::AddNoRet( &Syy_arr(lo.x+i0_node+i, lo.y+j0_cell+j, lo.z+k0_node+k, 0), fpyy*weight*weight);

                }

            }

        }


        // deposit Jz and Szz for this segment

        for (int i = 0; i <= depos_order; i++) {

            for (int j = 0; j <= depos_order; j++) {

                for (int k = 0; k <= depos_order - 1; k++) {

                    weight = sz_cell[k]*( sx_old_node[i]*sy_old_node[j]*one_third

                                        + sx_old_node[i]*sy_new_node[j]*one_sixth

                                        + sx_new_node[i]*sy_old_node[j]*one_sixth

                                        + sx_new_node[i]*sy_new_node[j]*one_third )*seg_factor_z;

                    if constexpr (deposit_J) {

                        amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+j0_node+j, lo.z+k0_cell+k), wqz*weight);

                    }

                    amrex::Gpu::Atomic::AddNoRet( &Szz_arr(lo.x+i0_node+i, lo.y+j0_node+j, lo.z+k0_cell+k, 0), fpzz*weight*weight);

                }

            }

        }


        // update old segment values

        if (ns < num_segments-1) {

            x0_old = x0_new;

            y0_old = y0_new;

            z0_old = z0_new;

        }


    } // end loop over segments


#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)


    // compute cell crossings in X-direction

    const auto i_old = static_cast<int>(x_old-shift);

    const auto i_new = static_cast<int>(x_new-shift);

    const int cell_crossings_x = std::abs(i_new-i_old);

    num_segments += cell_crossings_x;


    // compute cell crossings in Z-direction

    const auto k_old = static_cast<int>(z_old-shift);

    const auto k_new = static_cast<int>(z_new-shift);

    const int cell_crossings_z = std::abs(k_new-k_old);

    num_segments += cell_crossings_z;


    // Compute initial particle cell locations in each direction

    // used to find the position at cell crossings.

    // Keep these double to avoid bug in single precision

    const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);

    const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);

    double Xcell = 0., Zcell = 0.;

    if (num_segments > 1) {

        Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);

        Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);

    }


    // loop over the number of segments and deposit

    const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;

    const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;

    double dxp_seg, dzp_seg;

    double x0_new, z0_new;

    double x0_old = x_old;

    double z0_old = z_old;


    constexpr int num_segments_max = 1 + 4*AMREX_SPACEDIM;

    AMREX_ALWAYS_ASSERT_WITH_MESSAGE( num_segments <= num_segments_max,

        "Error: num_segments must be less than or equal to 1 + 4*AMREX_SPACEDIM.");


    // Save the start index and interpolation weights for each segment

    int i0_cell[num_segments_max];

    int i0_node[num_segments_max];

    int k0_cell[num_segments_max];

    int k0_node[num_segments_max];

    amrex::Real weight_cellX_nodeZ[num_segments_max][depos_order][depos_order+1];

    amrex::Real weight_nodeX_cellZ[num_segments_max][depos_order+1][depos_order];

    amrex::Real weight_nodeX_nodeZ[num_segments_max][depos_order+1][depos_order+1];


    const auto i_mid = static_cast<int>(0.5*(x_new+x_old)-shift);

    const auto k_mid = static_cast<int>(0.5*(z_new+z_old)-shift);

    int SegNumX[num_segments_max];

    int SegNumZ[num_segments_max];


    for (int ns = 0; ns < num_segments; ns++) {


        if (ns == num_segments-1) { // final segment


            x0_new = x_new;

            z0_new = z_new;

            dxp_seg = x0_new - x0_old;

            dzp_seg = z0_new - z0_old;


        }

        else {


            x0_new = Xcell + dirX_sign;

            z0_new = Zcell + dirZ_sign;

            dxp_seg = x0_new - x0_old;

            dzp_seg = z0_new - z0_old;


            if (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) {

                Xcell = x0_new;

                dzp_seg = dzp/dxp*dxp_seg;

                z0_new = z0_old + dzp_seg;

            }

            else {

                Zcell = z0_new;

                dxp_seg = dxp/dzp*dzp_seg;

                x0_new = x0_old + dxp_seg;

            }


        }


        // Compute the segment factors (each equal to dt_seg/dt for nonzero dxp, or dzp)

        const auto seg_factor_x = static_cast<amrex::Real>(dxp == 0. ? 1._rt : dxp_seg/dxp);

        const auto seg_factor_z = static_cast<amrex::Real>(dzp == 0. ? 1._rt : dzp_seg/dzp);


        // Compute cell-based weights using the average segment position

        // Keep these double to avoid bug in single precision

        double sx_cell[depos_order] = {0.};

        double sz_cell[depos_order] = {0.};

        double const x0_bar = (x0_new + x0_old)/2.0;

        double const z0_bar = (z0_new + z0_old)/2.0;

        i0_cell[ns] = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );

        k0_cell[ns] = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );


        // Set the segment number for the mass matrix component calc

        if constexpr (full_mass_matrices) {

            const auto i0_mid = static_cast<int>(x0_bar-shift);

            const auto k0_mid = static_cast<int>(z0_bar-shift);

            SegNumX[ns] = 1 + i0_mid - i_mid;

            SegNumZ[ns] = 1 + k0_mid - k_mid;

        }


        if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights

            const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;

            double sx_old_cell[depos_order] = {0.};

            double sx_new_cell[depos_order] = {0.};

            double sz_old_cell[depos_order] = {0.};

            double sz_new_cell[depos_order] = {0.};

            const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );

            const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );

            amrex::ignore_unused(i0_cell_2, k0_cell_2);

            for (int m = 0; m < depos_order; m++) {

                sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;

                sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;

            }

        }


        // Compute node-based weights using the old and new segment positions

        // Keep these double to avoid bug in single precision

        double sx_old_node[depos_order+1] = {0.};

        double sx_new_node[depos_order+1] = {0.};

        double sz_old_node[depos_order+1] = {0.};

        double sz_new_node[depos_order+1] = {0.};

        i0_node[ns] = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );

        k0_node[ns] = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );


        // deposit Jx and Sxx for this segment (with full_mass_matrices, only the weight is saved here)

        amrex::Real weight;

        for (int i = 0; i <= depos_order - 1; i++) {

            for (int k = 0; k <= depos_order; k++) {

                const int i_J = lo.x + i0_cell[ns] + i;

                const int k_J = lo.y + k0_node[ns] + k;

                weight = sx_cell[i]*(sz_old_node[k] + sz_new_node[k])/2.0_rt*seg_factor_x;

                if constexpr (deposit_J) {

                    amrex::Gpu::Atomic::AddNoRet(&Jx_arr(i_J, k_J, 0, 0), wqx*weight);

                }

                if constexpr (full_mass_matrices) { weight_cellX_nodeZ[ns][i][k] = weight; }

                else {

                    amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, k_J, 0, 0), fpxx*weight*weight);

                }

            }

        }


        // deposit out-of-plane Jy and Syy for this segment (with full_mass_matrices, only the weight is saved here)

        const auto seg_factor_y = std::min(seg_factor_x,seg_factor_z);

        for (int i = 0; i <= depos_order; i++) {

            for (int k = 0; k <= depos_order; k++) {

                const int i_J = lo.x + i0_node[ns] + i;

                const int k_J = lo.y + k0_node[ns] + k;

                weight = ( sx_old_node[i]*sz_old_node[k]*one_third

                       +   sx_old_node[i]*sz_new_node[k]*one_sixth

                       +   sx_new_node[i]*sz_old_node[k]*one_sixth

                       +   sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;

                if constexpr (deposit_J) {

                    amrex::Gpu::Atomic::AddNoRet(&Jy_arr(i_J, k_J, 0, 0), wqy*weight);

                }

                if constexpr (full_mass_matrices) { weight_nodeX_nodeZ[ns][i][k] = weight; }

                else {

                    amrex::Gpu::Atomic::AddNoRet(&Syy_arr(i_J, k_J, 0, 0), fpyy*weight*weight);

                }

            }

        }


        // deposit Jz and Szz for this segment

        for (int i = 0; i <= depos_order; i++) {

            for (int k = 0; k <= depos_order - 1; k++) {

                const int i_J = lo.x + i0_node[ns] + i;

                const int k_J = lo.y + k0_cell[ns] + k;

                weight = sz_cell[k]*(sx_old_node[i] + sx_new_node[i])/2.0_rt*seg_factor_z;

                if constexpr (deposit_J) {

                    amrex::Gpu::Atomic::AddNoRet(&Jz_arr(i_J, k_J, 0, 0), wqz*weight);

                }

                if constexpr (full_mass_matrices) { weight_nodeX_cellZ[ns][i][k] = weight; }

                else {

                    amrex::Gpu::Atomic::AddNoRet(&Szz_arr(i_J, k_J, 0, 0), fpzz*weight*weight);

                }

            }

        }


        // update old segment values

        if (ns < num_segments - 1) {

            x0_old = x0_new;

            z0_old = z0_new;

        }


    } // end loop over segments


    if constexpr (full_mass_matrices) {


    const int Ncomp_base = 2*depos_order + 2*max_crossings;

    const int Ncomp_xx0 = Ncomp_base - 1;

    const int Ncomp_xy0 = Ncomp_base;

    const int Ncomp_xz0 = Ncomp_base;

    const int Ncomp_yx0 = Ncomp_base;

    const int Ncomp_yy0 = 1 + Ncomp_base;

    const int Ncomp_yz0 = 1 + Ncomp_base;

    const int Ncomp_zx0 = Ncomp_base;

    const int Ncomp_zy0 = 1 + Ncomp_base;

    const int Ncomp_zz0 = 1 + Ncomp_base;


    const int width_xx1 = depos_order + max_crossings;  // (Ncomp_xx[1] - 1)/2

    const int width_yy1 = width_xx1;                    // (Ncomp_yy[1] - 1)/2

    const int width_zz1 = width_xx1 - 1;                // (Ncomp_zz[1] - 1)/2


    // Loop over segments and deposit full mass matrices

    for (int ns = 0; ns < num_segments; ns++) {


        // Deposit Sxx, Sxz, and Sxy for this segment

        for (int i = 0; i <= depos_order - 1; i++) {

            for (int k = 0; k <= depos_order; k++) {

                const int i_J = lo.x + i0_cell[ns] + i;

                const int k_J = lo.y + k0_node[ns] + k;

                const amrex::Real weight_J = weight_cellX_nodeZ[ns][i][k];

                for (int ms = 0; ms < num_segments; ms++) {

                    const int SegShiftX = max_crossings + SegNumX[ms] - SegNumX[ns];

                    const int SegShiftZ = max_crossings + SegNumZ[ms] - SegNumZ[ns];

                    // Deposit Sxx

                    for (int kE = 0; kE <= depos_order; kE++) {

                        const int row_xx = depos_order - k + kE + SegShiftZ;

                        const int above_diag = (row_xx > width_xx1) ? 1 : 0;

                        for (int iE = 0; iE <= depos_order - 1; iE++) {

                            const int col_xx = depos_order - 1 - i + iE + SegShiftX;

                            if (col_xx > Ncomp_xx0 - row_xx - above_diag) { break; } // Reduced deposit for diagonal mass matrices

                            const int comp_xx = col_xx + Ncomp_xx0*row_xx;

                            const amrex::Real weight_E = weight_cellX_nodeZ[ms][iE][kE];

                            amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, k_J, 0, comp_xx), fpxx*weight_J*weight_E);

                        }

                    }

                    // Deposit Sxz

                    for (int iE = 0; iE <= depos_order; iE++) {

                        for (int kE = 0; kE <= depos_order - 1; kE++) {

                            const amrex::Real weight_E = weight_nodeX_cellZ[ms][iE][kE];

                            const int comp_xz = depos_order - 1 - i + iE + SegShiftX

                                   + Ncomp_xz0*(depos_order - k + kE + SegShiftZ);

                            amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(i_J, k_J, 0, comp_xz), fpxz*weight_J*weight_E);

                        }

                    }

                    // Deposit Sxy

                    for (int iE = 0; iE <= depos_order; iE++) {

                        for (int kE = 0; kE <= depos_order; kE++) {

                            const amrex::Real weight_E = weight_nodeX_nodeZ[ms][iE][kE];

                            const int comp_xy = depos_order - 1 - i + iE + SegShiftX

                                   + Ncomp_xy0*(depos_order - k + kE + SegShiftZ);

                            amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(i_J, k_J, 0, comp_xy), fpxy*weight_J*weight_E);

                        }

                    }

                }

            }

        }


        // Deposit Szx, Szz, and Szy for this segment

        for (int i = 0; i <= depos_order; i++) {

            for (int k = 0; k <= depos_order - 1; k++) {

                const int i_J = lo.x + i0_node[ns] + i;

                const int k_J = lo.y + k0_cell[ns] + k;

                const amrex::Real weight_J = weight_nodeX_cellZ[ns][i][k];

                for (int ms = 0; ms < num_segments; ms++) {

                    const int SegShiftX = max_crossings + SegNumX[ms] - SegNumX[ns];

                    const int SegShiftZ = max_crossings + SegNumZ[ms] - SegNumZ[ns];

                    // Deposit Szx

                    for (int iE = 0; iE <= depos_order - 1; iE++) {

                        for (int kE = 0; kE <= depos_order; kE++) {

                            const amrex::Real weight_E = weight_cellX_nodeZ[ms][iE][kE];

                            const int comp_zx = depos_order - i + iE + SegShiftX

                                  +  Ncomp_zx0*(depos_order-1 - k + kE + SegShiftZ);

                            amrex::Gpu::Atomic::AddNoRet( &Szx_arr(i_J, k_J, 0, comp_zx), fpzx*weight_J*weight_E);

                        }

                    }

                    // Deposit Szz

                    for (int kE = 0; kE <= depos_order - 1; kE++) {

                        const int row_zz = depos_order - 1 - k + kE + SegShiftZ;

                        const int above_diag = (row_zz > width_zz1) ? 1 : 0;

                        for (int iE = 0; iE <= depos_order; iE++) {

                            const int col_zz = depos_order - i + iE + SegShiftX;

                            if (col_zz > Ncomp_zz0 - 2 - row_zz - above_diag) { break; } // Reduced deposit for diagonal mass matrices

                            const int comp_zz = col_zz + Ncomp_zz0*row_zz;

                            const amrex::Real weight_E = weight_nodeX_cellZ[ms][iE][kE];

                            amrex::Gpu::Atomic::AddNoRet( &Szz_arr(i_J, k_J, 0, comp_zz), fpzz*weight_J*weight_E);

                        }

                    }

                    // Deposit Szy

                    for (int iE = 0; iE <= depos_order; iE++) {

                        for (int kE = 0; kE <= depos_order; kE++) {

                            const amrex::Real weight_E = weight_nodeX_nodeZ[ms][iE][kE];

                            const int comp_zy = depos_order - i + iE + SegShiftX

                                   + Ncomp_zy0*(depos_order-1 - k + kE + SegShiftZ);

                            amrex::Gpu::Atomic::AddNoRet( &Szy_arr(i_J, k_J, 0, comp_zy), fpzy*weight_J*weight_E);

                        }

                    }

                }

            }

        }


        // Deposit Syx, Syz, and Syy for this segment

        for (int i = 0; i <= depos_order; i++) {

            for (int k = 0; k <= depos_order; k++) {

                const int i_J = lo.x + i0_node[ns] + i;

                const int k_J = lo.y + k0_node[ns] + k;

                const amrex::Real weight_J = weight_nodeX_nodeZ[ns][i][k];

                for (int ms = 0; ms < num_segments; ms++) {

                    const int SegShiftX = max_crossings + SegNumX[ms] - SegNumX[ns];

                    const int SegShiftZ = max_crossings + SegNumZ[ms] - SegNumZ[ns];

                    // Deposit Syx

                    for (int iE = 0; iE <= depos_order - 1; iE++) {

                        for (int kE = 0; kE <= depos_order; kE++) {

                            const amrex::Real weight_E = weight_cellX_nodeZ[ms][iE][kE];

                            const int comp_yx = depos_order - i + iE + SegShiftX

                                  +  Ncomp_yx0*(depos_order - k + kE + SegShiftZ);

                            amrex::Gpu::Atomic::AddNoRet( &Syx_arr(i_J, k_J, 0, comp_yx), fpyx*weight_J*weight_E);

                        }

                    }

                    // Deposit Syz

                    for (int iE = 0; iE <= depos_order; iE++) {

                        for (int kE = 0; kE <= depos_order - 1; kE++) {

                            const amrex::Real weight_E = weight_nodeX_cellZ[ms][iE][kE];

                            const int comp_yz = depos_order - i + iE + SegShiftX

                                   + Ncomp_yz0*(depos_order - k + kE + SegShiftZ);

                            amrex::Gpu::Atomic::AddNoRet( &Syz_arr(i_J, k_J, 0, comp_yz), fpyz*weight_J*weight_E);

                        }

                    }

                    // Deposit Syy

                    for (int kE = 0; kE <= depos_order; kE++) {

                        const int row_yy = depos_order - k + kE + SegShiftZ;

                        const int above_diag = (row_yy > width_yy1) ? 1 : 0;

                        for (int iE = 0; iE <= depos_order; iE++) {

                            const int col_yy = depos_order - i + iE + SegShiftX;

                            if (col_yy > Ncomp_yy0 - 1 - row_yy - above_diag) { break; } // Reduced deposit for diagonal mass matrices

                            const int comp_yy = col_yy + Ncomp_yy0*row_yy;

                            const amrex::Real weight_E = weight_nodeX_nodeZ[ms][iE][kE];

                            amrex::Gpu::Atomic::AddNoRet( &Syy_arr(i_J, k_J, 0, comp_yy), fpyy*weight_J*weight_E);

                        }

                    }


                }

            }

        }


     }


     }


#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)


    // compute cell crossings in X-direction

    const auto i_old = static_cast<int>(x_old-shift);

    const auto i_new = static_cast<int>(x_new-shift);

    const int cell_crossings_x = std::abs(i_new-i_old);

    num_segments += cell_crossings_x;


    // Compute the initial cell location used to find the cell crossings.

    // Keep these double to avoid bug in single precision

    const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);

    double Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);


    // loop over the number of segments and deposit

    const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;

    const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;

    double dxp_seg;

    double x0_new;

    double x0_old = x_old;


    constexpr int num_segments_max = 1 + 4*AMREX_SPACEDIM;

    AMREX_ALWAYS_ASSERT_WITH_MESSAGE( num_segments <= num_segments_max,

        "Error: num_segments must be less than or equal to 1 + 4*AMREX_SPACEDIM.");


    // Save the start index and interpolation weights for each segment

    int i0_cell[num_segments_max];

    int i0_node[num_segments_max];

    amrex::Real weight_cell[num_segments_max][depos_order];

    amrex::Real weight_node[num_segments_max][depos_order+1];


    const auto i_mid = static_cast<int>(0.5*(x_new+x_old)-shift);

    int SegNum[num_segments_max];


    for (int ns = 0; ns < num_segments; ns++) {


        if (ns == num_segments-1) { // final segment

            x0_new = x_new;

            dxp_seg = x0_new - x0_old;

        }

        else {

            Xcell = Xcell + dirX_sign;

            x0_new = Xcell;

            dxp_seg = x0_new - x0_old;

        }


        // Compute the segment factor (equal to dt_seg/dt for nonzero dxp)

        const auto seg_factor = static_cast<amrex::Real>(dxp == 0. ? 1._rt : dxp_seg/dxp);


        // Compute cell-based weights using the average segment position

        // Keep these double to avoid bug in single precision

        double sx_cell[depos_order] = {0.};

        double const x0_bar = (x0_new + x0_old)/2.0;

        i0_cell[ns] = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );


        // Set the segment number for the mass matrix component calc

        if constexpr (full_mass_matrices) {

            const auto i0_mid = static_cast<int>(x0_bar-shift);

            SegNum[ns] = 1 + i0_mid - i_mid;

        }


        if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights

            const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;

            double sx_old_cell[depos_order] = {0.};

            double sx_new_cell[depos_order] = {0.};

            const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );

            amrex::ignore_unused(i0_cell_2);

            for (int m=0; m<depos_order; m++) {

                sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;

            }

        }


        // Compute node-based weights using the old and new segment positions

        // Keep these double to avoid bug in single precision

        double sx_old_node[depos_order+1] = {0.};

        double sx_new_node[depos_order+1] = {0.};

        i0_node[ns] = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );


        // deposit out-of-plane Jy, Jz, Syy, and Szz for this segment

        for (int i = 0; i <= depos_order; i++) {

            const amrex::Real weight = 0.5_rt*(sx_old_node[i] + sx_new_node[i])*seg_factor;

            const int i_J = lo.x + i0_node[ns] + i;

            if constexpr (deposit_J) {

                amrex::Gpu::Atomic::AddNoRet( &Jy_arr(i_J, 0, 0), wqy*weight);

                amrex::Gpu::Atomic::AddNoRet( &Jz_arr(i_J, 0, 0), wqz*weight);

            }

            //

            if constexpr (full_mass_matrices) { weight_node[ns][i] = weight; }

            else {

                amrex::Gpu::Atomic::AddNoRet( &Syy_arr(i_J, 0, 0, 0), fpyy*weight);

                amrex::Gpu::Atomic::AddNoRet( &Szz_arr(i_J, 0, 0, 0), fpzz*weight);

            }

        }


        // deposit Jx and Sxx for this segment

        for (int i = 0; i <= depos_order - 1; i++) {

            const amrex::Real weight = sx_cell[i]*seg_factor;

            const int i_J = lo.x + i0_cell[ns] + i;

            if constexpr (deposit_J) {

                amrex::Gpu::Atomic::AddNoRet(&Jx_arr(i_J, 0, 0), wqx*weight);

            }

            if constexpr (full_mass_matrices) { weight_cell[ns][i] = weight; }

            else {

                amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, 0, 0, 0), fpxx*weight);

            }

        }


        // update old segment values

        if (ns < num_segments-1) {

            x0_old = x0_new;

        }


    }


    if constexpr (full_mass_matrices) {


    const int width_xx = depos_order - 1 + max_crossings;

    const int width_yy = depos_order + max_crossings;


    // Loop over segments and deposit full mass matrices

    for (int ns = 0; ns < num_segments; ns++) {


        // Deposit Sxx, Sxy, and Sxz for this segment

        for (int i = 0; i <= depos_order - 1; i++) {


            const int i_J = lo.x + i0_cell[ns] + i;

            const amrex::Real weight_J = weight_cell[ns][i];

            for (int ms = 0; ms < num_segments; ms++) {

                const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];

                for (int iE = 0; iE <= depos_order - 1; iE++) {

                    const int comp_xx = depos_order - 1 - i + iE + SegShift;

                    if (comp_xx > width_xx) { break; } // Reduced deposit for diagonal mass matrices

                    const amrex::Real weight_E = weight_cell[ms][iE];

                    amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, 0, 0, comp_xx), fpxx*weight_J*weight_E);

                }

                for (int iE = 0; iE <= depos_order; iE++) {

                    const amrex::Real weight_E = weight_node[ms][iE];

                    const int comp_xy = depos_order - 1 - i + iE + SegShift;

                    amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(i_J, 0, 0, comp_xy), fpxz*weight_J*weight_E);

                    amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(i_J, 0, 0, comp_xy), fpxy*weight_J*weight_E);

                }

            }


        }


        // Deposit Syx, Syy, Syz, Szx, Szy, and Szz for this segment

        for (int i = 0; i <= depos_order; i++) {


            const int i_J = lo.x + i0_node[ns] + i;

            const amrex::Real weight_J = weight_node[ns][i];

            for (int ms = 0; ms < num_segments; ms++) {

                const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];

                for (int iE = 0; iE <= depos_order; iE++) {

                    const amrex::Real weight_E = weight_node[ms][iE];

                    const int comp_yy = depos_order - i + iE + SegShift;

                    if (comp_yy <= width_yy) { // Reduced deposit for diagonal mass matrices

                        amrex::Gpu::Atomic::AddNoRet(&Syy_arr(i_J, 0, 0, comp_yy), fpyy*weight_J*weight_E);

                        amrex::Gpu::Atomic::AddNoRet(&Szz_arr(i_J, 0, 0, comp_yy), fpzz*weight_J*weight_E);

                    }

                    amrex::Gpu::Atomic::AddNoRet(&Syz_arr(i_J, 0, 0, comp_yy), fpyz*weight_J*weight_E);

                    amrex::Gpu::Atomic::AddNoRet(&Szy_arr(i_J, 0, 0, comp_yy), fpzy*weight_J*weight_E);

                }

                for (int iE = 0; iE <= depos_order - 1; iE++) {

                    const amrex::Real weight_E = weight_cell[ms][iE];

                    const int comp_yx = depos_order - i + iE + SegShift;

                    amrex::Gpu::Atomic::AddNoRet(&Syx_arr(i_J, 0, 0, comp_yx), fpyx*weight_J*weight_E);

                    amrex::Gpu::Atomic::AddNoRet(&Szx_arr(i_J, 0, 0, comp_yx), fpzx*weight_J*weight_E);

                }

            }


        }


    }


    }


#elif defined(WARPX_DIM_1D_Z)


    // compute cell crossings in Z-direction

    const auto k_old = static_cast<int>(z_old-shift);

    const auto k_new = static_cast<int>(z_new-shift);

    const int cell_crossings_z = std::abs(k_new-k_old);

    num_segments += cell_crossings_z;


    // Compute initial particle cell location used to find cell crossings.

    // Keep these double to avoid bug in single precision

    const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);

    double Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);


    // loop over the number of segments and deposit

    const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;

    const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;

    double dzp_seg;

    double z0_new;

    double z0_old = z_old;


    constexpr int num_segments_max = 1 + 4*AMREX_SPACEDIM;

    AMREX_ALWAYS_ASSERT_WITH_MESSAGE( num_segments <= num_segments_max,

        "Error: num_segments must be less than or equal to 1 + 4*AMREX_SPACEDIM.");


    // Save the start index and interpolation weights for each segment

    int k0_cell[num_segments_max];

    int k0_node[num_segments_max];

    amrex::Real weight_cell[num_segments_max][depos_order];

    amrex::Real weight_node[num_segments_max][depos_order+1];


    const auto k_mid = static_cast<int>(0.5*(z_new+z_old)-shift);

    int SegNum[num_segments_max];


    for (int ns = 0; ns < num_segments; ns++) {


        if (ns == num_segments-1) { // final segment

            z0_new = z_new;

            dzp_seg = z0_new - z0_old;

        }

        else {

            Zcell = Zcell + dirZ_sign;

            z0_new = Zcell;

            dzp_seg = z0_new - z0_old;

        }


        // Compute the segment factor (equal to dt_seg/dt for nonzero dzp)

        const auto seg_factor = static_cast<amrex::Real>(dzp == 0. ? 1._rt : dzp_seg/dzp);


        // Compute cell-based weights using the average segment position

        // Keep these double to avoid bug in single precision

        double sz_cell[depos_order] = {0.};

        double const z0_bar = (z0_new + z0_old)/2.0;

        k0_cell[ns] = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );


        // Set the segment number for the mass matrix component calc

        if constexpr (full_mass_matrices) {

            const auto k0_mid = static_cast<int>(z0_bar-shift);

            SegNum[ns] = 1 + k0_mid - k_mid;

        }


        if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights

            const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;

            double sz_old_cell[depos_order] = {0.};

            double sz_new_cell[depos_order] = {0.};

            const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );

            amrex::ignore_unused(k0_cell_2);

            for (int m = 0; m < depos_order; m++) {

                sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;

            }

        }


        // Compute node-based weights using the old and new segment positions

        // Keep these double to avoid bug in single precision

        double sz_old_node[depos_order+1] = {0.};

        double sz_new_node[depos_order+1] = {0.};

        k0_node[ns] = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );


        // deposit out-of-plane Jx, Jy, Sxx, and Syy for this segment

        for (int k = 0; k <= depos_order; k++) {

            const amrex::Real weight = 0.5_rt*(sz_old_node[k] + sz_new_node[k])*seg_factor;

            const int k_J = lo.x + k0_node[ns] + k;

            if constexpr (deposit_J) {

                amrex::Gpu::Atomic::AddNoRet(&Jx_arr(k_J, 0, 0), wqx*weight);

                amrex::Gpu::Atomic::AddNoRet(&Jy_arr(k_J, 0, 0), wqy*weight);

            }

            if constexpr (full_mass_matrices) { weight_node[ns][k] = weight; }

            else {

                amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(k_J, 0, 0, 0), fpxx*weight*weight);

                amrex::Gpu::Atomic::AddNoRet(&Syy_arr(k_J, 0, 0, 0), fpyy*weight*weight);

            }

        }


        // deposit Jz and Szz for this segment

        for (int k = 0; k <= depos_order - 1; k++) {

            const amrex::Real weight = sz_cell[k]*seg_factor;

            const int k_J = lo.x + k0_cell[ns] + k;

            if constexpr (deposit_J) {

                amrex::Gpu::Atomic::AddNoRet(&Jz_arr(k_J, 0, 0), wqz*weight);

            }

            if constexpr (full_mass_matrices) { weight_cell[ns][k] = weight; }

            else {

                amrex::Gpu::Atomic::AddNoRet(&Szz_arr(k_J, 0, 0, 0), fpzz*weight*weight);

            }

        }


        // update old segment values

        if (ns < num_segments-1) {

            z0_old = z0_new;

        }


    }


    if constexpr (full_mass_matrices) {


    const int width_zz = depos_order - 1 + max_crossings;

    const int width_yy = depos_order + max_crossings;


    // Loop over segments and deposit full mass matrices

    for (int ns = 0; ns < num_segments; ns++) {


        // Deposit Sxx, Sxy, Sxz, Syx, Syy, and Syz for this segment

        for (int k = 0; k <= depos_order; k++) {


            const int k_J = lo.x + k0_node[ns] + k;

            const amrex::Real weight_J = weight_node[ns][k];

            for (int ms = 0; ms < num_segments; ms++) {

                const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];

                for (int kE = 0; kE <= depos_order; kE++) {

                    const amrex::Real weight_E = weight_node[ms][kE];

                    const int comp_yy = depos_order - k + kE + SegShift;

                    if (comp_yy <= width_yy) { // Reduced deposit for diagonal mass matrices

                        amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(k_J, 0, 0, comp_yy), fpxx*weight_J*weight_E);

                        amrex::Gpu::Atomic::AddNoRet(&Syy_arr(k_J, 0, 0, comp_yy), fpyy*weight_J*weight_E);

                    }

                    amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(k_J, 0, 0, comp_yy), fpxy*weight_J*weight_E);

                    amrex::Gpu::Atomic::AddNoRet(&Syx_arr(k_J, 0, 0, comp_yy), fpyx*weight_J*weight_E);

                }

                for (int kE = 0; kE <= depos_order - 1; kE++) {

                    const amrex::Real weight_E = weight_cell[ms][kE];

                    const int comp_yz = depos_order - k + kE + SegShift;

                    amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(k_J, 0, 0, comp_yz), fpxz*weight_J*weight_E);

                    amrex::Gpu::Atomic::AddNoRet(&Syz_arr(k_J, 0, 0, comp_yz), fpyz*weight_J*weight_E);

                }

            }


        }


        // Deposit Szx, Szy, and Szz for this segment

        for (int k = 0; k <= depos_order - 1; k++) {


            const int k_J = lo.x + k0_cell[ns] + k;

            const amrex::Real weight_J = weight_cell[ns][k];

            for (int ms = 0; ms < num_segments; ms++) {

                const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];

                for (int kE = 0; kE <= depos_order - 1; kE++) {

                    const int comp_zz = depos_order - 1 - k + kE + SegShift;

                    if (comp_zz > width_zz) { break; } // Reduced deposit for diagonal mass matrices

                    const amrex::Real weight_E = weight_cell[ms][kE];

                    amrex::Gpu::Atomic::AddNoRet(&Szz_arr(k_J, 0, 0, comp_zz), fpzz*weight_J*weight_E);

                }

                for (int kE = 0; kE <= depos_order; kE++) {

                    const amrex::Real weight_E = weight_node[ms][kE];

                    const int comp_zy = depos_order-1 - k + kE + SegShift;

                    amrex::Gpu::Atomic::AddNoRet(&Szx_arr(k_J, 0, 0, comp_zy), fpzx*weight_J*weight_E);

                    amrex::Gpu::Atomic::AddNoRet(&Szy_arr(k_J, 0, 0, comp_zy), fpzy*weight_J*weight_E);

                }

            }


        }


    }


    }


#endif

}


/**
 * \brief Deposit only the mass matrices (no current density) for a list of
 *        particles, using the Villasenor segment-based deposition kernel.
 *
 * For every particle index in [0, np_to_deposit) this routine:
 *   - skips the particle when nsuborbits is non-null and nsuborbits[ip] > 1;
 *   - reads the position returned by GetPosition (treated as the time n+1/2
 *     position) and reconstructs the time n+1 position as 2*x_{n+1/2} - x_n;
 *   - assembles the magnetic field on the particle from the uniform external
 *     field, the optional getExternalEB functor, and a gather from the grid;
 *   - evaluates the mass-matrix kernels with setMassMatricesKernels();
 *   - calls doVillasenorJandSigmaDepositionKernel() with deposit_J = false.
 *
 * \tparam depos_order        deposition order, forwarded to the deposition kernel
 * \tparam full_mass_matrices forwarded to the deposition kernel
 *
 * \param[in] xp_n_data,yp_n_data,zp_n_data per-particle positions at time n;
 *            a null pointer is treated as a position of 0 for that component
 * \param[in] GetPosition functor filling the particle position for index ip
 * \param[in] nsuborbits optional per-particle suborbit count (may be null)
 * \param[in] wp per-particle weights
 * \param[in] uxp_n,uyp_n,uzp_n per-particle u components at time n
 *            (passed to GetImplicitGammaInverse)
 * \param[in] uxp_nph,uyp_nph,uzp_nph per-particle u components at time n+1/2
 * \param[in] max_crossings forwarded to the deposition kernel
 * \param[in,out] Sxx_arr,Sxy_arr,Sxz_arr,Syx_arr,Syy_arr,Syz_arr,Szx_arr,Szy_arr,Szz_arr
 *            mass-matrix arrays handed to the deposition kernel
 * \param[in] getExternalEB functor adding external fields on the particle;
 *            only invoked when getExternalEB.isNoOp() is false
 * \param[in] Bx_ext,By_ext,Bz_ext uniform external magnetic field components
 * \param[in] Bx_arr,By_arr,Bz_arr grid magnetic field arrays used by the gather
 * \param[in] Bx_type,By_type,Bz_type index types of the magnetic field arrays
 * \param[in] np_to_deposit number of particles processed
 * \param[in] dt time step, forwarded to the kernels
 * \param[in] dinv its x, y and z members are multiplied together to form the
 *            inverse-volume factor (presumably inverse cell sizes)
 * \param[in] xyzmin,domain_double,do_cropping,lo forwarded to the gather
 *            and/or deposition kernel
 * \param[in] qs multiplies the particle weight (presumably the species charge)
 * \param[in] ms forwarded to setMassMatricesKernels (presumably the species mass)
 */
template <int depos_order, bool full_mass_matrices>
void doVillasenorSigmaDeposition ([[maybe_unused]] const amrex::ParticleReal* xp_n_data,
                                  [[maybe_unused]] const amrex::ParticleReal* yp_n_data,
                                  [[maybe_unused]] const amrex::ParticleReal* zp_n_data,
                                  const GetParticlePosition<PIdx>& GetPosition,
                                  [[maybe_unused]] const int* nsuborbits,
                                  const amrex::ParticleReal* wp,
                                  const amrex::ParticleReal* uxp_n,
                                  const amrex::ParticleReal* uyp_n,
                                  const amrex::ParticleReal* uzp_n,
                                  const amrex::ParticleReal* uxp_nph,
                                  const amrex::ParticleReal* uyp_nph,
                                  const amrex::ParticleReal* uzp_nph,
                                  const int max_crossings,
                                  amrex::Array4<amrex::Real> const& Sxx_arr,
                                  amrex::Array4<amrex::Real> const& Sxy_arr,
                                  amrex::Array4<amrex::Real> const& Sxz_arr,
                                  amrex::Array4<amrex::Real> const& Syx_arr,
                                  amrex::Array4<amrex::Real> const& Syy_arr,
                                  amrex::Array4<amrex::Real> const& Syz_arr,
                                  amrex::Array4<amrex::Real> const& Szx_arr,
                                  amrex::Array4<amrex::Real> const& Szy_arr,
                                  amrex::Array4<amrex::Real> const& Szz_arr,
                                  GetExternalEBField const & getExternalEB,
                                  const amrex::ParticleReal Bx_ext,
                                  const amrex::ParticleReal By_ext,
                                  const amrex::ParticleReal Bz_ext,
                                  const amrex::Array4<amrex::Real const>& Bx_arr,
                                  const amrex::Array4<amrex::Real const>& By_arr,
                                  const amrex::Array4<amrex::Real const>& Bz_arr,
                                  const amrex::IndexType Bx_type,
                                  const amrex::IndexType By_type,
                                  const amrex::IndexType Bz_type,
                                  const long np_to_deposit,
                                  const amrex::Real dt,
                                  const amrex::XDim3& dinv,
                                  const amrex::XDim3& xyzmin,
                                  const amrex::GpuArray<amrex::GpuArray<double,2>, AMREX_SPACEDIM> & domain_double,
                                  const amrex::GpuArray<amrex::GpuArray<bool,2>, AMREX_SPACEDIM> & do_cropping,
                                  const amrex::Dim3 lo,
                                  const amrex::Real qs,
                                  const amrex::Real ms)
{
    using namespace amrex::literals;

    // Inverse-volume factor: product of the three components of dinv
    const amrex::Real inv_cell_volume = dinv.x*dinv.y*dinv.z;

    // The presence of an external-field functor is only known at run time;
    // it is mapped onto one of two compile-time options of the particle loop.
    enum exteb_flags : int { no_exteb, has_exteb };
    const bool apply_exteb = !getExternalEB.isNoOp();
    const int exteb_selector = apply_exteb ? has_exteb : no_exteb;

    // One loop iteration per particle
    amrex::ParallelFor(
        amrex::TypeList<amrex::CompileTimeOptions<no_exteb, has_exteb>>{},
        {exteb_selector},
        np_to_deposit,
        [=] AMREX_GPU_DEVICE (long const pidx, auto exteb_opt) {

            // Particles with more than one suborbit do not deposit mass matrices
            if (nsuborbits != nullptr && nsuborbits[pidx] > 1) { return; }

            // Position provided by the functor (used as the time n+1/2 position)
            amrex::ParticleReal x_half, y_half, z_half;
            GetPosition(pidx, x_half, y_half, z_half);

            // Position at time n; a missing component array yields 0
            amrex::ParticleReal const x_old = (xp_n_data != nullptr) ? xp_n_data[pidx] : 0._prt;
            amrex::ParticleReal const y_old = (yp_n_data != nullptr) ? yp_n_data[pidx] : 0._prt;
            amrex::ParticleReal const z_old = (zp_n_data != nullptr) ? zp_n_data[pidx] : 0._prt;

            // Position at time n+1, from x_{n+1} = 2*x_{n+1/2} - x_n
            amrex::ParticleReal const x_new = 2._prt*x_half - x_old;
            amrex::ParticleReal const y_new = 2._prt*y_half - y_old;
            amrex::ParticleReal const z_new = 2._prt*z_half - z_old;

#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
            // Mean of the cylindrical radii at n+1 and n, and the angle factors built from it
            amrex::ParticleReal const r_half = 0.5_prt*(std::sqrt(x_new*x_new + y_new*y_new)
                                             +          std::sqrt(x_old*x_old + y_old*y_old));
            amrex::ParticleReal const cos_th = (r_half > 0._prt ? x_half/r_half : 1._prt);
            amrex::ParticleReal const sin_th = (r_half > 0._prt ? y_half/r_half : 0._prt);
#elif defined(WARPX_DIM_RSPHERE)
            // Mean of the spherical radii at n+1 and n, plus both sets of angle factors
            amrex::ParticleReal const r_half = 0.5_prt*(std::sqrt(x_new*x_new + y_new*y_new + z_new*z_new)
                                             +          std::sqrt(x_old*x_old + y_old*y_old + z_old*z_old));
            amrex::ParticleReal const rxy_half = std::sqrt(x_half*x_half + y_half*y_half);
            amrex::ParticleReal const cos_th = (rxy_half > 0._prt ? x_half/rxy_half : 1._prt);
            amrex::ParticleReal const sin_th = (rxy_half > 0._prt ? y_half/rxy_half : 0._prt);
            amrex::ParticleReal const cos_ph = (r_half > 0._prt ? rxy_half/r_half : 1._prt);
            amrex::ParticleReal const sin_ph = (r_half > 0._prt ? z_half/r_half : 0._prt);
#endif

            // Start from the uniform external magnetic field
            amrex::ParticleReal Bx_part = Bx_ext;
            amrex::ParticleReal By_part = By_ext;
            amrex::ParticleReal Bz_part = Bz_ext;

            // Add the externally applied, space/time dependent field when enabled.
            // The electric-field outputs of the functor are not used afterwards.
            // NOTE(review): the otherwise unused reference below presumably keeps
            // getExternalEB captured by the lambda when the branch is discarded - confirm.
            [[maybe_unused]] const auto& getExternalEB_tmp = getExternalEB;
            if constexpr (exteb_opt == has_exteb) {
                amrex::ParticleReal Ex_unused = 0._prt;
                amrex::ParticleReal Ey_unused = 0._prt;
                amrex::ParticleReal Ez_unused = 0._prt;
                getExternalEB(pidx, Ex_unused, Ey_unused, Ez_unused, Bx_part, By_part, Bz_part);
            }

            // Gather the grid magnetic field at the particle (both gather orders are 1)
            constexpr int gather_order_perp = 1;
            constexpr int gather_order_para = 1;
            amrex::ParticleReal B1_grid = 0._prt;
            amrex::ParticleReal B2_grid = 0._prt;
            amrex::ParticleReal B3_grid = 0._prt;
            doDirectGatherVectorField<gather_order_perp,gather_order_para>(
#if defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
                                    r_half, 0._prt, 0._prt,
#elif defined(WARPX_DIM_RZ)
                                    r_half, 0._prt, z_half,
#else
                                    x_half, y_half, z_half,
#endif
                                    B1_grid, B2_grid, B3_grid,
                                    Bx_arr, By_arr, Bz_arr,
                                    Bx_type, By_type, Bz_type,
                                    dinv, xyzmin, lo, /*n_rz_azimuthal_modes=*/0 );

            // In the curvilinear geometries the gather above is evaluated at the
            // mean radius with the remaining coordinate(s) set to 0 rather than at
            // the Cartesian position, so the gathered components are in the mapped
            // space. They are rotated to Cartesian components here, before being
            // passed to setMassMatricesKernels().
#if defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RZ)
            // B1_grid = Br and B2_grid = Btheta  ->  Bx and By
            Bx_part += cos_th*B1_grid - sin_th*B2_grid;
            By_part += cos_th*B2_grid + sin_th*B1_grid;
            Bz_part += B3_grid;
#elif defined(WARPX_DIM_RSPHERE)
            // B1_grid = Br, B2_grid = Btheta and B3_grid = Bphi  ->  Bx, By and Bz
            Bx_part += cos_th*cos_ph*B1_grid - sin_th*B2_grid - cos_th*sin_ph*B3_grid;
            By_part += sin_th*cos_ph*B1_grid + cos_th*B2_grid - sin_th*sin_ph*B3_grid;
            Bz_part += sin_ph*B1_grid + cos_ph*B3_grid;
#else
            Bx_part += B1_grid;
            By_part += B2_grid;
            Bz_part += B3_grid;
#endif

            // Inverse Lorentz factor built from the time n and time n+1/2 u components
            const amrex::ParticleReal inv_gamma = GetImplicitGammaInverse(uxp_n[pidx], uyp_n[pidx], uzp_n[pidx],
                                                                          uxp_nph[pidx], uyp_nph[pidx], uzp_nph[pidx]);

            // Weighted charge per volume and the density factor used by the kernels
            const amrex::Real wq_per_vol = qs*wp[pidx]*inv_cell_volume;
            const amrex::Real rho_part = wq_per_vol*inv_gamma;

            // Evaluate the nine mass-matrix kernel coefficients for this particle
            amrex::ParticleReal f_xx, f_xy, f_xz;
            amrex::ParticleReal f_yx, f_yy, f_yz;
            amrex::ParticleReal f_zx, f_zy, f_zz;
            setMassMatricesKernels(qs, ms, dt, rho_part,
#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
                                   cos_th, sin_th,
#endif
                                   uxp_nph[pidx], uyp_nph[pidx], uzp_nph[pidx],
                                   Bx_part, By_part, Bz_part,
                                   f_xx, f_xy, f_xz,
                                   f_yx, f_yy, f_yz,
                                   f_zx, f_zy, f_zz);

            // Placeholder current arrays: the kernel is called with deposit_J = false
            amrex::Array4<amrex::Real> const unused_Jx{};
            amrex::Array4<amrex::Real> const unused_Jy{};
            amrex::Array4<amrex::Real> const unused_Jz{};

            //NOLINTNEXTLINE(readability-suspicious-call-argument)
            doVillasenorJandSigmaDepositionKernel<depos_order,full_mass_matrices,/*deposit_J=*/false>(
                                                                x_old, y_old, z_old,
                                                                x_new, y_new, z_new,
                                                                wq_per_vol,
                                                                uxp_nph[pidx], uyp_nph[pidx], uzp_nph[pidx],
                                                                inv_gamma,
                                                                f_xx, f_xy, f_xz,
                                                                f_yx, f_yy, f_yz,
                                                                f_zx, f_zy, f_zz,
                                                                unused_Jx, unused_Jy, unused_Jz,
                                                                max_crossings,
                                                                Sxx_arr, Sxy_arr, Sxz_arr,
                                                                Syx_arr, Syy_arr, Syz_arr,
                                                                Szx_arr, Szy_arr, Szz_arr,
                                                                dt, dinv, xyzmin, domain_double, do_cropping, lo );

    });
}


#endif // WARPX_MASS_MATRICES_DEPOSITION_H_

AMReX.H

AMReX_Arena.H

AMReX_Array4.H

AMREX_ALWAYS_ASSERT_WITH_MESSAGE
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)

AMReX_Dim3.H

AMREX_INLINE
#define AMREX_INLINE

AMREX_GPU_DEVICE
#define AMREX_GPU_DEVICE

AMREX_GPU_HOST_DEVICE
#define AMREX_GPU_HOST_DEVICE

offset
Array4< int const > offset

AMReX_REAL.H

FieldGather.H

doDirectGatherVectorField
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void doDirectGatherVectorField(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, amrex::ParticleReal &Fxp, amrex::ParticleReal &Fyp, amrex::ParticleReal &Fzp, amrex::Array4< amrex::Real const > const &Fx_arr, amrex::Array4< amrex::Real const > const &Fy_arr, amrex::Array4< amrex::Real const > const &Fz_arr, const amrex::IndexType Fx_type, const amrex::IndexType Fy_type, const amrex::IndexType Fz_type, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 &lo, const int n_rz_azimuthal_modes)
Gather vector field F for a single particle.
Definition FieldGather.H:36

GetAndSetPosition.H

GetExternalFields.H

setMassMatricesKernels
AMREX_GPU_HOST_DEVICE AMREX_INLINE void setMassMatricesKernels(const amrex::ParticleReal qs, const amrex::ParticleReal ms, const amrex::ParticleReal dt, const amrex::ParticleReal rhop, const amrex::ParticleReal costh, const amrex::ParticleReal sinth, const amrex::ParticleReal upx, const amrex::ParticleReal upy, const amrex::ParticleReal upz, const amrex::ParticleReal Bpx, const amrex::ParticleReal Bpy, const amrex::ParticleReal Bpz, amrex::ParticleReal &fpxx, amrex::ParticleReal &fpxy, amrex::ParticleReal &fpxz, amrex::ParticleReal &fpyx, amrex::ParticleReal &fpyy, amrex::ParticleReal &fpyz, amrex::ParticleReal &fpzx, amrex::ParticleReal &fpzy, amrex::ParticleReal &fpzz)
Set the mass matrices kernels for thread thread_num.
Definition MassMatricesDeposition.H:51

doVillasenorJandSigmaDepositionKernel
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doVillasenorJandSigmaDepositionKernel(const amrex::ParticleReal xp_old, const amrex::ParticleReal yp_old, const amrex::ParticleReal zp_old, const amrex::ParticleReal xp_new, const amrex::ParticleReal yp_new, const amrex::ParticleReal zp_new, const amrex::ParticleReal wq_invvol, const amrex::ParticleReal uxp_mid, const amrex::ParticleReal uyp_mid, const amrex::ParticleReal uzp_mid, const amrex::ParticleReal gaminv, const amrex::ParticleReal fpxx, const amrex::ParticleReal fpxy, const amrex::ParticleReal fpxz, const amrex::ParticleReal fpyx, const amrex::ParticleReal fpyy, const amrex::ParticleReal fpyz, const amrex::ParticleReal fpzx, const amrex::ParticleReal fpzy, const amrex::ParticleReal fpzz, amrex::Array4< amrex::Real > const &Jx_arr, amrex::Array4< amrex::Real > const &Jy_arr, amrex::Array4< amrex::Real > const &Jz_arr, int max_crossings, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::GpuArray< amrex::GpuArray< double, 2 >, 3 > &domain_double, const amrex::GpuArray< amrex::GpuArray< bool, 2 >, 3 > &do_cropping, const amrex::Dim3 lo)
Kernel for the Villasenor deposition of J and S (mass matrices) for thread thread_num.
Definition MassMatricesDeposition.H:760

doDirectSigmaDeposition
void doDirectSigmaDeposition(const GetParticlePosition< PIdx > &GetPosition, const int *nsuborbits, const amrex::ParticleReal *wp, const amrex::ParticleReal *uxp_n, const amrex::ParticleReal *uyp_n, const amrex::ParticleReal *uzp_n, const amrex::ParticleReal *uxp_nph, const amrex::ParticleReal *uyp_nph, const amrex::ParticleReal *uzp_nph, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, const amrex::IntVect &jx_type, const amrex::IntVect &jy_type, const amrex::IntVect &jz_type, GetExternalEBField const &getExternalEB, const amrex::ParticleReal Bx_ext, const amrex::ParticleReal By_ext, const amrex::ParticleReal Bz_ext, const amrex::Array4< amrex::Real const > &Bx_arr, const amrex::Array4< amrex::Real const > &By_arr, const amrex::Array4< amrex::Real const > &Bz_arr, const amrex::IndexType Bx_type, const amrex::IndexType By_type, const amrex::IndexType Bz_type, const long np_to_deposit, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real qs, const amrex::Real ms)
Direct deposition of mass matrices for thread thread_num.
Definition MassMatricesDeposition.H:599

doVillasenorSigmaDeposition
void doVillasenorSigmaDeposition(const amrex::ParticleReal *xp_n_data, const amrex::ParticleReal *yp_n_data, const amrex::ParticleReal *zp_n_data, const GetParticlePosition< PIdx > &GetPosition, const int *nsuborbits, const amrex::ParticleReal *wp, const amrex::ParticleReal *uxp_n, const amrex::ParticleReal *uyp_n, const amrex::ParticleReal *uzp_n, const amrex::ParticleReal *uxp_nph, const amrex::ParticleReal *uyp_nph, const amrex::ParticleReal *uzp_nph, const int max_crossings, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, GetExternalEBField const &getExternalEB, const amrex::ParticleReal Bx_ext, const amrex::ParticleReal By_ext, const amrex::ParticleReal Bz_ext, const amrex::Array4< amrex::Real const > &Bx_arr, const amrex::Array4< amrex::Real const > &By_arr, const amrex::Array4< amrex::Real const > &Bz_arr, const amrex::IndexType Bx_type, const amrex::IndexType By_type, const amrex::IndexType Bz_type, const long np_to_deposit, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::GpuArray< amrex::GpuArray< double, 2 >, 3 > &domain_double, const amrex::GpuArray< amrex::GpuArray< bool, 2 >, 3 > &do_cropping, const amrex::Dim3 lo, const amrex::Real qs, const amrex::Real ms)
Villasenor and Buneman deposition of mass matrices for thread thread_num.
Definition MassMatricesDeposition.H:1836

doDirectJandSigmaDepositionKernel
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doDirectJandSigmaDepositionKernel(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, const amrex::Real wq_invvol, const amrex::ParticleReal vx, const amrex::ParticleReal vy, const amrex::ParticleReal vz, const amrex::ParticleReal fpxx, const amrex::ParticleReal fpxy, const amrex::ParticleReal fpxz, const amrex::ParticleReal fpyx, const amrex::ParticleReal fpyy, const amrex::ParticleReal fpyz, const amrex::ParticleReal fpzx, const amrex::ParticleReal fpzy, const amrex::ParticleReal fpzz, amrex::Array4< amrex::Real > const &jx_arr, amrex::Array4< amrex::Real > const &jy_arr, amrex::Array4< amrex::Real > const &jz_arr, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, const amrex::IntVect &jx_type, const amrex::IntVect &jy_type, const amrex::IntVect &jz_type, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo)
Kernel for the direct deposition of J and S (mass matrices) for thread thread_num.
Definition MassMatricesDeposition.H:154

RigidAdvanceMode::vz
@ vz
Definition RigidInjectedParticleContainer.H:27

ShapeFactors.H

SharedDepositionUtils.H

UpdatePosition.H

GetImplicitGammaInverse
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE amrex::ParticleReal GetImplicitGammaInverse(const amrex::ParticleReal uxp_n, const amrex::ParticleReal uyp_n, const amrex::ParticleReal uzp_n, const amrex::ParticleReal uxp_nph, const amrex::ParticleReal uyp_nph, const amrex::ParticleReal uzp_nph) noexcept
Compute the inverse Lorentz factor for the position update in the implicit methods,...
Definition UpdatePosition.H:77

TextMsg.H

WarpX_Complex.H

WarpXAlgorithmSelection.H

WarpXConst.H

amrex::IntVectND< 3 >::TheZeroVector
__host__ static __device__ constexpr IntVectND< dim > TheZeroVector() noexcept

amrex::Real
amrex_real Real

amrex::ParticleReal
amrex_particle_real ParticleReal

amrex::Array4
ArrayND< T, 4, true > Array4

ParticleUtils::crop_at_boundary
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void crop_at_boundary(double &x1, double xmin, double xmax, amrex::GpuArray< bool, 2 > const &do_cropping)
Definition ParticleUtils.H:256

ablastr::constant::SI::inv_c2_v
constexpr auto inv_c2_v
inverse of the square of the vacuum speed of light [s^2/m^2] (variable template)
Definition constant.H:153

amrex::Gpu::Atomic::AddNoRet
__host__ __device__ AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept

amrex::literals

amrex::ignore_unused
__host__ __device__ void ignore_unused(const Ts &...)

amrex::ParallelFor
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)

amrex::shift
__host__ __device__ BoxND< dim > shift(const BoxND< dim > &b, int dir, int nzones) noexcept

amrex::IndexType
IndexTypeND< 3 > IndexType

amrex::IntVect
IntVectND< 3 > IntVect

Compute_shape_factor_pair
Definition ShapeFactors.H:168

Compute_shape_factor
Definition ShapeFactors.H:29

GetExternalEBField
Functor class that assigns external field values (E and B) to particles.
Definition GetExternalFields.H:23

GetExternalEBField::isNoOp
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool isNoOp() const
Definition GetExternalFields.H:60

GetParticlePosition
Functor that can be used to extract the positions of the macroparticles inside a ParallelFor kernel.
Definition GetAndSetPosition.H:75

amrex::CellIndexEnum::CELL
CELL

amrex::CellIndexEnum::NODE
NODE

amrex::CompileTimeOptions

amrex::Dim3

amrex::Dim3::x
int x

amrex::Dim3::z
int z

amrex::Dim3::y
int y

amrex::GpuArray

amrex::TypeList

amrex::XDim3

amrex::XDim3::x
Real x

amrex::XDim3::z
Real z

amrex::XDim3::y
Real y