WarpX
CurrentDeposition.H
Go to the documentation of this file.
1 /* Copyright 2019 Axel Huebl, David Grote, Maxence Thevenet
2  * Remi Lehe, Weiqun Zhang, Michael Rowan
3  *
4  * This file is part of WarpX.
5  *
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef WARPX_CURRENTDEPOSITION_H_
9 #define WARPX_CURRENTDEPOSITION_H_
10 
14 #include "Particles/ShapeFactors.H"
15 #include "Utils/TextMsg.H"
17 #include "Utils/WarpXConst.H"
18 #ifdef WARPX_DIM_RZ
19 # include "Utils/WarpX_Complex.H"
20 #endif
21 
22 #include "WarpX.H" // todo: remove include and pass globals as args
23 
24 #include <AMReX.H>
25 #include <AMReX_Arena.H>
26 #include <AMReX_Array4.H>
27 #include <AMReX_Dim3.H>
28 #include <AMReX_REAL.H>
29 
48 template <int depos_order>
50 void doDepositionShapeNKernel([[maybe_unused]] const amrex::ParticleReal xp,
51  [[maybe_unused]] const amrex::ParticleReal yp,
52  [[maybe_unused]] const amrex::ParticleReal zp,
53  const amrex::ParticleReal wq,
54  const amrex::ParticleReal vx,
55  const amrex::ParticleReal vy,
56  const amrex::ParticleReal vz,
57  amrex::Array4<amrex::Real> const& jx_arr,
58  amrex::Array4<amrex::Real> const& jy_arr,
59  amrex::Array4<amrex::Real> const& jz_arr,
60  amrex::IntVect const& jx_type,
61  amrex::IntVect const& jy_type,
62  amrex::IntVect const& jz_type,
63  const amrex::Real relative_time,
64  const amrex::XDim3 & dinv,
65  const amrex::XDim3 & xyzmin,
66  const amrex::Real invvol,
67  const amrex::Dim3 lo,
68  [[maybe_unused]] const int n_rz_azimuthal_modes)
69 {
70  using namespace amrex::literals;
71 
72  constexpr int NODE = amrex::IndexType::NODE;
73  constexpr int CELL = amrex::IndexType::CELL;
74 
75  // wqx, wqy wqz are particle current in each direction
76 #if defined(WARPX_DIM_RZ)
77  // In RZ, wqx is actually wqr, and wqy is wqtheta
78  // Convert to cylindrical at the mid point
79  const amrex::Real xpmid = xp + relative_time*vx;
80  const amrex::Real ypmid = yp + relative_time*vy;
81  const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
82  const amrex::Real costheta = (rpmid > 0._rt ? xpmid/rpmid : 1._rt);
83  const amrex::Real sintheta = (rpmid > 0._rt ? ypmid/rpmid : 0._rt);
84  const Complex xy0 = Complex{costheta, sintheta};
85  const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
86  const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
87 #else
88  const amrex::Real wqx = wq*invvol*vx;
89  const amrex::Real wqy = wq*invvol*vy;
90 #endif
91  const amrex::Real wqz = wq*invvol*vz;
92 
93  // --- Compute shape factors
94  Compute_shape_factor< depos_order > const compute_shape_factor;
95 #if (AMREX_SPACEDIM >= 2)
96  // x direction
97  // Get particle position after 1/2 push back in position
98  // Keep these double to avoid bug in single precision
99 #if defined(WARPX_DIM_RZ)
100  const double xmid = (rpmid - xyzmin.x)*dinv.x;
101 #else
102  const double xmid = ((xp - xyzmin.x) + relative_time*vx)*dinv.x;
103 #endif
104 
105  // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
106  // sx_j[xyz] shape factor along x for the centering of each current
107  // There are only two possible centerings, node or cell centered, so at most only two shape factor
108  // arrays will be needed.
109  // Keep these double to avoid bug in single precision
110  double sx_node[depos_order + 1] = {0.};
111  double sx_cell[depos_order + 1] = {0.};
112  int j_node = 0;
113  int j_cell = 0;
114  if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
115  j_node = compute_shape_factor(sx_node, xmid);
116  }
117  if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
118  j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
119  }
120 
121  amrex::Real sx_jx[depos_order + 1] = {0._rt};
122  amrex::Real sx_jy[depos_order + 1] = {0._rt};
123  amrex::Real sx_jz[depos_order + 1] = {0._rt};
124  for (int ix=0; ix<=depos_order; ix++)
125  {
126  sx_jx[ix] = ((jx_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
127  sx_jy[ix] = ((jy_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
128  sx_jz[ix] = ((jz_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
129  }
130 
131  int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
132  int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
133  int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
134 #endif //AMREX_SPACEDIM >= 2
135 
136 #if defined(WARPX_DIM_3D)
137  // y direction
138  // Keep these double to avoid bug in single precision
139  const double ymid = ((yp - xyzmin.y) + relative_time*vy)*dinv.y;
140  double sy_node[depos_order + 1] = {0.};
141  double sy_cell[depos_order + 1] = {0.};
142  int k_node = 0;
143  int k_cell = 0;
144  if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
145  k_node = compute_shape_factor(sy_node, ymid);
146  }
147  if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
148  k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
149  }
150  amrex::Real sy_jx[depos_order + 1] = {0._rt};
151  amrex::Real sy_jy[depos_order + 1] = {0._rt};
152  amrex::Real sy_jz[depos_order + 1] = {0._rt};
153  for (int iy=0; iy<=depos_order; iy++)
154  {
155  sy_jx[iy] = ((jx_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
156  sy_jy[iy] = ((jy_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
157  sy_jz[iy] = ((jz_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
158  }
159  int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
160  int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
161  int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
162 #endif
163 
164  // z direction
165  // Keep these double to avoid bug in single precision
166  constexpr int zdir = WARPX_ZINDEX;
167  const double zmid = ((zp - xyzmin.z) + relative_time*vz)*dinv.z;
168  double sz_node[depos_order + 1] = {0.};
169  double sz_cell[depos_order + 1] = {0.};
170  int l_node = 0;
171  int l_cell = 0;
172  if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
173  l_node = compute_shape_factor(sz_node, zmid);
174  }
175  if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
176  l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
177  }
178  amrex::Real sz_jx[depos_order + 1] = {0._rt};
179  amrex::Real sz_jy[depos_order + 1] = {0._rt};
180  amrex::Real sz_jz[depos_order + 1] = {0._rt};
181  for (int iz=0; iz<=depos_order; iz++)
182  {
183  sz_jx[iz] = ((jx_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
184  sz_jy[iz] = ((jy_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
185  sz_jz[iz] = ((jz_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
186  }
187  int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
188  int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
189  int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
190 
191  // Deposit current into jx_arr, jy_arr and jz_arr
192 #if defined(WARPX_DIM_1D_Z)
193  for (int iz=0; iz<=depos_order; iz++){
195  &jx_arr(lo.x+l_jx+iz, 0, 0, 0),
196  sz_jx[iz]*wqx);
198  &jy_arr(lo.x+l_jy+iz, 0, 0, 0),
199  sz_jy[iz]*wqy);
201  &jz_arr(lo.x+l_jz+iz, 0, 0, 0),
202  sz_jz[iz]*wqz);
203  }
204 #endif
205 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
206  for (int iz=0; iz<=depos_order; iz++){
207  for (int ix=0; ix<=depos_order; ix++){
209  &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0),
210  sx_jx[ix]*sz_jx[iz]*wqx);
212  &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0),
213  sx_jy[ix]*sz_jy[iz]*wqy);
215  &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0),
216  sx_jz[ix]*sz_jz[iz]*wqz);
217 #if defined(WARPX_DIM_RZ)
218  Complex xy = xy0; // Note that xy is equal to e^{i m theta}
219  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
220  // The factor 2 on the weighting comes from the normalization of the modes
221  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode-1), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.real());
222  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode ), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.imag());
223  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode-1), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.real());
224  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode ), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.imag());
225  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode-1), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.real());
226  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode ), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.imag());
227  xy = xy*xy0;
228  }
229 #endif
230  }
231  }
232 #elif defined(WARPX_DIM_3D)
233  for (int iz=0; iz<=depos_order; iz++){
234  for (int iy=0; iy<=depos_order; iy++){
235  for (int ix=0; ix<=depos_order; ix++){
237  &jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz),
238  sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
240  &jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz),
241  sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
243  &jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz),
244  sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
245  }
246  }
247  }
248 #endif
249 }
250 
273 template <int depos_order>
275  const amrex::ParticleReal * const wp,
276  const amrex::ParticleReal * const uxp,
277  const amrex::ParticleReal * const uyp,
278  const amrex::ParticleReal * const uzp,
279  const int* ion_lev,
280  amrex::FArrayBox& jx_fab,
281  amrex::FArrayBox& jy_fab,
282  amrex::FArrayBox& jz_fab,
283  long np_to_deposit,
284  amrex::Real relative_time,
285  const amrex::XDim3 & dinv,
286  const amrex::XDim3 & xyzmin,
287  amrex::Dim3 lo,
288  amrex::Real q,
289  [[maybe_unused]]int n_rz_azimuthal_modes)
290 {
291  using namespace amrex::literals;
292 
293  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
294  // (do_ionization=1)
295  const bool do_ionization = ion_lev;
296 
297  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
298 
299  const amrex::Real clightsq = 1.0_rt/PhysConst::c/PhysConst::c;
300 
301  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
302  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
303  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
304  amrex::IntVect const jx_type = jx_fab.box().type();
305  amrex::IntVect const jy_type = jy_fab.box().type();
306  amrex::IntVect const jz_type = jz_fab.box().type();
307 
308  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
310  np_to_deposit,
311  [=] AMREX_GPU_DEVICE (long ip) {
312  amrex::ParticleReal xp, yp, zp;
313  GetPosition(ip, xp, yp, zp);
314 
315  // --- Get particle quantities
316  const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
317  + uyp[ip]*uyp[ip]*clightsq
318  + uzp[ip]*uzp[ip]*clightsq);
319  const amrex::Real vx = uxp[ip]*gaminv;
320  const amrex::Real vy = uyp[ip]*gaminv;
321  const amrex::Real vz = uzp[ip]*gaminv;
322 
323  amrex::Real wq = q*wp[ip];
324  if (do_ionization){
325  wq *= ion_lev[ip];
326  }
327 
328  doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
329  jx_type, jy_type, jz_type,
330  relative_time, dinv, xyzmin,
331  invvol, lo, n_rz_azimuthal_modes);
332 
333  }
334  );
335 }
336 
358 template <int depos_order>
360  const amrex::ParticleReal * const wp,
361  const amrex::ParticleReal * const uxp_n,
362  const amrex::ParticleReal * const uyp_n,
363  const amrex::ParticleReal * const uzp_n,
364  const amrex::ParticleReal * const uxp,
365  const amrex::ParticleReal * const uyp,
366  const amrex::ParticleReal * const uzp,
367  const int * const ion_lev,
368  amrex::FArrayBox& jx_fab,
369  amrex::FArrayBox& jy_fab,
370  amrex::FArrayBox& jz_fab,
371  const long np_to_deposit,
372  const amrex::XDim3 & dinv,
373  const amrex::XDim3 & xyzmin,
374  const amrex::Dim3 lo,
375  const amrex::Real q,
376  [[maybe_unused]]const int n_rz_azimuthal_modes)
377 {
378  using namespace amrex::literals;
379 
380  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
381  // (do_ionization=1)
382  const bool do_ionization = ion_lev;
383 
384  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
385 
386  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
387  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
388  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
389  amrex::IntVect const jx_type = jx_fab.box().type();
390  amrex::IntVect const jy_type = jy_fab.box().type();
391  amrex::IntVect const jz_type = jz_fab.box().type();
392 
393  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
395  np_to_deposit,
396  [=] AMREX_GPU_DEVICE (long ip) {
397  amrex::ParticleReal xp, yp, zp;
398  GetPosition(ip, xp, yp, zp);
399 
400  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
401 
402  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
403  // The uxp,uyp,uzp are the velocities at time level n+1/2
404  const amrex::ParticleReal uxp_np1 = 2._prt*uxp[ip] - uxp_n[ip];
405  const amrex::ParticleReal uyp_np1 = 2._prt*uyp[ip] - uyp_n[ip];
406  const amrex::ParticleReal uzp_np1 = 2._prt*uzp[ip] - uzp_n[ip];
407  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
408  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
409  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
410 
411  const amrex::Real vx = uxp[ip]*gaminv;
412  const amrex::Real vy = uyp[ip]*gaminv;
413  const amrex::Real vz = uzp[ip]*gaminv;
414 
415  amrex::Real wq = q*wp[ip];
416  if (do_ionization){
417  wq *= ion_lev[ip];
418  }
419 
420  const amrex::Real relative_time = 0._rt;
421  doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
422  jx_type, jy_type, jz_type,
423  relative_time, dinv, xyzmin,
424  invvol, lo, n_rz_azimuthal_modes);
425 
426  }
427  );
428 }
429 
453 template <int depos_order>
455  const amrex::ParticleReal * const wp,
456  const amrex::ParticleReal * const uxp,
457  const amrex::ParticleReal * const uyp,
458  const amrex::ParticleReal * const uzp,
459  const int* ion_lev,
460  amrex::FArrayBox& jx_fab,
461  amrex::FArrayBox& jy_fab,
462  amrex::FArrayBox& jz_fab,
463  long np_to_deposit,
464  const amrex::Real relative_time,
465  const amrex::XDim3 & dinv,
466  const amrex::XDim3 & xyzmin,
467  amrex::Dim3 lo,
468  amrex::Real q,
469  int n_rz_azimuthal_modes,
471  const amrex::Box& box,
472  const amrex::Geometry& geom,
473  const amrex::IntVect& a_tbox_max_size)
474 {
475  using namespace amrex::literals;
476 
477 #if defined(AMREX_USE_HIP) || defined(AMREX_USE_CUDA)
478  using namespace amrex;
479 
480  auto permutation = a_bins.permutationPtr();
481 
482  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
483  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
484  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
485  amrex::IntVect const jx_type = jx_fab.box().type();
486  amrex::IntVect const jy_type = jy_fab.box().type();
487  amrex::IntVect const jz_type = jz_fab.box().type();
488 
489  constexpr int zdir = WARPX_ZINDEX;
490  constexpr int NODE = amrex::IndexType::NODE;
491  constexpr int CELL = amrex::IndexType::CELL;
492 
493  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
494  const auto dxiarr = geom.InvCellSizeArray();
495  const auto plo = geom.ProbLoArray();
496  const auto domain = geom.Domain();
497 
498  amrex::Box sample_tbox(IntVect(AMREX_D_DECL(0,0,0)), a_tbox_max_size - 1);
499  sample_tbox.grow(depos_order);
500 
501  amrex::Box sample_tbox_x = convert(sample_tbox, jx_type);
502  amrex::Box sample_tbox_y = convert(sample_tbox, jy_type);
503  amrex::Box sample_tbox_z = convert(sample_tbox, jz_type);
504 
505  const auto npts = amrex::max(sample_tbox_x.numPts(), sample_tbox_y.numPts(), sample_tbox_z.numPts());
506 
507  const int nblocks = a_bins.numBins();
508  const int threads_per_block = WarpX::shared_mem_current_tpb;
509  const auto offsets_ptr = a_bins.offsetsPtr();
510 
511  const std::size_t shared_mem_bytes = npts*sizeof(amrex::Real);
512  const amrex::IntVect bin_size = WarpX::shared_tilesize;
513  const std::size_t max_shared_mem_bytes = amrex::Gpu::Device::sharedMemPerBlock();
514  WARPX_ALWAYS_ASSERT_WITH_MESSAGE(shared_mem_bytes <= max_shared_mem_bytes,
515  "Tile size too big for GPU shared memory current deposition");
516 
517  amrex::ignore_unused(np_to_deposit);
518  // Launch one thread-block per bin
520  nblocks, threads_per_block, shared_mem_bytes, amrex::Gpu::gpuStream(),
521  [=] AMREX_GPU_DEVICE () noexcept {
522  const int bin_id = blockIdx.x;
523  const unsigned int bin_start = offsets_ptr[bin_id];
524  const unsigned int bin_stop = offsets_ptr[bin_id+1];
525 
526  if (bin_start == bin_stop) { return; /*this bin has no particles*/ }
527 
528  // These boxes define the index space for the shared memory buffers
529  amrex::Box buffer_box;
530  {
531  ParticleReal xp, yp, zp;
532  GetPosition(permutation[bin_start], xp, yp, zp);
533 #if defined(WARPX_DIM_3D)
534  IntVect iv = IntVect(int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
535  int( amrex::Math::floor((yp-plo[1]) * dxiarr[1]) ),
536  int( amrex::Math::floor((zp-plo[2]) * dxiarr[2]) ));
537 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
538  IntVect iv = IntVect(int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
539  int( amrex::Math::floor((zp-plo[1]) * dxiarr[1]) ));
540 #elif defined(WARPX_DIM_1D_Z)
541  IntVect iv = IntVect(int( amrex::Math::floor((zp-plo[0]) * dxiarr[0]) ));
542 #endif
543  iv += domain.smallEnd();
544  getTileIndex(iv, box, true, bin_size, buffer_box);
545  }
546 
547  buffer_box.grow(depos_order);
548  Box tbox_x = convert(buffer_box, jx_type);
549  Box tbox_y = convert(buffer_box, jy_type);
550  Box tbox_z = convert(buffer_box, jz_type);
551 
553  amrex::Real* const shared = gsm.dataPtr();
554 
555  amrex::Array4<amrex::Real> const jx_buff(shared,
556  amrex::begin(tbox_x), amrex::end(tbox_x), 1);
557  amrex::Array4<amrex::Real> const jy_buff(shared,
558  amrex::begin(tbox_y), amrex::end(tbox_y), 1);
559  amrex::Array4<amrex::Real> const jz_buff(shared,
560  amrex::begin(tbox_z), amrex::end(tbox_z), 1);
561 
562  // Zero-initialize the temporary array in shared memory
563  volatile amrex::Real* vs = shared;
564  for (int i = threadIdx.x; i < npts; i += blockDim.x){
565  vs[i] = 0.0;
566  }
567  __syncthreads();
568  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
569  {
570  const unsigned int ip = permutation[ip_orig];
571  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_buff, jx_type,
572  relative_time, dinv, xyzmin, lo, q, n_rz_azimuthal_modes,
573  ip, zdir, NODE, CELL, 0);
574  }
575 
576  __syncthreads();
577  addLocalToGlobal(tbox_x, jx_arr, jx_buff);
578  for (int i = threadIdx.x; i < npts; i += blockDim.x){
579  vs[i] = 0.0;
580  }
581 
582  __syncthreads();
583  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
584  {
585  const unsigned int ip = permutation[ip_orig];
586  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jy_buff, jy_type,
587  relative_time, dinv, xyzmin, lo, q, n_rz_azimuthal_modes,
588  ip, zdir, NODE, CELL, 1);
589  }
590 
591  __syncthreads();
592  addLocalToGlobal(tbox_y, jy_arr, jy_buff);
593  for (int i = threadIdx.x; i < npts; i += blockDim.x){
594  vs[i] = 0.0;
595  }
596 
597  __syncthreads();
598  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
599  {
600  const unsigned int ip = permutation[ip_orig];
601  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jz_buff, jz_type,
602  relative_time, dinv, xyzmin, lo, q, n_rz_azimuthal_modes,
603  ip, zdir, NODE, CELL, 2);
604  }
605 
606  __syncthreads();
607  addLocalToGlobal(tbox_z, jz_arr, jz_buff);
608  });
609 #else // not using hip/cuda
610  // Note, you should never reach this part of the code. This funcion cannot be called unless
611  // using HIP/CUDA, and those things are checked prior
612  //don't use any args
613  ignore_unused(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dinv, xyzmin, lo, q, n_rz_azimuthal_modes, a_bins, box, geom, a_tbox_max_size);
614  WARPX_ABORT_WITH_MESSAGE("Shared memory only implemented for HIP/CUDA");
615 #endif
616 }
617 
642 template <int depos_order>
644  const amrex::ParticleReal * const wp,
645  const amrex::ParticleReal * const uxp,
646  const amrex::ParticleReal * const uyp,
647  const amrex::ParticleReal * const uzp,
648  const int* ion_lev,
649  const amrex::Array4<amrex::Real>& Jx_arr,
650  const amrex::Array4<amrex::Real>& Jy_arr,
651  const amrex::Array4<amrex::Real>& Jz_arr,
652  long np_to_deposit,
653  amrex::Real dt,
654  amrex::Real relative_time,
655  const amrex::XDim3 & dinv,
656  const amrex::XDim3 & xyzmin,
657  amrex::Dim3 lo,
658  amrex::Real q,
659  [[maybe_unused]]int n_rz_azimuthal_modes)
660 {
661  using namespace amrex;
662  using namespace amrex::literals;
663 
664  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
665  // (do_ionization=1)
666  bool const do_ionization = ion_lev;
667 #if !defined(WARPX_DIM_3D)
668  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
669 #endif
670 
671  amrex::XDim3 const invdtd = amrex::XDim3{(1.0_rt/dt)*dinv.y*dinv.z,
672  (1.0_rt/dt)*dinv.x*dinv.z,
673  (1.0_rt/dt)*dinv.x*dinv.y};
674 
675  Real constexpr clightsq = 1.0_rt / ( PhysConst::c * PhysConst::c );
676 
677 #if !defined(WARPX_DIM_1D_Z)
678  Real constexpr one_third = 1.0_rt / 3.0_rt;
679  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
680 #endif
681 
682  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
684  np_to_deposit,
685  [=] AMREX_GPU_DEVICE (long const ip) {
686  // --- Get particle quantities
687  Real const gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
688  + uyp[ip]*uyp[ip]*clightsq
689  + uzp[ip]*uzp[ip]*clightsq);
690 
691  Real wq = q*wp[ip];
692  if (do_ionization){
693  wq *= ion_lev[ip];
694  }
695 
696  ParticleReal xp, yp, zp;
697  GetPosition(ip, xp, yp, zp);
698 
699  // computes current and old position in grid units
700 #if defined(WARPX_DIM_RZ)
701  Real const xp_new = xp + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv;
702  Real const yp_new = yp + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv;
703  Real const xp_mid = xp_new - 0.5_rt*dt*uxp[ip]*gaminv;
704  Real const yp_mid = yp_new - 0.5_rt*dt*uyp[ip]*gaminv;
705  Real const xp_old = xp_new - dt*uxp[ip]*gaminv;
706  Real const yp_old = yp_new - dt*uyp[ip]*gaminv;
707  Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
708  Real const rp_mid = std::sqrt(xp_mid*xp_mid + yp_mid*yp_mid);
709  Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
710  const amrex::Real costheta_mid = (rp_mid > 0._rt ? xp_mid/rp_mid : 1._rt);
711  const amrex::Real sintheta_mid = (rp_mid > 0._rt ? yp_mid/rp_mid : 0._rt);
712  const amrex::Real costheta_new = (rp_new > 0._rt ? xp_new/rp_new : 1._rt);
713  const amrex::Real sintheta_new = (rp_new > 0._rt ? yp_new/rp_new : 0._rt);
714  const amrex::Real costheta_old = (rp_old > 0._rt ? xp_old/rp_old : 1._rt);
715  const amrex::Real sintheta_old = (rp_old > 0._rt ? yp_old/rp_old : 0._rt);
716  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
717  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
718  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
719  // Keep these double to avoid bug in single precision
720  double const x_new = (rp_new - xyzmin.x)*dinv.x;
721  double const x_old = (rp_old - xyzmin.x)*dinv.x;
722 #else
723 #if !defined(WARPX_DIM_1D_Z)
724  // Keep these double to avoid bug in single precision
725  double const x_new = (xp - xyzmin.x + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv)*dinv.x;
726  double const x_old = x_new - dt*dinv.x*uxp[ip]*gaminv;
727 #endif
728 #endif
729 #if defined(WARPX_DIM_3D)
730  // Keep these double to avoid bug in single precision
731  double const y_new = (yp - xyzmin.y + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv)*dinv.y;
732  double const y_old = y_new - dt*dinv.y*uyp[ip]*gaminv;
733 #endif
734  // Keep these double to avoid bug in single precision
735  double const z_new = (zp - xyzmin.z + (relative_time + 0.5_rt*dt)*uzp[ip]*gaminv)*dinv.z;
736  double const z_old = z_new - dt*dinv.z*uzp[ip]*gaminv;
737 
738 #if defined(WARPX_DIM_RZ)
739  Real const vy = (-uxp[ip]*sintheta_mid + uyp[ip]*costheta_mid)*gaminv;
740 #elif defined(WARPX_DIM_XZ)
741  Real const vy = uyp[ip]*gaminv;
742 #elif defined(WARPX_DIM_1D_Z)
743  Real const vx = uxp[ip]*gaminv;
744  Real const vy = uyp[ip]*gaminv;
745 #endif
746 
747  // --- Compute shape factors
748  // Compute shape factors for position as they are now and at old positions
749  // [ijk]_new: leftmost grid point that the particle touches
750  const Compute_shape_factor< depos_order > compute_shape_factor;
751  const Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
752 
753  // Shape factor arrays
754  // Note that there are extra values above and below
755  // to possibly hold the factor for the old particle
756  // which can be at a different grid location.
757  // Keep these double to avoid bug in single precision
758 #if !defined(WARPX_DIM_1D_Z)
759  double sx_new[depos_order + 3] = {0.};
760  double sx_old[depos_order + 3] = {0.};
761  const int i_new = compute_shape_factor(sx_new+1, x_new);
762  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
763 #endif
764 #if defined(WARPX_DIM_3D)
765  double sy_new[depos_order + 3] = {0.};
766  double sy_old[depos_order + 3] = {0.};
767  const int j_new = compute_shape_factor(sy_new+1, y_new);
768  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
769 #endif
770  double sz_new[depos_order + 3] = {0.};
771  double sz_old[depos_order + 3] = {0.};
772  const int k_new = compute_shape_factor(sz_new+1, z_new);
773  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
774 
775  // computes min/max positions of current contributions
776 #if !defined(WARPX_DIM_1D_Z)
777  int dil = 1, diu = 1;
778  if (i_old < i_new) { dil = 0; }
779  if (i_old > i_new) { diu = 0; }
780 #endif
781 #if defined(WARPX_DIM_3D)
782  int djl = 1, dju = 1;
783  if (j_old < j_new) { djl = 0; }
784  if (j_old > j_new) { dju = 0; }
785 #endif
786  int dkl = 1, dku = 1;
787  if (k_old < k_new) { dkl = 0; }
788  if (k_old > k_new) { dku = 0; }
789 
790 #if defined(WARPX_DIM_3D)
791 
792  for (int k=dkl; k<=depos_order+2-dku; k++) {
793  for (int j=djl; j<=depos_order+2-dju; j++) {
794  amrex::Real sdxi = 0._rt;
795  for (int i=dil; i<=depos_order+1-diu; i++) {
796  sdxi += wq*invdtd.x*(sx_old[i] - sx_new[i])*(
797  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
798  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
799  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
800  }
801  }
802  }
803  for (int k=dkl; k<=depos_order+2-dku; k++) {
804  for (int i=dil; i<=depos_order+2-diu; i++) {
805  amrex::Real sdyj = 0._rt;
806  for (int j=djl; j<=depos_order+1-dju; j++) {
807  sdyj += wq*invdtd.y*(sy_old[j] - sy_new[j])*(
808  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
809  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
810  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
811  }
812  }
813  }
814  for (int j=djl; j<=depos_order+2-dju; j++) {
815  for (int i=dil; i<=depos_order+2-diu; i++) {
816  amrex::Real sdzk = 0._rt;
817  for (int k=dkl; k<=depos_order+1-dku; k++) {
818  sdzk += wq*invdtd.z*(sz_old[k] - sz_new[k])*(
819  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
820  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
821  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
822  }
823  }
824  }
825 
826 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
827 
828  for (int k=dkl; k<=depos_order+2-dku; k++) {
829  amrex::Real sdxi = 0._rt;
830  for (int i=dil; i<=depos_order+1-diu; i++) {
831  sdxi += wq*invdtd.x*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
832  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
833 #if defined(WARPX_DIM_RZ)
834  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
835  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
836  // The factor 2 comes from the normalization of the modes
837  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
838  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
839  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
840  xy_mid = xy_mid*xy_mid0;
841  }
842 #endif
843  }
844  }
845  for (int k=dkl; k<=depos_order+2-dku; k++) {
846  for (int i=dil; i<=depos_order+2-diu; i++) {
847  Real const sdyj = wq*vy*invvol*(
848  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
849  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
850  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
851 #if defined(WARPX_DIM_RZ)
852  Complex const I = Complex{0._rt, 1._rt};
853  Complex xy_new = xy_new0;
854  Complex xy_mid = xy_mid0;
855  Complex xy_old = xy_old0;
856  // Throughout the following loop, xy_ takes the value e^{i m theta_}
857  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
858  // The factor 2 comes from the normalization of the modes
859  // The minus sign comes from the different convention with respect to Davidson et al.
860  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xyzmin.x*dinv.x)*wq*invdtd.x/(amrex::Real)imode
861  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
862  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
863  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
864  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
865  xy_new = xy_new*xy_new0;
866  xy_mid = xy_mid*xy_mid0;
867  xy_old = xy_old*xy_old0;
868  }
869 #endif
870  }
871  }
872  for (int i=dil; i<=depos_order+2-diu; i++) {
873  Real sdzk = 0._rt;
874  for (int k=dkl; k<=depos_order+1-dku; k++) {
875  sdzk += wq*invdtd.z*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
876  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
877 #if defined(WARPX_DIM_RZ)
878  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
879  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
880  // The factor 2 comes from the normalization of the modes
881  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
882  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
883  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
884  xy_mid = xy_mid*xy_mid0;
885  }
886 #endif
887  }
888  }
889 #elif defined(WARPX_DIM_1D_Z)
890 
891  for (int k=dkl; k<=depos_order+2-dku; k++) {
892  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
893  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
894  }
895  for (int k=dkl; k<=depos_order+2-dku; k++) {
896  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
897  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
898  }
899  amrex::Real sdzk = 0._rt;
900  for (int k=dkl; k<=depos_order+1-dku; k++) {
901  sdzk += wq*invdtd.z*(sz_old[k] - sz_new[k]);
902  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
903  }
904 #endif
905  }
906  );
907 }
908 
933 template <int depos_order>
934 void doChargeConservingDepositionShapeNImplicit ([[maybe_unused]]const amrex::ParticleReal * const xp_n,
935  [[maybe_unused]]const amrex::ParticleReal * const yp_n,
936  [[maybe_unused]]const amrex::ParticleReal * const zp_n,
937  const GetParticlePosition<PIdx>& GetPosition,
938  const amrex::ParticleReal * const wp,
939  [[maybe_unused]]const amrex::ParticleReal * const uxp_n,
940  [[maybe_unused]]const amrex::ParticleReal * const uyp_n,
941  [[maybe_unused]]const amrex::ParticleReal * const uzp_n,
942  [[maybe_unused]]const amrex::ParticleReal * const uxp_nph,
943  [[maybe_unused]]const amrex::ParticleReal * const uyp_nph,
944  [[maybe_unused]]const amrex::ParticleReal * const uzp_nph,
945  const int * const ion_lev,
946  const amrex::Array4<amrex::Real>& Jx_arr,
947  const amrex::Array4<amrex::Real>& Jy_arr,
948  const amrex::Array4<amrex::Real>& Jz_arr,
949  const long np_to_deposit,
950  const amrex::Real dt,
951  const amrex::XDim3 & dinv,
952  const amrex::XDim3 & xyzmin,
953  const amrex::Dim3 lo,
954  const amrex::Real q,
955  [[maybe_unused]] const int n_rz_azimuthal_modes)
956 {
957  using namespace amrex;
958 
959  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
960  // (do_ionization=1)
961  bool const do_ionization = ion_lev;
962 
963 #if !defined(WARPX_DIM_3D)
964  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
965 #endif
966 
967  amrex::XDim3 const invdtd = amrex::XDim3{(1.0_rt/dt)*dinv.y*dinv.z,
968  (1.0_rt/dt)*dinv.x*dinv.z,
969  (1.0_rt/dt)*dinv.x*dinv.y};
970 
971 #if !defined(WARPX_DIM_1D_Z)
972  Real constexpr one_third = 1.0_rt / 3.0_rt;
973  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
974 #endif
975 
976  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
978  np_to_deposit,
979  [=] AMREX_GPU_DEVICE (long const ip) {
980 #if !defined(WARPX_DIM_3D)
981  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
982 
983  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
984  // The uxp,uyp,uzp are the velocities at time level n+1/2
985  const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
986  const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
987  const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
988  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
989  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
990  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
991 #endif
992 
993  Real wq = q*wp[ip];
994  if (do_ionization){
995  wq *= ion_lev[ip];
996  }
997 
998  ParticleReal xp_nph, yp_nph, zp_nph;
999  GetPosition(ip, xp_nph, yp_nph, zp_nph);
1000 
1001 #if !defined(WARPX_DIM_1D_Z)
1002  ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n[ip];
1003 #endif
1004 #if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
1005  ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n[ip];
1006 #endif
1007  ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n[ip];
1008 
1009  // computes current and old position in grid units
1010 #if defined(WARPX_DIM_RZ)
1011  amrex::Real const xp_new = xp_np1;
1012  amrex::Real const yp_new = yp_np1;
1013  amrex::Real const xp_mid = xp_nph;
1014  amrex::Real const yp_mid = yp_nph;
1015  amrex::Real const xp_old = xp_n[ip];
1016  amrex::Real const yp_old = yp_n[ip];
1017  amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
1018  amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
1019  amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
1020  const amrex::Real costheta_mid = (rp_mid > 0._rt ? xp_mid/rp_mid : 1._rt);
1021  const amrex::Real sintheta_mid = (rp_mid > 0._rt ? yp_mid/rp_mid : 0._rt);
1022  const amrex::Real costheta_new = (rp_new > 0._rt ? xp_new/rp_new : 1._rt);
1023  const amrex::Real sintheta_new = (rp_new > 0._rt ? yp_new/rp_new : 0._rt);
1024  const amrex::Real costheta_old = (rp_old > 0._rt ? xp_old/rp_old : 1._rt);
1025  const amrex::Real sintheta_old = (rp_old > 0._rt ? yp_old/rp_old : 0._rt);
1026  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
1027  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
1028  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
1029  // Keep these double to avoid bug in single precision
1030  double const x_new = (rp_new - xyzmin.x)*dinv.x;
1031  double const x_old = (rp_old - xyzmin.x)*dinv.x;
1032 #else
1033 #if !defined(WARPX_DIM_1D_Z)
1034  // Keep these double to avoid bug in single precision
1035  double const x_new = (xp_np1 - xyzmin.x)*dinv.x;
1036  double const x_old = (xp_n[ip] - xyzmin.x)*dinv.x;
1037 #endif
1038 #endif
1039 #if defined(WARPX_DIM_3D)
1040  // Keep these double to avoid bug in single precision
1041  double const y_new = (yp_np1 - xyzmin.y)*dinv.y;
1042  double const y_old = (yp_n[ip] - xyzmin.y)*dinv.y;
1043 #endif
1044  // Keep these double to avoid bug in single precision
1045  double const z_new = (zp_np1 - xyzmin.z)*dinv.z;
1046  double const z_old = (zp_n[ip] - xyzmin.z)*dinv.z;
1047 
1048 #if defined(WARPX_DIM_RZ)
1049  amrex::Real const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
1050 #elif defined(WARPX_DIM_XZ)
1051  amrex::Real const vy = uyp_nph[ip]*gaminv;
1052 #elif defined(WARPX_DIM_1D_Z)
1053  amrex::Real const vx = uxp_nph[ip]*gaminv;
1054  amrex::Real const vy = uyp_nph[ip]*gaminv;
1055 #endif
1056 
1057  // --- Compute shape factors
1058  // Compute shape factors for position as they are now and at old positions
1059  // [ijk]_new: leftmost grid point that the particle touches
1060  const Compute_shape_factor< depos_order > compute_shape_factor;
1061  const Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
1062 
1063  // Shape factor arrays
1064  // Note that there are extra values above and below
1065  // to possibly hold the factor for the old particle
1066  // which can be at a different grid location.
1067  // Keep these double to avoid bug in single precision
1068 #if !defined(WARPX_DIM_1D_Z)
1069  double sx_new[depos_order + 3] = {0.};
1070  double sx_old[depos_order + 3] = {0.};
1071  const int i_new = compute_shape_factor(sx_new+1, x_new);
1072  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
1073 #endif
1074 #if defined(WARPX_DIM_3D)
1075  double sy_new[depos_order + 3] = {0.};
1076  double sy_old[depos_order + 3] = {0.};
1077  const int j_new = compute_shape_factor(sy_new+1, y_new);
1078  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
1079 #endif
1080  double sz_new[depos_order + 3] = {0.};
1081  double sz_old[depos_order + 3] = {0.};
1082  const int k_new = compute_shape_factor(sz_new+1, z_new);
1083  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
1084 
1085  // computes min/max positions of current contributions
1086 #if !defined(WARPX_DIM_1D_Z)
1087  int dil = 1, diu = 1;
1088  if (i_old < i_new) { dil = 0; }
1089  if (i_old > i_new) { diu = 0; }
1090 #endif
1091 #if defined(WARPX_DIM_3D)
1092  int djl = 1, dju = 1;
1093  if (j_old < j_new) { djl = 0; }
1094  if (j_old > j_new) { dju = 0; }
1095 #endif
1096  int dkl = 1, dku = 1;
1097  if (k_old < k_new) { dkl = 0; }
1098  if (k_old > k_new) { dku = 0; }
1099 
1100 #if defined(WARPX_DIM_3D)
1101 
1102  for (int k=dkl; k<=depos_order+2-dku; k++) {
1103  for (int j=djl; j<=depos_order+2-dju; j++) {
1104  amrex::Real sdxi = 0._rt;
1105  for (int i=dil; i<=depos_order+1-diu; i++) {
1106  sdxi += wq*invdtd.x*(sx_old[i] - sx_new[i])*(
1107  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
1108  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
1109  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
1110  }
1111  }
1112  }
1113  for (int k=dkl; k<=depos_order+2-dku; k++) {
1114  for (int i=dil; i<=depos_order+2-diu; i++) {
1115  amrex::Real sdyj = 0._rt;
1116  for (int j=djl; j<=depos_order+1-dju; j++) {
1117  sdyj += wq*invdtd.y*(sy_old[j] - sy_new[j])*(
1118  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1119  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1120  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
1121  }
1122  }
1123  }
1124  for (int j=djl; j<=depos_order+2-dju; j++) {
1125  for (int i=dil; i<=depos_order+2-diu; i++) {
1126  amrex::Real sdzk = 0._rt;
1127  for (int k=dkl; k<=depos_order+1-dku; k++) {
1128  sdzk += wq*invdtd.z*(sz_old[k] - sz_new[k])*(
1129  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
1130  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
1131  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
1132  }
1133  }
1134  }
1135 
1136 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1137 
1138  for (int k=dkl; k<=depos_order+2-dku; k++) {
1139  amrex::Real sdxi = 0._rt;
1140  for (int i=dil; i<=depos_order+1-diu; i++) {
1141  sdxi += wq*invdtd.x*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
1142  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
1143 #if defined(WARPX_DIM_RZ)
1144  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1145  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1146  // The factor 2 comes from the normalization of the modes
1147  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
1148  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
1149  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
1150  xy_mid = xy_mid*xy_mid0;
1151  }
1152 #endif
1153  }
1154  }
1155  for (int k=dkl; k<=depos_order+2-dku; k++) {
1156  for (int i=dil; i<=depos_order+2-diu; i++) {
1157  Real const sdyj = wq*vy*invvol*(
1158  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1159  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1160  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
1161 #if defined(WARPX_DIM_RZ)
1162  Complex const I = Complex{0._rt, 1._rt};
1163  Complex xy_new = xy_new0;
1164  Complex xy_mid = xy_mid0;
1165  Complex xy_old = xy_old0;
1166  // Throughout the following loop, xy_ takes the value e^{i m theta_}
1167  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1168  // The factor 2 comes from the normalization of the modes
1169  // The minus sign comes from the different convention with respect to Davidson et al.
1170  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xyzmin.x*dinv.x)*wq*invdtd.x/(amrex::Real)imode
1171  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
1172  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
1173  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
1174  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
1175  xy_new = xy_new*xy_new0;
1176  xy_mid = xy_mid*xy_mid0;
1177  xy_old = xy_old*xy_old0;
1178  }
1179 #endif
1180  }
1181  }
1182  for (int i=dil; i<=depos_order+2-diu; i++) {
1183  Real sdzk = 0._rt;
1184  for (int k=dkl; k<=depos_order+1-dku; k++) {
1185  sdzk += wq*invdtd.z*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
1186  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
1187 #if defined(WARPX_DIM_RZ)
1188  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1189  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1190  // The factor 2 comes from the normalization of the modes
1191  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1192  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
1193  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
1194  xy_mid = xy_mid*xy_mid0;
1195  }
1196 #endif
1197  }
1198  }
1199 #elif defined(WARPX_DIM_1D_Z)
1200 
1201  for (int k=dkl; k<=depos_order+2-dku; k++) {
1202  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1203  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
1204  }
1205  for (int k=dkl; k<=depos_order+2-dku; k++) {
1206  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1207  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
1208  }
1209  amrex::Real sdzk = 0._rt;
1210  for (int k=dkl; k<=depos_order+1-dku; k++) {
1211  sdzk += wq*invdtd.z*(sz_old[k] - sz_new[k]);
1212  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
1213  }
1214 #endif
1215  }
1216  );
1217 }
1218 
1245 template <int depos_order>
1246 void doVillasenorDepositionShapeNImplicit ([[maybe_unused]]const amrex::ParticleReal * const xp_n,
1247  [[maybe_unused]]const amrex::ParticleReal * const yp_n,
1248  [[maybe_unused]]const amrex::ParticleReal * const zp_n,
1249  const GetParticlePosition<PIdx>& GetPosition,
1250  const amrex::ParticleReal * const wp,
1251  [[maybe_unused]]const amrex::ParticleReal * const uxp_n,
1252  [[maybe_unused]]const amrex::ParticleReal * const uyp_n,
1253  [[maybe_unused]]const amrex::ParticleReal * const uzp_n,
1254  [[maybe_unused]]const amrex::ParticleReal * const uxp_nph,
1255  [[maybe_unused]]const amrex::ParticleReal * const uyp_nph,
1256  [[maybe_unused]]const amrex::ParticleReal * const uzp_nph,
1257  const int * const ion_lev,
1258  const amrex::Array4<amrex::Real>& Jx_arr,
1259  const amrex::Array4<amrex::Real>& Jy_arr,
1260  const amrex::Array4<amrex::Real>& Jz_arr,
1261  const long np_to_deposit,
1262  const amrex::Real dt,
1263  const amrex::XDim3 & dinv,
1264  const amrex::XDim3 & xyzmin,
1265  const amrex::Dim3 lo,
1266  const amrex::Real q,
1267  [[maybe_unused]] const int n_rz_azimuthal_modes)
1268 {
1269  using namespace amrex;
1270 
1271  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
1272  // (do_ionization=1)
1273  bool const do_ionization = ion_lev;
1274 
1275  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
1276 
1277 #if (AMREX_SPACEDIM > 1)
1278  Real constexpr one_third = 1.0_rt / 3.0_rt;
1279  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
1280 #endif
1281 
1282  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
1284  np_to_deposit,
1285  [=] AMREX_GPU_DEVICE (long const ip) {
1286 
1287 #if !defined(WARPX_DIM_3D)
1288  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
1289 
1290  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
1291  // The uxp,uyp,uzp are the velocities at time level n+1/2
1292  const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
1293  const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
1294  const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
1295  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
1296  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
1297  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
1298 #endif
1299 
1300  Real wq = q*wp[ip];
1301  if (do_ionization){
1302  wq *= ion_lev[ip];
1303  }
1304 
1305  ParticleReal xp_nph, yp_nph, zp_nph;
1306  GetPosition(ip, xp_nph, yp_nph, zp_nph);
1307 
1308 #if !defined(WARPX_DIM_1D_Z)
1309  ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n[ip];
1310 #endif
1311 #if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
1312  ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n[ip];
1313 #endif
1314  ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n[ip];
1315 
1316  // computes current and old position in grid units
1317 #if defined(WARPX_DIM_RZ)
1318  amrex::Real const xp_new = xp_np1;
1319  amrex::Real const yp_new = yp_np1;
1320  amrex::Real const xp_mid = xp_nph;
1321  amrex::Real const yp_mid = yp_nph;
1322  amrex::Real const xp_old = xp_n[ip];
1323  amrex::Real const yp_old = yp_n[ip];
1324  amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
1325  amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
1326  amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
1327  amrex::Real costheta_mid, sintheta_mid;
1328  if (rp_mid > 0._rt) {
1329  costheta_mid = xp_mid/rp_mid;
1330  sintheta_mid = yp_mid/rp_mid;
1331  } else {
1332  costheta_mid = 1._rt;
1333  sintheta_mid = 0._rt;
1334  }
1335  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
1336 
1337  // Keep these double to avoid bug in single precision
1338  double const x_new = (rp_new - xyzmin.x)*dinv.x;
1339  double const x_old = (rp_old - xyzmin.x)*dinv.x;
1340  amrex::Real const vx = (rp_new - rp_old)/dt;
1341  amrex::Real const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
1342 #elif defined(WARPX_DIM_XZ)
1343  // Keep these double to avoid bug in single precision
1344  double const x_new = (xp_np1 - xyzmin.x)*dinv.x;
1345  double const x_old = (xp_n[ip] - xyzmin.x)*dinv.x;
1346  amrex::Real const vx = (xp_np1 - xp_n[ip])/dt;
1347  amrex::Real const vy = uyp_nph[ip]*gaminv;
1348 #elif defined(WARPX_DIM_1D_Z)
1349  amrex::Real const vx = uxp_nph[ip]*gaminv;
1350  amrex::Real const vy = uyp_nph[ip]*gaminv;
1351 #elif defined(WARPX_DIM_3D)
1352  // Keep these double to avoid bug in single precision
1353  double const x_new = (xp_np1 - xyzmin.x)*dinv.x;
1354  double const x_old = (xp_n[ip] - xyzmin.x)*dinv.x;
1355  double const y_new = (yp_np1 - xyzmin.y)*dinv.y;
1356  double const y_old = (yp_n[ip] - xyzmin.y)*dinv.y;
1357  amrex::Real const vx = (xp_np1 - xp_n[ip])/dt;
1358  amrex::Real const vy = (yp_np1 - yp_n[ip])/dt;
1359 #endif
1360 
1361  // Keep these double to avoid bug in single precision
1362  double const z_new = (zp_np1 - xyzmin.z)*dinv.z;
1363  double const z_old = (zp_n[ip] - xyzmin.z)*dinv.z;
1364  amrex::Real const vz = (zp_np1 - zp_n[ip])/dt;
1365 
1366  // Define velocity kernals to deposit
1367  amrex::Real const wqx = wq*vx*invvol;
1368  amrex::Real const wqy = wq*vy*invvol;
1369  amrex::Real const wqz = wq*vz*invvol;
1370 
1371  // 1) Determine the number of segments.
1372  // 2) Loop over segments and deposit current.
1373 
1374  // cell crossings are defined at cell edges if depos_order is odd
1375  // cell crossings are defined at cell centers if depos_order is even
1376 
1377  int num_segments = 1;
1378  double shift = 0.0;
1379  if ( (depos_order % 2) == 0 ) { shift = 0.5; }
1380 
1381 #if defined(WARPX_DIM_3D)
1382 
1383  // compute cell crossings in X-direction
1384  const auto i_old = static_cast<int>(x_old-shift);
1385  const auto i_new = static_cast<int>(x_new-shift);
1386  const int cell_crossings_x = std::abs(i_new-i_old);
1387  num_segments += cell_crossings_x;
1388 
1389  // compute cell crossings in Y-direction
1390  const auto j_old = static_cast<int>(y_old-shift);
1391  const auto j_new = static_cast<int>(y_new-shift);
1392  const int cell_crossings_y = std::abs(j_new-j_old);
1393  num_segments += cell_crossings_y;
1394 
1395  // compute cell crossings in Z-direction
1396  const auto k_old = static_cast<int>(z_old-shift);
1397  const auto k_new = static_cast<int>(z_new-shift);
1398  const int cell_crossings_z = std::abs(k_new-k_old);
1399  num_segments += cell_crossings_z;
1400 
1401  // need to assert that the number of cell crossings in each direction
1402  // is within the range permitted by the number of guard cells
1403  // e.g., if (num_segments > 7) ...
1404 
1405  // compute total change in particle position and the initial cell
1406  // locations in each direction used to find the position at cell crossings.
1407  const double dxp = x_new - x_old;
1408  const double dyp = y_new - y_old;
1409  const double dzp = z_new - z_old;
1410  const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
1411  const auto dirY_sign = static_cast<double>(dyp < 0. ? -1. : 1.);
1412  const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1413  double Xcell = 0., Ycell = 0., Zcell = 0.;
1414  if (num_segments > 1) {
1415  Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
1416  Ycell = static_cast<double>(j_old) + shift + 0.5*(1.-dirY_sign);
1417  Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1418  }
1419 
1420  // loop over the number of segments and deposit
1421  const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1422  const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1423  double dxp_seg, dyp_seg, dzp_seg;
1424  double x0_new, y0_new, z0_new;
1425  double x0_old = x_old;
1426  double y0_old = y_old;
1427  double z0_old = z_old;
1428 
1429  for (int ns=0; ns<num_segments; ns++) {
1430 
1431  if (ns == num_segments-1) { // final segment
1432 
1433  x0_new = x_new;
1434  y0_new = y_new;
1435  z0_new = z_new;
1436  dxp_seg = x0_new - x0_old;
1437  dyp_seg = y0_new - y0_old;
1438  dzp_seg = z0_new - z0_old;
1439 
1440  }
1441  else {
1442 
1443  x0_new = Xcell + dirX_sign;
1444  y0_new = Ycell + dirY_sign;
1445  z0_new = Zcell + dirZ_sign;
1446  dxp_seg = x0_new - x0_old;
1447  dyp_seg = y0_new - y0_old;
1448  dzp_seg = z0_new - z0_old;
1449 
1450  if ( (dyp == 0. || std::abs(dxp_seg) < std::abs(dxp/dyp*dyp_seg))
1451  && (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) ) {
1452  Xcell = x0_new;
1453  dyp_seg = dyp/dxp*dxp_seg;
1454  dzp_seg = dzp/dxp*dxp_seg;
1455  y0_new = y0_old + dyp_seg;
1456  z0_new = z0_old + dzp_seg;
1457  }
1458  else if (dzp == 0. || std::abs(dyp_seg) < std::abs(dyp/dzp*dzp_seg)) {
1459  Ycell = y0_new;
1460  dxp_seg = dxp/dyp*dyp_seg;
1461  dzp_seg = dzp/dyp*dyp_seg;
1462  x0_new = x0_old + dxp_seg;
1463  z0_new = z0_old + dzp_seg;
1464  }
1465  else {
1466  Zcell = z0_new;
1467  dxp_seg = dxp/dzp*dzp_seg;
1468  dyp_seg = dyp/dzp*dzp_seg;
1469  x0_new = x0_old + dxp_seg;
1470  y0_new = y0_old + dyp_seg;
1471  }
1472 
1473  }
1474 
1475  // compute the segment factors (each equal to dt_seg/dt for nonzero dxp, dyp, or dzp)
1476  const auto seg_factor_x = static_cast<double>(dxp == 0. ? 1. : dxp_seg/dxp);
1477  const auto seg_factor_y = static_cast<double>(dyp == 0. ? 1. : dyp_seg/dyp);
1478  const auto seg_factor_z = static_cast<double>(dzp == 0. ? 1. : dzp_seg/dzp);
1479 
1480  // compute cell-based weights using the average segment position
1481  double sx_cell[depos_order] = {0.};
1482  double sy_cell[depos_order] = {0.};
1483  double sz_cell[depos_order] = {0.};
1484  double const x0_bar = (x0_new + x0_old)/2.0;
1485  double const y0_bar = (y0_new + y0_old)/2.0;
1486  double const z0_bar = (z0_new + z0_old)/2.0;
1487  const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1488  const int j0_cell = compute_shape_factor_cell( sy_cell, y0_bar-0.5 );
1489  const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1490 
1491  if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1492  const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1493  double sx_old_cell[depos_order] = {0.};
1494  double sx_new_cell[depos_order] = {0.};
1495  double sy_old_cell[depos_order] = {0.};
1496  double sy_new_cell[depos_order] = {0.};
1497  double sz_old_cell[depos_order] = {0.};
1498  double sz_new_cell[depos_order] = {0.};
1499  const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1500  const int j0_cell_2 = compute_shape_factors_cell( sy_old_cell, sy_new_cell, y0_old-0.5, y0_new-0.5 );
1501  const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1502  ignore_unused(i0_cell_2, j0_cell_2, k0_cell_2);
1503  for (int m=0; m<depos_order; m++) {
1504  sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1505  sy_cell[m] = (4.0*sy_cell[m] + sy_old_cell[m] + sy_new_cell[m])/6.0;
1506  sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1507  }
1508  }
1509 
1510  // compute node-based weights using the old and new segment positions
1511  double sx_old_node[depos_order+1] = {0.};
1512  double sx_new_node[depos_order+1] = {0.};
1513  double sy_old_node[depos_order+1] = {0.};
1514  double sy_new_node[depos_order+1] = {0.};
1515  double sz_old_node[depos_order+1] = {0.};
1516  double sz_new_node[depos_order+1] = {0.};
1517  const int i0_node = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1518  const int j0_node = compute_shape_factors_node( sy_old_node, sy_new_node, y0_old, y0_new );
1519  const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1520 
1521  // deposit Jx for this segment
1522  amrex::Real this_Jx;
1523  for (int i=0; i<=depos_order-1; i++) {
1524  for (int j=0; j<=depos_order; j++) {
1525  for (int k=0; k<=depos_order; k++) {
1526  this_Jx = wqx*sx_cell[i]*( sy_old_node[j]*sz_old_node[k]*one_third
1527  + sy_old_node[j]*sz_new_node[k]*one_sixth
1528  + sy_new_node[j]*sz_old_node[k]*one_sixth
1529  + sy_new_node[j]*sz_new_node[k]*one_third )*seg_factor_x;
1530  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+j0_node+j, lo.z+k0_node+k), this_Jx);
1531  }
1532  }
1533  }
1534 
1535  // deposit Jy for this segment
1536  amrex::Real this_Jy;
1537  for (int i=0; i<=depos_order; i++) {
1538  for (int j=0; j<=depos_order-1; j++) {
1539  for (int k=0; k<=depos_order; k++) {
1540  this_Jy = wqy*sy_cell[j]*( sx_old_node[i]*sz_old_node[k]*one_third
1541  + sx_old_node[i]*sz_new_node[k]*one_sixth
1542  + sx_new_node[i]*sz_old_node[k]*one_sixth
1543  + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;
1544  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+j0_cell+j, lo.z+k0_node+k), this_Jy);
1545  }
1546  }
1547  }
1548 
1549  // deposit Jz for this segment
1550  amrex::Real this_Jz;
1551  for (int i=0; i<=depos_order; i++) {
1552  for (int j=0; j<=depos_order; j++) {
1553  for (int k=0; k<=depos_order-1; k++) {
1554  this_Jz = wqz*sz_cell[k]*( sx_old_node[i]*sy_old_node[j]*one_third
1555  + sx_old_node[i]*sy_new_node[j]*one_sixth
1556  + sx_new_node[i]*sy_old_node[j]*one_sixth
1557  + sx_new_node[i]*sy_new_node[j]*one_third )*seg_factor_z;
1558  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+j0_node+j, lo.z+k0_cell+k), this_Jz);
1559  }
1560  }
1561  }
1562 
1563  // update old segment values
1564  if (ns < num_segments-1) {
1565  x0_old = x0_new;
1566  y0_old = y0_new;
1567  z0_old = z0_new;
1568  }
1569 
1570  } // end loop over segments
1571 
1572 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1573 
1574  // compute cell crossings in X-direction
1575  const auto i_old = static_cast<int>(x_old-shift);
1576  const auto i_new = static_cast<int>(x_new-shift);
1577  const int cell_crossings_x = std::abs(i_new-i_old);
1578  num_segments += cell_crossings_x;
1579 
1580  // compute cell crossings in Z-direction
1581  const auto k_old = static_cast<int>(z_old-shift);
1582  const auto k_new = static_cast<int>(z_new-shift);
1583  const int cell_crossings_z = std::abs(k_new-k_old);
1584  num_segments += cell_crossings_z;
1585 
1586  // need to assert that the number of cell crossings in each direction
1587  // is within the range permitted by the number of guard cells
1588  // e.g., if (num_segments > 5) ...
1589 
1590  // compute total change in particle position and the initial cell
1591  // locations in each direction used to find the position at cell crossings.
1592  const double dxp = x_new - x_old;
1593  const double dzp = z_new - z_old;
1594  const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
1595  const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1596  double Xcell = 0., Zcell = 0.;
1597  if (num_segments > 1) {
1598  Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
1599  Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1600  }
1601 
1602  // loop over the number of segments and deposit
1603  const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1604  const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1605  double dxp_seg, dzp_seg;
1606  double x0_new, z0_new;
1607  double x0_old = x_old;
1608  double z0_old = z_old;
1609 
1610  for (int ns=0; ns<num_segments; ns++) {
1611 
1612  if (ns == num_segments-1) { // final segment
1613 
1614  x0_new = x_new;
1615  z0_new = z_new;
1616  dxp_seg = x0_new - x0_old;
1617  dzp_seg = z0_new - z0_old;
1618 
1619  }
1620  else {
1621 
1622  x0_new = Xcell + dirX_sign;
1623  z0_new = Zcell + dirZ_sign;
1624  dxp_seg = x0_new - x0_old;
1625  dzp_seg = z0_new - z0_old;
1626 
1627  if (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) {
1628  Xcell = x0_new;
1629  dzp_seg = dzp/dxp*dxp_seg;
1630  z0_new = z0_old + dzp_seg;
1631  }
1632  else {
1633  Zcell = z0_new;
1634  dxp_seg = dxp/dzp*dzp_seg;
1635  x0_new = x0_old + dxp_seg;
1636  }
1637 
1638  }
1639 
1640  // compute the segment factors (each equal to dt_seg/dt for nonzero dxp, or dzp)
1641  const auto seg_factor_x = static_cast<double>(dxp == 0. ? 1. : dxp_seg/dxp);
1642  const auto seg_factor_z = static_cast<double>(dzp == 0. ? 1. : dzp_seg/dzp);
1643 
1644  // compute cell-based weights using the average segment position
1645  double sx_cell[depos_order] = {0.};
1646  double sz_cell[depos_order] = {0.};
1647  double const x0_bar = (x0_new + x0_old)/2.0;
1648  double const z0_bar = (z0_new + z0_old)/2.0;
1649  const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1650  const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1651 
1652  if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1653  const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1654  double sx_old_cell[depos_order] = {0.};
1655  double sx_new_cell[depos_order] = {0.};
1656  double sz_old_cell[depos_order] = {0.};
1657  double sz_new_cell[depos_order] = {0.};
1658  const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1659  const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1660  ignore_unused(i0_cell_2, k0_cell_2);
1661  for (int m=0; m<depos_order; m++) {
1662  sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1663  sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1664  }
1665  }
1666 
1667  // compute node-based weights using the old and new segment positions
1668  double sx_old_node[depos_order+1] = {0.};
1669  double sx_new_node[depos_order+1] = {0.};
1670  double sz_old_node[depos_order+1] = {0.};
1671  double sz_new_node[depos_order+1] = {0.};
1672  const int i0_node = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1673  const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1674 
1675  // deposit Jx for this segment
1676  amrex::Real this_Jx;
1677  for (int i=0; i<=depos_order-1; i++) {
1678  for (int k=0; k<=depos_order; k++) {
1679  this_Jx = wqx*sx_cell[i]*(sz_old_node[k] + sz_new_node[k])/2.0_rt*seg_factor_x;
1680  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+k0_node+k, 0, 0), this_Jx);
1681 #if defined(WARPX_DIM_RZ)
1682  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1683  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1684  // The factor 2 comes from the normalization of the modes
1685  const Complex djr_cmplx = 2._rt*this_Jx*xy_mid;
1686  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+k0_node+k, 0, 2*imode-1), djr_cmplx.real());
1687  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+k0_node+k, 0, 2*imode), djr_cmplx.imag());
1688  xy_mid = xy_mid*xy_mid0;
1689  }
1690 #endif
1691  }
1692  }
1693 
1694  // deposit out-of-plane Jy for this segment
1695  const auto seg_factor_y = std::min(seg_factor_x,seg_factor_z);
1696  amrex::Real this_Jy;
1697  for (int i=0; i<=depos_order; i++) {
1698  for (int k=0; k<=depos_order; k++) {
1699  this_Jy = wqy*( sx_old_node[i]*sz_old_node[k]*one_third
1700  + sx_old_node[i]*sz_new_node[k]*one_sixth
1701  + sx_new_node[i]*sz_old_node[k]*one_sixth
1702  + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;
1703  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+k0_node+k, 0, 0), this_Jy);
1704 #if defined(WARPX_DIM_RZ)
1705  Complex xy_mid = xy_mid0;
1706  // Throughout the following loop, xy_ takes the value e^{i m theta_}
1707  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1708  // The factor 2 comes from the normalization of the modes
1709  const Complex djy_cmplx = 2._rt*this_Jy*xy_mid;
1710  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+k0_node+k, 0, 2*imode-1), djy_cmplx.real());
1711  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+k0_node+k, 0, 2*imode), djy_cmplx.imag());
1712  xy_mid = xy_mid*xy_mid0;
1713  }
1714 #endif
1715  }
1716  }
1717 
1718  // deposit Jz for this segment
1719  amrex::Real this_Jz;
1720  for (int i=0; i<=depos_order; i++) {
1721  for (int k=0; k<=depos_order-1; k++) {
1722  this_Jz = wqz*sz_cell[k]*(sx_old_node[i] + sx_new_node[i])/2.0_rt*seg_factor_z;
1723  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+k0_cell+k, 0, 0), this_Jz);
1724 #if defined(WARPX_DIM_RZ)
1725  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1726  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1727  // The factor 2 comes from the normalization of the modes
1728  const Complex djz_cmplx = 2._rt*this_Jz*xy_mid;
1729  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+k0_cell+k, 0, 2*imode-1), djz_cmplx.real());
1730  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+k0_cell+k, 0, 2*imode), djz_cmplx.imag());
1731  xy_mid = xy_mid*xy_mid0;
1732  }
1733 #endif
1734  }
1735  }
1736 
1737  // update old segment values
1738  if (ns < num_segments-1) {
1739  x0_old = x0_new;
1740  z0_old = z0_new;
1741  }
1742 
1743  } // end loop over segments
1744 
1745 #elif defined(WARPX_DIM_1D_Z)
1746 
1747  // compute cell crossings in Z-direction
1748  const auto k_old = static_cast<int>(z_old-shift);
1749  const auto k_new = static_cast<int>(z_new-shift);
1750  const int cell_crossings_z = std::abs(k_new-k_old);
1751  num_segments += cell_crossings_z;
1752 
1753  // need to assert that the number of cell crossings in each direction
1754  // is within the range permitted by the number of guard cells
1755  // e.g., if (num_segments > 3) ...
1756 
1757  // compute dzp and the initial cell location used to find the cell crossings.
1758  double const dzp = z_new - z_old;
1759  const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1760  double Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1761 
1762  // loop over the number of segments and deposit
1763  const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1764  const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1765  double dzp_seg;
1766  double z0_new;
1767  double z0_old = z_old;
1768 
1769  for (int ns=0; ns<num_segments; ns++) {
1770 
1771  if (ns == num_segments-1) { // final segment
1772  z0_new = z_new;
1773  dzp_seg = z0_new - z0_old;
1774  }
1775  else {
1776  Zcell = Zcell + dirZ_sign;
1777  z0_new = Zcell;
1778  dzp_seg = z0_new - z0_old;
1779  }
1780 
1781  // compute the segment factor (equal to dt_seg/dt for nonzero dzp)
1782  const auto seg_factor = static_cast<double>(dzp == 0. ? 1. : dzp_seg/dzp);
1783 
1784  // compute cell-based weights using the average segment position
1785  double sz_cell[depos_order] = {0.};
1786  double const z0_bar = (z0_new + z0_old)/2.0;
1787  const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1788 
1789  if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1790  const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1791  double sz_old_cell[depos_order] = {0.};
1792  double sz_new_cell[depos_order] = {0.};
1793  const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1794  ignore_unused(k0_cell_2);
1795  for (int m=0; m<depos_order; m++) {
1796  sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1797  }
1798  }
1799 
1800  // compute node-based weights using the old and new segment positions
1801  double sz_old_node[depos_order+1] = {0.};
1802  double sz_new_node[depos_order+1] = {0.};
1803  const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1804 
1805  // deposit out-of-plane Jx and Jy for this segment
1806  for (int k=0; k<=depos_order; k++) {
1807  const amrex::Real weight = 0.5_rt*(sz_old_node[k] + sz_new_node[k])*seg_factor;
1808  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k0_node+k, 0, 0), wqx*weight);
1809  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k0_node+k, 0, 0), wqy*weight);
1810  }
1811 
1812  // deposit Jz for this segment
1813  for (int k=0; k<=depos_order-1; k++) {
1814  const amrex::Real this_Jz = wqz*sz_cell[k]*seg_factor;
1815  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k0_cell+k, 0, 0), this_Jz);
1816  }
1817 
1818  // update old segment values
1819  if (ns < num_segments-1) {
1820  z0_old = z0_new;
1821  }
1822 
1823  }
1824 
1825 #endif
1826  }
1827  );
1828 }
1829 
1856 template <int depos_order>
1858  const amrex::ParticleReal* const wp,
1859  const amrex::ParticleReal* const uxp,
1860  const amrex::ParticleReal* const uyp,
1861  const amrex::ParticleReal* const uzp,
1862  const int* const ion_lev,
1863  amrex::FArrayBox& Dx_fab,
1864  amrex::FArrayBox& Dy_fab,
1865  amrex::FArrayBox& Dz_fab,
1866  long np_to_deposit,
1867  amrex::Real dt,
1868  amrex::Real relative_time,
1869  const amrex::XDim3 & dinv,
1870  const amrex::XDim3 & xyzmin,
1871  amrex::Dim3 lo,
1872  amrex::Real q,
1873  [[maybe_unused]]int n_rz_azimuthal_modes)
1874 {
1875  using namespace amrex::literals;
1876 
1877 #if defined(WARPX_DIM_RZ)
1878  amrex::ignore_unused(GetPosition,
1879  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
1880  np_to_deposit, dt, relative_time, dinv, xyzmin, lo, q);
1881  WARPX_ABORT_WITH_MESSAGE("Vay deposition not implemented in RZ geometry");
1882 #endif
1883 
1884 #if defined(WARPX_DIM_1D_Z)
1885  amrex::ignore_unused(GetPosition,
1886  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
1887  np_to_deposit, dt, relative_time, dinv, xyzmin, lo, q);
1888  WARPX_ABORT_WITH_MESSAGE("Vay deposition not implemented in 1D geometry");
1889 #endif
1890 
1891 #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
1892 
1893  // If ion_lev is a null pointer, then do_ionization=0, else do_ionization=1
1894  const bool do_ionization = ion_lev;
1895 
1896  // Inverse of time step
1897  const amrex::Real invdt = 1._rt / dt;
1898 
1899  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
1900 
1901  // Allocate temporary arrays
1902 #if defined(WARPX_DIM_3D)
1903  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dy_fab.box() && Dx_fab.box() == Dz_fab.box());
1904  amrex::FArrayBox temp_fab{Dx_fab.box(), 4};
1905 #elif defined(WARPX_DIM_XZ)
1906  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dz_fab.box());
1907  amrex::FArrayBox temp_fab{Dx_fab.box(), 2};
1908 #endif
1909  temp_fab.setVal<amrex::RunOn::Device>(0._rt);
1910  amrex::Array4<amrex::Real> const& temp_arr = temp_fab.array();
1911 
1912  // Inverse of light speed squared
1913  const amrex::Real invcsq = 1._rt / (PhysConst::c * PhysConst::c);
1914 
1915  // Arrays where D will be stored
1916  amrex::Array4<amrex::Real> const& Dx_arr = Dx_fab.array();
1917  amrex::Array4<amrex::Real> const& Dy_arr = Dy_fab.array();
1918  amrex::Array4<amrex::Real> const& Dz_arr = Dz_fab.array();
1919 
1920  // Loop over particles and deposit (Dx,Dy,Dz) into Dx_fab, Dy_fab and Dz_fab
1921  amrex::ParallelFor(np_to_deposit, [=] AMREX_GPU_DEVICE (long ip)
1922  {
1923  // Inverse of Lorentz factor gamma
1924  const amrex::Real invgam = 1._rt / std::sqrt(1._rt + uxp[ip] * uxp[ip] * invcsq
1925  + uyp[ip] * uyp[ip] * invcsq
1926  + uzp[ip] * uzp[ip] * invcsq);
1927  // Product of particle charges and weights
1928  amrex::Real wq = q * wp[ip];
1929  if (do_ionization) { wq *= ion_lev[ip]; }
1930 
1931  // Current particle positions (in physical units)
1932  amrex::ParticleReal xp, yp, zp;
1933  GetPosition(ip, xp, yp, zp);
1934 
1935  // Particle velocities
1936  const amrex::Real vx = uxp[ip] * invgam;
1937  const amrex::Real vy = uyp[ip] * invgam;
1938  const amrex::Real vz = uzp[ip] * invgam;
1939 
1940  // Modify the particle position to match the time of the deposition
1941  xp += relative_time * vx;
1942  yp += relative_time * vy;
1943  zp += relative_time * vz;
1944 
1945  // Current and old particle positions in grid units
1946  // Keep these double to avoid bug in single precision.
1947  double const x_new = (xp - xyzmin.x + 0.5_rt*dt*vx) * dinv.x;
1948  double const x_old = (xp - xyzmin.x - 0.5_rt*dt*vx) * dinv.x;
1949 #if defined(WARPX_DIM_3D)
1950  // Keep these double to avoid bug in single precision.
1951  double const y_new = (yp - xyzmin.y + 0.5_rt*dt*vy) * dinv.y;
1952  double const y_old = (yp - xyzmin.y - 0.5_rt*dt*vy) * dinv.y;
1953 #endif
1954  // Keep these double to avoid bug in single precision.
1955  double const z_new = (zp - xyzmin.z + 0.5_rt*dt*vz) * dinv.z;
1956  double const z_old = (zp - xyzmin.z - 0.5_rt*dt*vz) * dinv.z;
1957 
1958  // Shape factor arrays for current and old positions (nodal)
1959  // Keep these double to avoid bug in single precision.
1960  double sx_new[depos_order+1] = {0.};
1961  double sx_old[depos_order+1] = {0.};
1962 #if defined(WARPX_DIM_3D)
1963  // Keep these double to avoid bug in single precision.
1964  double sy_new[depos_order+1] = {0.};
1965  double sy_old[depos_order+1] = {0.};
1966 #endif
1967  // Keep these double to avoid bug in single precision.
1968  double sz_new[depos_order+1] = {0.};
1969  double sz_old[depos_order+1] = {0.};
1970 
1971  // Compute shape factors for current positions
1972 
1973  // i_new leftmost grid point in x that the particle touches
1974  // sx_new shape factor along x for the centering of each current
1975  Compute_shape_factor< depos_order > const compute_shape_factor;
1976  const int i_new = compute_shape_factor(sx_new, x_new);
1977 #if defined(WARPX_DIM_3D)
1978  // j_new leftmost grid point in y that the particle touches
1979  // sy_new shape factor along y for the centering of each current
1980  const int j_new = compute_shape_factor(sy_new, y_new);
1981 #endif
1982  // k_new leftmost grid point in z that the particle touches
1983  // sz_new shape factor along z for the centering of each current
1984  const int k_new = compute_shape_factor(sz_new, z_new);
1985 
1986  // Compute shape factors for old positions
1987 
1988  // i_old leftmost grid point in x that the particle touches
1989  // sx_old shape factor along x for the centering of each current
1990  const int i_old = compute_shape_factor(sx_old, x_old);
1991 #if defined(WARPX_DIM_3D)
1992  // j_old leftmost grid point in y that the particle touches
1993  // sy_old shape factor along y for the centering of each current
1994  const int j_old = compute_shape_factor(sy_old, y_old);
1995 #endif
1996  // k_old leftmost grid point in z that the particle touches
1997  // sz_old shape factor along z for the centering of each current
1998  const int k_old = compute_shape_factor(sz_old, z_old);
1999 
2000  // Deposit current into Dx_arr, Dy_arr and Dz_arr
2001 #if defined(WARPX_DIM_XZ)
2002 
2003  const amrex::Real wqy = wq * vy * invvol;
2004  for (int k=0; k<=depos_order; k++) {
2005  for (int i=0; i<=depos_order; i++) {
2006 
2007  // Re-casting sx_new and sz_new from double to amrex::Real so that
2008  // Atomic::Add has consistent types in its argument
2009  auto const sxn_szn = static_cast<amrex::Real>(sx_new[i] * sz_new[k]);
2010  auto const sxo_szn = static_cast<amrex::Real>(sx_old[i] * sz_new[k]);
2011  auto const sxn_szo = static_cast<amrex::Real>(sx_new[i] * sz_old[k]);
2012  auto const sxo_szo = static_cast<amrex::Real>(sx_old[i] * sz_old[k]);
2013 
2014  if (i_new == i_old && k_new == k_old) {
2015  // temp arrays for Dx and Dz
2016  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2017  wq * invvol * invdt * (sxn_szn - sxo_szo));
2018 
2019  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 1),
2020  wq * invvol * invdt * (sxn_szo - sxo_szn));
2021 
2022  // Dy
2023  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2024  wqy * 0.25_rt * (sxn_szn + sxn_szo + sxo_szn + sxo_szo));
2025  } else {
2026  // temp arrays for Dx and Dz
2027  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2028  wq * invvol * invdt * sxn_szn);
2029 
2030  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
2031  - wq * invvol * invdt * sxo_szo);
2032 
2033  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 1),
2034  wq * invvol * invdt * sxn_szo);
2035 
2036  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 1),
2037  - wq * invvol * invdt * sxo_szn);
2038 
2039  // Dy
2040  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2041  wqy * 0.25_rt * sxn_szn);
2042 
2043  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 0),
2044  wqy * 0.25_rt * sxn_szo);
2045 
2046  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 0),
2047  wqy * 0.25_rt * sxo_szn);
2048 
2049  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
2050  wqy * 0.25_rt * sxo_szo);
2051  }
2052 
2053  }
2054  }
2055 
2056 #elif defined(WARPX_DIM_3D)
2057 
2058  for (int k=0; k<=depos_order; k++) {
2059  for (int j=0; j<=depos_order; j++) {
2060 
2061  auto const syn_szn = static_cast<amrex::Real>(sy_new[j] * sz_new[k]);
2062  auto const syo_szn = static_cast<amrex::Real>(sy_old[j] * sz_new[k]);
2063  auto const syn_szo = static_cast<amrex::Real>(sy_new[j] * sz_old[k]);
2064  auto const syo_szo = static_cast<amrex::Real>(sy_old[j] * sz_old[k]);
2065 
2066  for (int i=0; i<=depos_order; i++) {
2067 
2068  auto const sxn_syn_szn = static_cast<amrex::Real>(sx_new[i]) * syn_szn;
2069  auto const sxo_syn_szn = static_cast<amrex::Real>(sx_old[i]) * syn_szn;
2070  auto const sxn_syo_szn = static_cast<amrex::Real>(sx_new[i]) * syo_szn;
2071  auto const sxo_syo_szn = static_cast<amrex::Real>(sx_old[i]) * syo_szn;
2072  auto const sxn_syn_szo = static_cast<amrex::Real>(sx_new[i]) * syn_szo;
2073  auto const sxo_syn_szo = static_cast<amrex::Real>(sx_old[i]) * syn_szo;
2074  auto const sxn_syo_szo = static_cast<amrex::Real>(sx_new[i]) * syo_szo;
2075  auto const sxo_syo_szo = static_cast<amrex::Real>(sx_old[i]) * syo_szo;
2076 
2077  if (i_new == i_old && j_new == j_old && k_new == k_old) {
2078  // temp arrays for Dx, Dy and Dz
2079  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
2080  wq * invvol * invdt * (sxn_syn_szn - sxo_syo_szo));
2081 
2082  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 1),
2083  wq * invvol * invdt * (sxn_syn_szo - sxo_syo_szn));
2084 
2085  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 2),
2086  wq * invvol * invdt * (sxn_syo_szn - sxo_syn_szo));
2087 
2088  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 3),
2089  wq * invvol * invdt * (sxo_syn_szn - sxn_syo_szo));
2090  } else {
2091  // temp arrays for Dx, Dy and Dz
2092  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
2093  wq * invvol * invdt * sxn_syn_szn);
2094 
2095  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_old + k, 0),
2096  - wq * invvol * invdt * sxo_syo_szo);
2097 
2098  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_old + k, 1),
2099  wq * invvol * invdt * sxn_syn_szo);
2100 
2101  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_new + k, 1),
2102  - wq * invvol * invdt * sxo_syo_szn);
2103 
2104  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_new + k, 2),
2105  wq * invvol * invdt * sxn_syo_szn);
2106 
2107  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_old + k, 2),
2108  - wq * invvol * invdt * sxo_syn_szo);
2109 
2110  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_new + k, 3),
2111  wq * invvol * invdt * sxo_syn_szn);
2112 
2113  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_old + k, 3),
2114  - wq * invvol * invdt * sxn_syo_szo);
2115  }
2116  }
2117  }
2118  }
2119 #endif
2120  } );
2121 
2122 #if defined(WARPX_DIM_3D)
2123  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
2124  {
2125  const amrex::Real t_a = temp_arr(i,j,k,0);
2126  const amrex::Real t_b = temp_arr(i,j,k,1);
2127  const amrex::Real t_c = temp_arr(i,j,k,2);
2128  const amrex::Real t_d = temp_arr(i,j,k,3);
2129  Dx_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b + t_c - 2._rt*t_d);
2130  Dy_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b - 2._rt*t_c + t_d);
2131  Dz_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a - 2._rt*t_b + t_c + t_d);
2132  });
2133 #elif defined(WARPX_DIM_XZ)
2134  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
2135  {
2136  const amrex::Real t_a = temp_arr(i,j,0,0);
2137  const amrex::Real t_b = temp_arr(i,j,0,1);
2138  Dx_arr(i,j,0) += (0.5_rt)*(t_a + t_b);
2139  Dz_arr(i,j,0) += (0.5_rt)*(t_a - t_b);
2140  });
2141 #endif
2142  // Synchronize so that temp_fab can be safely deallocated in its destructor
2144 
2145 #endif // #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
2146 }
2147 #endif // WARPX_CURRENTDEPOSITION_H_
#define AMREX_ALWAYS_ASSERT(EX)
#define AMREX_INLINE
#define AMREX_GPU_DEVICE
#define AMREX_GPU_HOST_DEVICE
#define AMREX_D_DECL(a, b, c)
void doVayDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &Dx_fab, amrex::FArrayBox &Dy_fab, amrex::FArrayBox &Dz_fab, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, amrex::Dim3 lo, amrex::Real q, [[maybe_unused]]int n_rz_azimuthal_modes)
Vay current deposition (Vay et al, 2013) for thread thread_num: deposit D in real space and store the...
Definition: CurrentDeposition.H:1857
void doDepositionShapeNImplicit(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp_n, const amrex::ParticleReal *const uyp_n, const amrex::ParticleReal *const uzp_n, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_deposit, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real q, [[maybe_unused]]const int n_rz_azimuthal_modes)
Direct current deposition for thread thread_num for the implicit scheme The only difference from doDe...
Definition: CurrentDeposition.H:359
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doDepositionShapeNKernel([[maybe_unused]] const amrex::ParticleReal xp, [[maybe_unused]] const amrex::ParticleReal yp, [[maybe_unused]] const amrex::ParticleReal zp, const amrex::ParticleReal wq, const amrex::ParticleReal vx, const amrex::ParticleReal vy, const amrex::ParticleReal vz, amrex::Array4< amrex::Real > const &jx_arr, amrex::Array4< amrex::Real > const &jy_arr, amrex::Array4< amrex::Real > const &jz_arr, amrex::IntVect const &jx_type, amrex::IntVect const &jy_type, amrex::IntVect const &jz_type, const amrex::Real relative_time, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Real invvol, const amrex::Dim3 lo, [[maybe_unused]] const int n_rz_azimuthal_modes)
Kernel for the direct current deposition for thread thread_num.
Definition: CurrentDeposition.H:50
void doChargeConservingDepositionShapeNImplicit([[maybe_unused]]const amrex::ParticleReal *const xp_n, [[maybe_unused]]const amrex::ParticleReal *const yp_n, [[maybe_unused]]const amrex::ParticleReal *const zp_n, const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, [[maybe_unused]]const amrex::ParticleReal *const uxp_n, [[maybe_unused]]const amrex::ParticleReal *const uyp_n, [[maybe_unused]]const amrex::ParticleReal *const uzp_n, [[maybe_unused]]const amrex::ParticleReal *const uxp_nph, [[maybe_unused]]const amrex::ParticleReal *const uyp_nph, [[maybe_unused]]const amrex::ParticleReal *const uzp_nph, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_deposit, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real q, [[maybe_unused]] const int n_rz_azimuthal_modes)
Esirkepov Current Deposition for thread thread_num for implicit scheme The difference from doEsirkepo...
Definition: CurrentDeposition.H:934
void doDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, const amrex::Real relative_time, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, const amrex::DenseBins< WarpXParticleContainer::ParticleTileType::ParticleTileDataType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size)
Current Deposition for thread thread_num using shared memory.
Definition: CurrentDeposition.H:454
void doDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, amrex::Real relative_time, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, amrex::Dim3 lo, amrex::Real q, [[maybe_unused]]int n_rz_azimuthal_modes)
Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:274
void doEsirkepovDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, amrex::Dim3 lo, amrex::Real q, [[maybe_unused]]int n_rz_azimuthal_modes)
Esirkepov Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:643
void doVillasenorDepositionShapeNImplicit([[maybe_unused]]const amrex::ParticleReal *const xp_n, [[maybe_unused]]const amrex::ParticleReal *const yp_n, [[maybe_unused]]const amrex::ParticleReal *const zp_n, const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, [[maybe_unused]]const amrex::ParticleReal *const uxp_n, [[maybe_unused]]const amrex::ParticleReal *const uyp_n, [[maybe_unused]]const amrex::ParticleReal *const uzp_n, [[maybe_unused]]const amrex::ParticleReal *const uxp_nph, [[maybe_unused]]const amrex::ParticleReal *const uyp_nph, [[maybe_unused]]const amrex::ParticleReal *const uzp_nph, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_deposit, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real q, [[maybe_unused]] const int n_rz_azimuthal_modes)
Villasenor and Buneman Current Deposition for thread thread_num for implicit scheme....
Definition: CurrentDeposition.H:1246
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition: TextMsg.H:15
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
amrex::GpuComplex< amrex::Real > Complex
Definition: WarpX_Complex.H:22
NODE
static amrex::IntVect shared_tilesize
tileSize to use for shared current deposition operations
Definition: WarpX.H:255
static int shared_mem_current_tpb
number of threads to use per block in shared deposition
Definition: WarpX.H:252
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE BoxND & grow(int i) noexcept
AMREX_GPU_HOST_DEVICE IntVectND< dim > type() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
static constexpr auto c
vacuum speed of light [m/s]
Definition: constant.H:44
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
constexpr int iz
constexpr int iy
constexpr int ix
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > shift(const BoxND< dim > &b, int dir, int nzones) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
void launch(T const &n, L &&f) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
IntVectND< AMREX_SPACEDIM > IntVect
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
float dt
Definition: stencil.py:442
Definition: ShapeFactors.H:168
Definition: ShapeFactors.H:29
Definition: ShapeFactors.H:95
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept