WarpX
CurrentDeposition.H
Go to the documentation of this file.
1 /* Copyright 2019 Axel Huebl, David Grote, Maxence Thevenet
2  * Remi Lehe, Weiqun Zhang, Michael Rowan
3  *
4  * This file is part of WarpX.
5  *
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef WARPX_CURRENTDEPOSITION_H_
9 #define WARPX_CURRENTDEPOSITION_H_
10 
14 #include "Particles/ShapeFactors.H"
15 #include "Utils/TextMsg.H"
17 #include "Utils/WarpXConst.H"
18 #ifdef WARPX_DIM_RZ
19 # include "Utils/WarpX_Complex.H"
20 #endif
21 
22 #include "WarpX.H" // todo: remove include and pass globals as args
23 
24 #include <AMReX.H>
25 #include <AMReX_Arena.H>
26 #include <AMReX_Array4.H>
27 #include <AMReX_REAL.H>
28 
47 template <int depos_order>
49 void doDepositionShapeNKernel(const amrex::ParticleReal xp,
50  const amrex::ParticleReal yp,
51  const amrex::ParticleReal zp,
52  const amrex::ParticleReal wq,
53  const amrex::ParticleReal vx,
54  const amrex::ParticleReal vy,
55  const amrex::ParticleReal vz,
56  amrex::Array4<amrex::Real> const& jx_arr,
57  amrex::Array4<amrex::Real> const& jy_arr,
58  amrex::Array4<amrex::Real> const& jz_arr,
59  amrex::IntVect const& jx_type,
60  amrex::IntVect const& jy_type,
61  amrex::IntVect const& jz_type,
62  const amrex::Real relative_time,
63  AMREX_D_DECL(const amrex::Real dzi,
64  const amrex::Real dxi,
65  const amrex::Real dyi),
66  AMREX_D_DECL(const amrex::Real zmin,
67  const amrex::Real xmin,
68  const amrex::Real ymin),
69  const amrex::Real invvol,
70  const amrex::Dim3 lo,
71  const int n_rz_azimuthal_modes)
72 {
73  using namespace amrex::literals;
74 #if !defined(WARPX_DIM_RZ)
75  amrex::ignore_unused(n_rz_azimuthal_modes);
76 #endif
77 #if defined(WARPX_DIM_1D_Z)
78  amrex::ignore_unused(xp, yp);
79 #endif
80 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
82 #endif
83 
84  constexpr int zdir = WARPX_ZINDEX;
85  constexpr int NODE = amrex::IndexType::NODE;
86  constexpr int CELL = amrex::IndexType::CELL;
87 
88  // wqx, wqy wqz are particle current in each direction
89 #if defined(WARPX_DIM_RZ)
90  // In RZ, wqx is actually wqr, and wqy is wqtheta
91  // Convert to cylindrical at the mid point
92  const amrex::Real xpmid = xp + relative_time*vx;
93  const amrex::Real ypmid = yp + relative_time*vy;
94  const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
95  amrex::Real costheta;
96  amrex::Real sintheta;
97  if (rpmid > 0._rt) {
98  costheta = xpmid/rpmid;
99  sintheta = ypmid/rpmid;
100  } else {
101  costheta = 1._rt;
102  sintheta = 0._rt;
103  }
104  const Complex xy0 = Complex{costheta, sintheta};
105  const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
106  const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
107 #else
108  const amrex::Real wqx = wq*invvol*vx;
109  const amrex::Real wqy = wq*invvol*vy;
110 #endif
111  const amrex::Real wqz = wq*invvol*vz;
112 
113  // --- Compute shape factors
114  Compute_shape_factor< depos_order > const compute_shape_factor;
115 #if (AMREX_SPACEDIM >= 2)
116  // x direction
117  // Get particle position after 1/2 push back in position
118 #if defined(WARPX_DIM_RZ)
119  // Keep these double to avoid bug in single precision
120  const double xmid = (rpmid - xmin)*dxi;
121 #else
122  const double xmid = ((xp - xmin) + relative_time*vx)*dxi;
123 #endif
124  // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
125  // sx_j[xyz] shape factor along x for the centering of each current
126  // There are only two possible centerings, node or cell centered, so at most only two shape factor
127  // arrays will be needed.
128  // Keep these double to avoid bug in single precision
129  double sx_node[depos_order + 1] = {0.};
130  double sx_cell[depos_order + 1] = {0.};
131  int j_node = 0;
132  int j_cell = 0;
133  if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
134  j_node = compute_shape_factor(sx_node, xmid);
135  }
136  if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
137  j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
138  }
139 
140  amrex::Real sx_jx[depos_order + 1] = {0._rt};
141  amrex::Real sx_jy[depos_order + 1] = {0._rt};
142  amrex::Real sx_jz[depos_order + 1] = {0._rt};
143  for (int ix=0; ix<=depos_order; ix++)
144  {
145  sx_jx[ix] = ((jx_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
146  sx_jy[ix] = ((jy_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
147  sx_jz[ix] = ((jz_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
148  }
149 
150  int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
151  int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
152  int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
153 #endif //AMREX_SPACEDIM >= 2
154 
155 #if defined(WARPX_DIM_3D)
156  // y direction
157  // Keep these double to avoid bug in single precision
158  const double ymid = ((yp - ymin) + relative_time*vy)*dyi;
159  double sy_node[depos_order + 1] = {0.};
160  double sy_cell[depos_order + 1] = {0.};
161  int k_node = 0;
162  int k_cell = 0;
163  if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
164  k_node = compute_shape_factor(sy_node, ymid);
165  }
166  if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
167  k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
168  }
169  amrex::Real sy_jx[depos_order + 1] = {0._rt};
170  amrex::Real sy_jy[depos_order + 1] = {0._rt};
171  amrex::Real sy_jz[depos_order + 1] = {0._rt};
172  for (int iy=0; iy<=depos_order; iy++)
173  {
174  sy_jx[iy] = ((jx_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
175  sy_jy[iy] = ((jy_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
176  sy_jz[iy] = ((jz_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
177  }
178  int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
179  int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
180  int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
181 #endif
182 
183  // z direction
184  // Keep these double to avoid bug in single precision
185  const double zmid = ((zp - zmin) + relative_time*vz)*dzi;
186  double sz_node[depos_order + 1] = {0.};
187  double sz_cell[depos_order + 1] = {0.};
188  int l_node = 0;
189  int l_cell = 0;
190  if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
191  l_node = compute_shape_factor(sz_node, zmid);
192  }
193  if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
194  l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
195  }
196  amrex::Real sz_jx[depos_order + 1] = {0._rt};
197  amrex::Real sz_jy[depos_order + 1] = {0._rt};
198  amrex::Real sz_jz[depos_order + 1] = {0._rt};
199  for (int iz=0; iz<=depos_order; iz++)
200  {
201  sz_jx[iz] = ((jx_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
202  sz_jy[iz] = ((jy_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
203  sz_jz[iz] = ((jz_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
204  }
205  int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
206  int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
207  int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
208 
209  // Deposit current into jx_arr, jy_arr and jz_arr
210 #if defined(WARPX_DIM_1D_Z)
211  for (int iz=0; iz<=depos_order; iz++){
213  &jx_arr(lo.x+l_jx+iz, 0, 0, 0),
214  sz_jx[iz]*wqx);
216  &jy_arr(lo.x+l_jy+iz, 0, 0, 0),
217  sz_jy[iz]*wqy);
219  &jz_arr(lo.x+l_jz+iz, 0, 0, 0),
220  sz_jz[iz]*wqz);
221  }
222 #endif
223 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
224  for (int iz=0; iz<=depos_order; iz++){
225  for (int ix=0; ix<=depos_order; ix++){
227  &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0),
228  sx_jx[ix]*sz_jx[iz]*wqx);
230  &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0),
231  sx_jy[ix]*sz_jy[iz]*wqy);
233  &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0),
234  sx_jz[ix]*sz_jz[iz]*wqz);
235 #if defined(WARPX_DIM_RZ)
236  Complex xy = xy0; // Note that xy is equal to e^{i m theta}
237  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
238  // The factor 2 on the weighting comes from the normalization of the modes
239  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode-1), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.real());
240  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode ), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.imag());
241  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode-1), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.real());
242  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode ), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.imag());
243  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode-1), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.real());
244  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode ), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.imag());
245  xy = xy*xy0;
246  }
247 #endif
248  }
249  }
250 #elif defined(WARPX_DIM_3D)
251  for (int iz=0; iz<=depos_order; iz++){
252  for (int iy=0; iy<=depos_order; iy++){
253  for (int ix=0; ix<=depos_order; ix++){
255  &jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz),
256  sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
258  &jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz),
259  sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
261  &jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz),
262  sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
263  }
264  }
265  }
266 #endif
267 }
268 
291 template <int depos_order>
293  const amrex::ParticleReal * const wp,
294  const amrex::ParticleReal * const uxp,
295  const amrex::ParticleReal * const uyp,
296  const amrex::ParticleReal * const uzp,
297  const int* ion_lev,
298  amrex::FArrayBox& jx_fab,
299  amrex::FArrayBox& jy_fab,
300  amrex::FArrayBox& jz_fab,
301  long np_to_deposit,
302  amrex::Real relative_time,
303  const std::array<amrex::Real,3>& dx,
304  const std::array<amrex::Real,3>& xyzmin,
305  amrex::Dim3 lo,
306  amrex::Real q,
307  int n_rz_azimuthal_modes)
308 {
309  using namespace amrex::literals;
310 
311 #if !defined(WARPX_DIM_RZ)
312  amrex::ignore_unused(n_rz_azimuthal_modes);
313 #endif
314 
315  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
316  // (do_ionization=1)
317  const bool do_ionization = ion_lev;
318  const amrex::Real dzi = 1.0_rt/dx[2];
319 #if defined(WARPX_DIM_1D_Z)
320  const amrex::Real invvol = dzi;
321 #endif
322 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
323  const amrex::Real dxi = 1.0_rt/dx[0];
324  const amrex::Real invvol = dxi*dzi;
325 #elif defined(WARPX_DIM_3D)
326  const amrex::Real dxi = 1.0_rt/dx[0];
327  const amrex::Real dyi = 1.0_rt/dx[1];
328  const amrex::Real invvol = dxi*dyi*dzi;
329 #endif
330 
331 #if (AMREX_SPACEDIM >= 2)
332  const amrex::Real xmin = xyzmin[0];
333 #endif
334 #if defined(WARPX_DIM_3D)
335  const amrex::Real ymin = xyzmin[1];
336 #endif
337  const amrex::Real zmin = xyzmin[2];
338 
339  const amrex::Real clightsq = 1.0_rt/PhysConst::c/PhysConst::c;
340 
341  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
342  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
343  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
344  amrex::IntVect const jx_type = jx_fab.box().type();
345  amrex::IntVect const jy_type = jy_fab.box().type();
346  amrex::IntVect const jz_type = jz_fab.box().type();
347 
348  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
350  np_to_deposit,
351  [=] AMREX_GPU_DEVICE (long ip) {
352  amrex::ParticleReal xp, yp, zp;
353  GetPosition(ip, xp, yp, zp);
354 
355  // --- Get particle quantities
356  const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
357  + uyp[ip]*uyp[ip]*clightsq
358  + uzp[ip]*uzp[ip]*clightsq);
359  const amrex::Real vx = uxp[ip]*gaminv;
360  const amrex::Real vy = uyp[ip]*gaminv;
361  const amrex::Real vz = uzp[ip]*gaminv;
362 
363  amrex::Real wq = q*wp[ip];
364  if (do_ionization){
365  wq *= ion_lev[ip];
366  }
367 
368  doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
369  jx_type, jy_type, jz_type,
370  relative_time,
371  AMREX_D_DECL(dzi, dxi, dyi),
372  AMREX_D_DECL(zmin, xmin, ymin),
373  invvol, lo, n_rz_azimuthal_modes);
374 
375  }
376  );
377 }
378 
400 template <int depos_order>
402  const amrex::ParticleReal * const wp,
403  const amrex::ParticleReal * const uxp_n,
404  const amrex::ParticleReal * const uyp_n,
405  const amrex::ParticleReal * const uzp_n,
406  const amrex::ParticleReal * const uxp,
407  const amrex::ParticleReal * const uyp,
408  const amrex::ParticleReal * const uzp,
409  const int * const ion_lev,
410  amrex::FArrayBox& jx_fab,
411  amrex::FArrayBox& jy_fab,
412  amrex::FArrayBox& jz_fab,
413  const long np_to_deposit,
414  const std::array<amrex::Real,3>& dx,
415  const std::array<amrex::Real,3>& xyzmin,
416  const amrex::Dim3 lo,
417  const amrex::Real q,
418  const int n_rz_azimuthal_modes)
419 {
420  using namespace amrex::literals;
421 #if !defined(WARPX_DIM_RZ)
422  amrex::ignore_unused(n_rz_azimuthal_modes);
423 #endif
424 
425  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
426  // (do_ionization=1)
427  const bool do_ionization = ion_lev;
428  const amrex::Real dzi = 1.0_rt/dx[2];
429 #if defined(WARPX_DIM_1D_Z)
430  const amrex::Real invvol = dzi;
431 #endif
432 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
433  const amrex::Real dxi = 1.0_rt/dx[0];
434  const amrex::Real invvol = dxi*dzi;
435 #elif defined(WARPX_DIM_3D)
436  const amrex::Real dxi = 1.0_rt/dx[0];
437  const amrex::Real dyi = 1.0_rt/dx[1];
438  const amrex::Real invvol = dxi*dyi*dzi;
439 #endif
440 
441 #if (AMREX_SPACEDIM >= 2)
442  const amrex::Real xmin = xyzmin[0];
443 #endif
444 #if defined(WARPX_DIM_3D)
445  const amrex::Real ymin = xyzmin[1];
446 #endif
447  const amrex::Real zmin = xyzmin[2];
448 
449  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
450  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
451  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
452  amrex::IntVect const jx_type = jx_fab.box().type();
453  amrex::IntVect const jy_type = jy_fab.box().type();
454  amrex::IntVect const jz_type = jz_fab.box().type();
455 
456  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
458  np_to_deposit,
459  [=] AMREX_GPU_DEVICE (long ip) {
460  amrex::ParticleReal xp, yp, zp;
461  GetPosition(ip, xp, yp, zp);
462 
463  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
464 
465  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
466  // The uxp,uyp,uzp are the velocities at time level n+1/2
467  const amrex::ParticleReal uxp_np1 = 2._prt*uxp[ip] - uxp_n[ip];
468  const amrex::ParticleReal uyp_np1 = 2._prt*uyp[ip] - uyp_n[ip];
469  const amrex::ParticleReal uzp_np1 = 2._prt*uzp[ip] - uzp_n[ip];
470  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
471  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
472  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
473 
474  const amrex::Real vx = uxp[ip]*gaminv;
475  const amrex::Real vy = uyp[ip]*gaminv;
476  const amrex::Real vz = uzp[ip]*gaminv;
477 
478  amrex::Real wq = q*wp[ip];
479  if (do_ionization){
480  wq *= ion_lev[ip];
481  }
482 
483  const amrex::Real relative_time = 0._rt;
484  doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
485  jx_type, jy_type, jz_type,
486  relative_time,
487  AMREX_D_DECL(dzi, dxi, dyi),
488  AMREX_D_DECL(zmin, xmin, ymin),
489  invvol, lo, n_rz_azimuthal_modes);
490 
491  }
492  );
493 }
494 
518 template <int depos_order>
520  const amrex::ParticleReal * const wp,
521  const amrex::ParticleReal * const uxp,
522  const amrex::ParticleReal * const uyp,
523  const amrex::ParticleReal * const uzp,
524  const int* ion_lev,
525  amrex::FArrayBox& jx_fab,
526  amrex::FArrayBox& jy_fab,
527  amrex::FArrayBox& jz_fab,
528  long np_to_deposit,
529  const amrex::Real relative_time,
530  const std::array<amrex::Real,3>& dx,
531  const std::array<amrex::Real,3>& xyzmin,
532  amrex::Dim3 lo,
533  amrex::Real q,
534  int n_rz_azimuthal_modes,
536  const amrex::Box& box,
537  const amrex::Geometry& geom,
538  const amrex::IntVect& a_tbox_max_size)
539 {
540  using namespace amrex::literals;
541 
542 #if defined(AMREX_USE_HIP) || defined(AMREX_USE_CUDA)
543  using namespace amrex;
544 
545  auto permutation = a_bins.permutationPtr();
546 
547  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
548  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
549  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
550  amrex::IntVect const jx_type = jx_fab.box().type();
551  amrex::IntVect const jy_type = jy_fab.box().type();
552  amrex::IntVect const jz_type = jz_fab.box().type();
553 
554  constexpr int zdir = WARPX_ZINDEX;
555  constexpr int NODE = amrex::IndexType::NODE;
556  constexpr int CELL = amrex::IndexType::CELL;
557 
558  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
559  const auto dxiarr = geom.InvCellSizeArray();
560  const auto plo = geom.ProbLoArray();
561  const auto domain = geom.Domain();
562 
563  amrex::Box sample_tbox(IntVect(AMREX_D_DECL(0,0,0)), a_tbox_max_size - 1);
564  sample_tbox.grow(depos_order);
565 
566  amrex::Box sample_tbox_x = convert(sample_tbox, jx_type);
567  amrex::Box sample_tbox_y = convert(sample_tbox, jy_type);
568  amrex::Box sample_tbox_z = convert(sample_tbox, jz_type);
569 
570  const auto npts = amrex::max(sample_tbox_x.numPts(), sample_tbox_y.numPts(), sample_tbox_z.numPts());
571 
572  const int nblocks = a_bins.numBins();
573  const int threads_per_block = WarpX::shared_mem_current_tpb;
574  const auto offsets_ptr = a_bins.offsetsPtr();
575 
576  const std::size_t shared_mem_bytes = npts*sizeof(amrex::Real);
577  const amrex::IntVect bin_size = WarpX::shared_tilesize;
578  const std::size_t max_shared_mem_bytes = amrex::Gpu::Device::sharedMemPerBlock();
579  WARPX_ALWAYS_ASSERT_WITH_MESSAGE(shared_mem_bytes <= max_shared_mem_bytes,
580  "Tile size too big for GPU shared memory current deposition");
581 
582  amrex::ignore_unused(np_to_deposit);
583  // Launch one thread-block per bin
585  nblocks, threads_per_block, shared_mem_bytes, amrex::Gpu::gpuStream(),
586  [=] AMREX_GPU_DEVICE () noexcept {
587  const int bin_id = blockIdx.x;
588  const unsigned int bin_start = offsets_ptr[bin_id];
589  const unsigned int bin_stop = offsets_ptr[bin_id+1];
590 
591  if (bin_start == bin_stop) { return; /*this bin has no particles*/ }
592 
593  // These boxes define the index space for the shared memory buffers
594  amrex::Box buffer_box;
595  {
596  ParticleReal xp, yp, zp;
597  GetPosition(permutation[bin_start], xp, yp, zp);
598 #if defined(WARPX_DIM_3D)
599  IntVect iv = IntVect(int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
600  int( amrex::Math::floor((yp-plo[1]) * dxiarr[1]) ),
601  int( amrex::Math::floor((zp-plo[2]) * dxiarr[2]) ));
602 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
603  IntVect iv = IntVect(int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
604  int( amrex::Math::floor((zp-plo[1]) * dxiarr[1]) ));
605 #elif defined(WARPX_DIM_1D_Z)
606  IntVect iv = IntVect(int( amrex::Math::floor((zp-plo[0]) * dxiarr[0]) ));
607 #endif
608  iv += domain.smallEnd();
609  getTileIndex(iv, box, true, bin_size, buffer_box);
610  }
611 
612  buffer_box.grow(depos_order);
613  Box tbox_x = convert(buffer_box, jx_type);
614  Box tbox_y = convert(buffer_box, jy_type);
615  Box tbox_z = convert(buffer_box, jz_type);
616 
618  amrex::Real* const shared = gsm.dataPtr();
619 
620  amrex::Array4<amrex::Real> const jx_buff(shared,
621  amrex::begin(tbox_x), amrex::end(tbox_x), 1);
622  amrex::Array4<amrex::Real> const jy_buff(shared,
623  amrex::begin(tbox_y), amrex::end(tbox_y), 1);
624  amrex::Array4<amrex::Real> const jz_buff(shared,
625  amrex::begin(tbox_z), amrex::end(tbox_z), 1);
626 
627  // Zero-initialize the temporary array in shared memory
628  volatile amrex::Real* vs = shared;
629  for (int i = threadIdx.x; i < npts; i += blockDim.x){
630  vs[i] = 0.0;
631  }
632  __syncthreads();
633  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
634  {
635  const unsigned int ip = permutation[ip_orig];
636  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_buff, jx_type,
637  relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes,
638  ip, zdir, NODE, CELL, 0);
639  }
640 
641  __syncthreads();
642  addLocalToGlobal(tbox_x, jx_arr, jx_buff);
643  for (int i = threadIdx.x; i < npts; i += blockDim.x){
644  vs[i] = 0.0;
645  }
646 
647  __syncthreads();
648  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
649  {
650  const unsigned int ip = permutation[ip_orig];
651  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jy_buff, jy_type,
652  relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes,
653  ip, zdir, NODE, CELL, 1);
654  }
655 
656  __syncthreads();
657  addLocalToGlobal(tbox_y, jy_arr, jy_buff);
658  for (int i = threadIdx.x; i < npts; i += blockDim.x){
659  vs[i] = 0.0;
660  }
661 
662  __syncthreads();
663  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
664  {
665  const unsigned int ip = permutation[ip_orig];
666  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jz_buff, jz_type,
667  relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes,
668  ip, zdir, NODE, CELL, 2);
669  }
670 
671  __syncthreads();
672  addLocalToGlobal(tbox_z, jz_arr, jz_buff);
673  });
674 #else // not using hip/cuda
675  // Note, you should never reach this part of the code. This funcion cannot be called unless
676  // using HIP/CUDA, and those things are checked prior
677  //don't use any args
678  ignore_unused(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes, a_bins, box, geom, a_tbox_max_size);
679  WARPX_ABORT_WITH_MESSAGE("Shared memory only implemented for HIP/CUDA");
680 #endif
681 }
682 
707 template <int depos_order>
709  const amrex::ParticleReal * const wp,
710  const amrex::ParticleReal * const uxp,
711  const amrex::ParticleReal * const uyp,
712  const amrex::ParticleReal * const uzp,
713  const int* ion_lev,
714  const amrex::Array4<amrex::Real>& Jx_arr,
715  const amrex::Array4<amrex::Real>& Jy_arr,
716  const amrex::Array4<amrex::Real>& Jz_arr,
717  long np_to_deposit,
718  amrex::Real dt,
719  amrex::Real relative_time,
720  const std::array<amrex::Real,3>& dx,
721  std::array<amrex::Real, 3> xyzmin,
722  amrex::Dim3 lo,
723  amrex::Real q,
724  int n_rz_azimuthal_modes)
725 {
726  using namespace amrex;
727  using namespace amrex::literals;
728 
729 #if !defined(WARPX_DIM_RZ)
730  ignore_unused(n_rz_azimuthal_modes);
731 #endif
732 
733  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
734  // (do_ionization=1)
735  bool const do_ionization = ion_lev;
736 #if !defined(WARPX_DIM_1D_Z)
737  Real const dxi = 1.0_rt / dx[0];
738 #endif
739 #if !defined(WARPX_DIM_1D_Z)
740  Real const xmin = xyzmin[0];
741 #endif
742 #if defined(WARPX_DIM_3D)
743  Real const dyi = 1.0_rt / dx[1];
744  Real const ymin = xyzmin[1];
745 #endif
746  Real const dzi = 1.0_rt / dx[2];
747  Real const zmin = xyzmin[2];
748 
749 #if defined(WARPX_DIM_3D)
750  Real const invdtdx = 1.0_rt / (dt*dx[1]*dx[2]);
751  Real const invdtdy = 1.0_rt / (dt*dx[0]*dx[2]);
752  Real const invdtdz = 1.0_rt / (dt*dx[0]*dx[1]);
753 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
754  Real const invdtdx = 1.0_rt / (dt*dx[2]);
755  Real const invdtdz = 1.0_rt / (dt*dx[0]);
756  Real const invvol = 1.0_rt / (dx[0]*dx[2]);
757 #elif defined(WARPX_DIM_1D_Z)
758  Real const invdtdz = 1.0_rt / (dt*dx[0]);
759  Real const invvol = 1.0_rt / (dx[2]);
760 #endif
761 
762 #if defined(WARPX_DIM_RZ)
763  Complex const I = Complex{0._rt, 1._rt};
764 #endif
765 
766  Real const clightsq = 1.0_rt / ( PhysConst::c * PhysConst::c );
767 #if !defined(WARPX_DIM_1D_Z)
768  Real constexpr one_third = 1.0_rt / 3.0_rt;
769  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
770 #endif
771 
772  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
774  np_to_deposit,
775  [=] AMREX_GPU_DEVICE (long const ip) {
776  // --- Get particle quantities
777  Real const gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
778  + uyp[ip]*uyp[ip]*clightsq
779  + uzp[ip]*uzp[ip]*clightsq);
780 
781  // wqx, wqy wqz are particle current in each direction
782  Real wq = q*wp[ip];
783  if (do_ionization){
784  wq *= ion_lev[ip];
785  }
786 
787  ParticleReal xp, yp, zp;
788  GetPosition(ip, xp, yp, zp);
789 
790 #if !defined(WARPX_DIM_1D_Z)
791  Real const wqx = wq*invdtdx;
792 #endif
793 #if defined(WARPX_DIM_3D)
794  Real const wqy = wq*invdtdy;
795 #endif
796  Real const wqz = wq*invdtdz;
797 
798  // computes current and old position in grid units
799 #if defined(WARPX_DIM_RZ)
800  Real const xp_new = xp + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv;
801  Real const yp_new = yp + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv;
802  Real const xp_mid = xp_new - 0.5_rt*dt*uxp[ip]*gaminv;
803  Real const yp_mid = yp_new - 0.5_rt*dt*uyp[ip]*gaminv;
804  Real const xp_old = xp_new - dt*uxp[ip]*gaminv;
805  Real const yp_old = yp_new - dt*uyp[ip]*gaminv;
806  Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
807  Real const rp_mid = std::sqrt(xp_mid*xp_mid + yp_mid*yp_mid);
808  Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
809  Real costheta_new, sintheta_new;
810  if (rp_new > 0._rt) {
811  costheta_new = xp_new/rp_new;
812  sintheta_new = yp_new/rp_new;
813  } else {
814  costheta_new = 1._rt;
815  sintheta_new = 0._rt;
816  }
817  amrex::Real costheta_mid, sintheta_mid;
818  if (rp_mid > 0._rt) {
819  costheta_mid = xp_mid/rp_mid;
820  sintheta_mid = yp_mid/rp_mid;
821  } else {
822  costheta_mid = 1._rt;
823  sintheta_mid = 0._rt;
824  }
825  amrex::Real costheta_old, sintheta_old;
826  if (rp_old > 0._rt) {
827  costheta_old = xp_old/rp_old;
828  sintheta_old = yp_old/rp_old;
829  } else {
830  costheta_old = 1._rt;
831  sintheta_old = 0._rt;
832  }
833  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
834  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
835  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
836  // Keep these double to avoid bug in single precision
837  double const x_new = (rp_new - xmin)*dxi;
838  double const x_old = (rp_old - xmin)*dxi;
839 #else
840 #if !defined(WARPX_DIM_1D_Z)
841  // Keep these double to avoid bug in single precision
842  double const x_new = (xp - xmin + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv)*dxi;
843  double const x_old = x_new - dt*dxi*uxp[ip]*gaminv;
844 #endif
845 #endif
846 #if defined(WARPX_DIM_3D)
847  // Keep these double to avoid bug in single precision
848  double const y_new = (yp - ymin + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv)*dyi;
849  double const y_old = y_new - dt*dyi*uyp[ip]*gaminv;
850 #endif
851  // Keep these double to avoid bug in single precision
852  double const z_new = (zp - zmin + (relative_time + 0.5_rt*dt)*uzp[ip]*gaminv)*dzi;
853  double const z_old = z_new - dt*dzi*uzp[ip]*gaminv;
854 
855 #if defined(WARPX_DIM_RZ)
856  Real const vy = (-uxp[ip]*sintheta_mid + uyp[ip]*costheta_mid)*gaminv;
857 #elif defined(WARPX_DIM_XZ)
858  Real const vy = uyp[ip]*gaminv;
859 #elif defined(WARPX_DIM_1D_Z)
860  Real const vx = uxp[ip]*gaminv;
861  Real const vy = uyp[ip]*gaminv;
862 #endif
863 
864  // Shape factor arrays
865  // Note that there are extra values above and below
866  // to possibly hold the factor for the old particle
867  // which can be at a different grid location.
868  // Keep these double to avoid bug in single precision
869 #if !defined(WARPX_DIM_1D_Z)
870  double sx_new[depos_order + 3] = {0.};
871  double sx_old[depos_order + 3] = {0.};
872 #endif
873 #if defined(WARPX_DIM_3D)
874  // Keep these double to avoid bug in single precision
875  double sy_new[depos_order + 3] = {0.};
876  double sy_old[depos_order + 3] = {0.};
877 #endif
878  // Keep these double to avoid bug in single precision
879  double sz_new[depos_order + 3] = {0.};
880  double sz_old[depos_order + 3] = {0.};
881 
882  // --- Compute shape factors
883  // Compute shape factors for position as they are now and at old positions
884  // [ijk]_new: leftmost grid point that the particle touches
885  const Compute_shape_factor< depos_order > compute_shape_factor;
886  const Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
887 
888 #if !defined(WARPX_DIM_1D_Z)
889  const int i_new = compute_shape_factor(sx_new+1, x_new);
890  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
891 #endif
892 #if defined(WARPX_DIM_3D)
893  const int j_new = compute_shape_factor(sy_new+1, y_new);
894  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
895 #endif
896  const int k_new = compute_shape_factor(sz_new+1, z_new);
897  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
898 
899  // computes min/max positions of current contributions
900 #if !defined(WARPX_DIM_1D_Z)
901  int dil = 1, diu = 1;
902  if (i_old < i_new) { dil = 0; }
903  if (i_old > i_new) { diu = 0; }
904 #endif
905 #if defined(WARPX_DIM_3D)
906  int djl = 1, dju = 1;
907  if (j_old < j_new) { djl = 0; }
908  if (j_old > j_new) { dju = 0; }
909 #endif
910  int dkl = 1, dku = 1;
911  if (k_old < k_new) { dkl = 0; }
912  if (k_old > k_new) { dku = 0; }
913 
914 #if defined(WARPX_DIM_3D)
915 
916  for (int k=dkl; k<=depos_order+2-dku; k++) {
917  for (int j=djl; j<=depos_order+2-dju; j++) {
918  amrex::Real sdxi = 0._rt;
919  for (int i=dil; i<=depos_order+1-diu; i++) {
920  sdxi += wqx*(sx_old[i] - sx_new[i])*(
921  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
922  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
923  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
924  }
925  }
926  }
927  for (int k=dkl; k<=depos_order+2-dku; k++) {
928  for (int i=dil; i<=depos_order+2-diu; i++) {
929  amrex::Real sdyj = 0._rt;
930  for (int j=djl; j<=depos_order+1-dju; j++) {
931  sdyj += wqy*(sy_old[j] - sy_new[j])*(
932  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
933  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
934  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
935  }
936  }
937  }
938  for (int j=djl; j<=depos_order+2-dju; j++) {
939  for (int i=dil; i<=depos_order+2-diu; i++) {
940  amrex::Real sdzk = 0._rt;
941  for (int k=dkl; k<=depos_order+1-dku; k++) {
942  sdzk += wqz*(sz_old[k] - sz_new[k])*(
943  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
944  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
945  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
946  }
947  }
948  }
949 
950 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
951 
952  for (int k=dkl; k<=depos_order+2-dku; k++) {
953  amrex::Real sdxi = 0._rt;
954  for (int i=dil; i<=depos_order+1-diu; i++) {
955  sdxi += wqx*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
956  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
957 #if defined(WARPX_DIM_RZ)
958  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
959  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
960  // The factor 2 comes from the normalization of the modes
961  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
962  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
963  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
964  xy_mid = xy_mid*xy_mid0;
965  }
966 #endif
967  }
968  }
969  for (int k=dkl; k<=depos_order+2-dku; k++) {
970  for (int i=dil; i<=depos_order+2-diu; i++) {
971  Real const sdyj = wq*vy*invvol*(
972  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
973  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
974  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
975 #if defined(WARPX_DIM_RZ)
976  Complex xy_new = xy_new0;
977  Complex xy_mid = xy_mid0;
978  Complex xy_old = xy_old0;
979  // Throughout the following loop, xy_ takes the value e^{i m theta_}
980  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
981  // The factor 2 comes from the normalization of the modes
982  // The minus sign comes from the different convention with respect to Davidson et al.
983  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode
984  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
985  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
986  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
987  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
988  xy_new = xy_new*xy_new0;
989  xy_mid = xy_mid*xy_mid0;
990  xy_old = xy_old*xy_old0;
991  }
992 #endif
993  }
994  }
995  for (int i=dil; i<=depos_order+2-diu; i++) {
996  Real sdzk = 0._rt;
997  for (int k=dkl; k<=depos_order+1-dku; k++) {
998  sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
999  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
1000 #if defined(WARPX_DIM_RZ)
1001  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1002  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1003  // The factor 2 comes from the normalization of the modes
1004  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1005  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
1006  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
1007  xy_mid = xy_mid*xy_mid0;
1008  }
1009 #endif
1010  }
1011  }
1012 #elif defined(WARPX_DIM_1D_Z)
1013 
1014  for (int k=dkl; k<=depos_order+2-dku; k++) {
1015  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1016  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
1017  }
1018  for (int k=dkl; k<=depos_order+2-dku; k++) {
1019  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1020  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
1021  }
1022  amrex::Real sdzk = 0._rt;
1023  for (int k=dkl; k<=depos_order+1-dku; k++) {
1024  sdzk += wqz*(sz_old[k] - sz_new[k]);
1025  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
1026  }
1027 #endif
1028  }
1029  );
1030 }
1031 
1056 template <int depos_order>
1057 void doChargeConservingDepositionShapeNImplicit (const amrex::ParticleReal * const xp_n,
1058  const amrex::ParticleReal * const yp_n,
1059  const amrex::ParticleReal * const zp_n,
1060  const GetParticlePosition<PIdx>& GetPosition,
1061  const amrex::ParticleReal * const wp,
1062  [[maybe_unused]]const amrex::ParticleReal * const uxp_n,
1063  [[maybe_unused]]const amrex::ParticleReal * const uyp_n,
1064  [[maybe_unused]]const amrex::ParticleReal * const uzp_n,
1065  [[maybe_unused]]const amrex::ParticleReal * const uxp_nph,
1066  [[maybe_unused]]const amrex::ParticleReal * const uyp_nph,
1067  [[maybe_unused]]const amrex::ParticleReal * const uzp_nph,
1068  const int * const ion_lev,
1069  const amrex::Array4<amrex::Real>& Jx_arr,
1070  const amrex::Array4<amrex::Real>& Jy_arr,
1071  const amrex::Array4<amrex::Real>& Jz_arr,
1072  const long np_to_deposit,
1073  const amrex::Real dt,
1074  const std::array<amrex::Real, 3>& dx,
1075  const std::array<amrex::Real, 3> xyzmin,
1076  const amrex::Dim3 lo,
1077  const amrex::Real q,
1078  const int n_rz_azimuthal_modes)
1079 {
1080  using namespace amrex;
1081 #if !defined(WARPX_DIM_RZ)
1082  ignore_unused(n_rz_azimuthal_modes);
1083 #endif
1084 
1085  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
1086  // (do_ionization=1)
1087  bool const do_ionization = ion_lev;
1088 #if !defined(WARPX_DIM_1D_Z)
1089  Real const dxi = 1.0_rt / dx[0];
1090 #endif
1091 #if !defined(WARPX_DIM_1D_Z)
1092  Real const xmin = xyzmin[0];
1093 #endif
1094 #if defined(WARPX_DIM_3D)
1095  Real const dyi = 1.0_rt / dx[1];
1096  Real const ymin = xyzmin[1];
1097 #endif
1098  Real const dzi = 1.0_rt / dx[2];
1099  Real const zmin = xyzmin[2];
1100 
1101 #if defined(WARPX_DIM_3D)
1102  Real const invdtdx = 1.0_rt / (dt*dx[1]*dx[2]);
1103  Real const invdtdy = 1.0_rt / (dt*dx[0]*dx[2]);
1104  Real const invdtdz = 1.0_rt / (dt*dx[0]*dx[1]);
1105 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1106  Real const invdtdx = 1.0_rt / (dt*dx[2]);
1107  Real const invdtdz = 1.0_rt / (dt*dx[0]);
1108  Real const invvol = 1.0_rt / (dx[0]*dx[2]);
1109 #elif defined(WARPX_DIM_1D_Z)
1110  Real const invdtdz = 1.0_rt / (dt*dx[0]);
1111  Real const invvol = 1.0_rt / (dx[2]);
1112 #endif
1113 
1114 #if defined(WARPX_DIM_RZ)
1115  Complex const I = Complex{0._rt, 1._rt};
1116 #endif
1117 
1118 #if !defined(WARPX_DIM_1D_Z)
1119  Real constexpr one_third = 1.0_rt / 3.0_rt;
1120  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
1121 #endif
1122 
1123  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
1125  np_to_deposit,
1126  [=] AMREX_GPU_DEVICE (long const ip) {
1127 #if !defined(WARPX_DIM_3D)
1128  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
1129 
1130  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
1131  // The uxp,uyp,uzp are the velocities at time level n+1/2
1132  const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
1133  const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
1134  const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
1135  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
1136  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
1137  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
1138 #endif
1139 
1140  // wqx, wqy wqz are particle current in each direction
1141  Real wq = q*wp[ip];
1142  if (do_ionization){
1143  wq *= ion_lev[ip];
1144  }
1145 
1146  ParticleReal xp_nph, yp_nph, zp_nph;
1147  GetPosition(ip, xp_nph, yp_nph, zp_nph);
1148 
1149 #if !defined(WARPX_DIM_1D_Z)
1150  ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n[ip];
1151 #else
1152  ignore_unused(xp_n);
1153 #endif
1154 #if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
1155  ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n[ip];
1156 #else
1157  ignore_unused(yp_n);
1158 #endif
1159  ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n[ip];
1160 
1161 #if !defined(WARPX_DIM_1D_Z)
1162  amrex::Real const wqx = wq*invdtdx;
1163 #endif
1164 #if defined(WARPX_DIM_3D)
1165  amrex::Real const wqy = wq*invdtdy;
1166 #endif
1167  amrex::Real const wqz = wq*invdtdz;
1168 
1169  // computes current and old position in grid units
1170 #if defined(WARPX_DIM_RZ)
1171  amrex::Real const xp_new = xp_np1;
1172  amrex::Real const yp_new = yp_np1;
1173  amrex::Real const xp_mid = xp_nph;
1174  amrex::Real const yp_mid = yp_nph;
1175  amrex::Real const xp_old = xp_n[ip];
1176  amrex::Real const yp_old = yp_n[ip];
1177  amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
1178  amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
1179  amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
1180  amrex::Real costheta_new, sintheta_new;
1181  if (rp_new > 0._rt) {
1182  costheta_new = xp_new/rp_new;
1183  sintheta_new = yp_new/rp_new;
1184  } else {
1185  costheta_new = 1._rt;
1186  sintheta_new = 0._rt;
1187  }
1188  amrex::Real costheta_mid, sintheta_mid;
1189  if (rp_mid > 0._rt) {
1190  costheta_mid = xp_mid/rp_mid;
1191  sintheta_mid = yp_mid/rp_mid;
1192  } else {
1193  costheta_mid = 1._rt;
1194  sintheta_mid = 0._rt;
1195  }
1196  amrex::Real costheta_old, sintheta_old;
1197  if (rp_old > 0._rt) {
1198  costheta_old = xp_old/rp_old;
1199  sintheta_old = yp_old/rp_old;
1200  } else {
1201  costheta_old = 1._rt;
1202  sintheta_old = 0._rt;
1203  }
1204  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
1205  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
1206  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
1207  // Keep these double to avoid bug in single precision
1208  double const x_new = (rp_new - xmin)*dxi;
1209  double const x_old = (rp_old - xmin)*dxi;
1210 #else
1211 #if !defined(WARPX_DIM_1D_Z)
1212  // Keep these double to avoid bug in single precision
1213  double const x_new = (xp_np1 - xmin)*dxi;
1214  double const x_old = (xp_n[ip] - xmin)*dxi;
1215 #endif
1216 #endif
1217 #if defined(WARPX_DIM_3D)
1218  // Keep these double to avoid bug in single precision
1219  double const y_new = (yp_np1 - ymin)*dyi;
1220  double const y_old = (yp_n[ip] - ymin)*dyi;
1221 #endif
1222  // Keep these double to avoid bug in single precision
1223  double const z_new = (zp_np1 - zmin)*dzi;
1224  double const z_old = (zp_n[ip] - zmin)*dzi;
1225 
1226 #if defined(WARPX_DIM_RZ)
1227  amrex::Real const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
1228 #elif defined(WARPX_DIM_XZ)
1229  amrex::Real const vy = uyp_nph[ip]*gaminv;
1230 #elif defined(WARPX_DIM_1D_Z)
1231  amrex::Real const vx = uxp_nph[ip]*gaminv;
1232  amrex::Real const vy = uyp_nph[ip]*gaminv;
1233 #endif
1234 
1235  // Shape factor arrays
1236  // Note that there are extra values above and below
1237  // to possibly hold the factor for the old particle
1238  // which can be at a different grid location.
1239  // Keep these double to avoid bug in single precision
1240 #if !defined(WARPX_DIM_1D_Z)
1241  double sx_new[depos_order + 3] = {0.};
1242  double sx_old[depos_order + 3] = {0.};
1243 #endif
1244 #if defined(WARPX_DIM_3D)
1245  // Keep these double to avoid bug in single precision
1246  double sy_new[depos_order + 3] = {0.};
1247  double sy_old[depos_order + 3] = {0.};
1248 #endif
1249  // Keep these double to avoid bug in single precision
1250  double sz_new[depos_order + 3] = {0.};
1251  double sz_old[depos_order + 3] = {0.};
1252 
1253  // --- Compute shape factors
1254  // Compute shape factors for position as they are now and at old positions
1255  // [ijk]_new: leftmost grid point that the particle touches
1256  const Compute_shape_factor< depos_order > compute_shape_factor;
1257  const Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
1258 
1259 #if !defined(WARPX_DIM_1D_Z)
1260  const int i_new = compute_shape_factor(sx_new+1, x_new);
1261  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
1262 #endif
1263 #if defined(WARPX_DIM_3D)
1264  const int j_new = compute_shape_factor(sy_new+1, y_new);
1265  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
1266 #endif
1267  const int k_new = compute_shape_factor(sz_new+1, z_new);
1268  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
1269 
1270  // computes min/max positions of current contributions
1271 #if !defined(WARPX_DIM_1D_Z)
1272  int dil = 1, diu = 1;
1273  if (i_old < i_new) { dil = 0; }
1274  if (i_old > i_new) { diu = 0; }
1275 #endif
1276 #if defined(WARPX_DIM_3D)
1277  int djl = 1, dju = 1;
1278  if (j_old < j_new) { djl = 0; }
1279  if (j_old > j_new) { dju = 0; }
1280 #endif
1281  int dkl = 1, dku = 1;
1282  if (k_old < k_new) { dkl = 0; }
1283  if (k_old > k_new) { dku = 0; }
1284 
1285 #if defined(WARPX_DIM_3D)
1286 
1287  for (int k=dkl; k<=depos_order+2-dku; k++) {
1288  for (int j=djl; j<=depos_order+2-dju; j++) {
1289  amrex::Real sdxi = 0._rt;
1290  for (int i=dil; i<=depos_order+1-diu; i++) {
1291  sdxi += wqx*(sx_old[i] - sx_new[i])*(
1292  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
1293  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
1294  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
1295  }
1296  }
1297  }
1298  for (int k=dkl; k<=depos_order+2-dku; k++) {
1299  for (int i=dil; i<=depos_order+2-diu; i++) {
1300  amrex::Real sdyj = 0._rt;
1301  for (int j=djl; j<=depos_order+1-dju; j++) {
1302  sdyj += wqy*(sy_old[j] - sy_new[j])*(
1303  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1304  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1305  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
1306  }
1307  }
1308  }
1309  for (int j=djl; j<=depos_order+2-dju; j++) {
1310  for (int i=dil; i<=depos_order+2-diu; i++) {
1311  amrex::Real sdzk = 0._rt;
1312  for (int k=dkl; k<=depos_order+1-dku; k++) {
1313  sdzk += wqz*(sz_old[k] - sz_new[k])*(
1314  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
1315  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
1316  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
1317  }
1318  }
1319  }
1320 
1321 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1322 
1323  for (int k=dkl; k<=depos_order+2-dku; k++) {
1324  amrex::Real sdxi = 0._rt;
1325  for (int i=dil; i<=depos_order+1-diu; i++) {
1326  sdxi += wqx*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
1327  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
1328 #if defined(WARPX_DIM_RZ)
1329  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1330  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1331  // The factor 2 comes from the normalization of the modes
1332  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
1333  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
1334  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
1335  xy_mid = xy_mid*xy_mid0;
1336  }
1337 #endif
1338  }
1339  }
1340  for (int k=dkl; k<=depos_order+2-dku; k++) {
1341  for (int i=dil; i<=depos_order+2-diu; i++) {
1342  Real const sdyj = wq*vy*invvol*(
1343  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1344  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1345  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
1346 #if defined(WARPX_DIM_RZ)
1347  Complex xy_new = xy_new0;
1348  Complex xy_mid = xy_mid0;
1349  Complex xy_old = xy_old0;
1350  // Throughout the following loop, xy_ takes the value e^{i m theta_}
1351  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1352  // The factor 2 comes from the normalization of the modes
1353  // The minus sign comes from the different convention with respect to Davidson et al.
1354  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode
1355  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
1356  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
1357  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
1358  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
1359  xy_new = xy_new*xy_new0;
1360  xy_mid = xy_mid*xy_mid0;
1361  xy_old = xy_old*xy_old0;
1362  }
1363 #endif
1364  }
1365  }
1366  for (int i=dil; i<=depos_order+2-diu; i++) {
1367  Real sdzk = 0._rt;
1368  for (int k=dkl; k<=depos_order+1-dku; k++) {
1369  sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
1370  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
1371 #if defined(WARPX_DIM_RZ)
1372  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1373  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1374  // The factor 2 comes from the normalization of the modes
1375  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1376  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
1377  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
1378  xy_mid = xy_mid*xy_mid0;
1379  }
1380 #endif
1381  }
1382  }
1383 #elif defined(WARPX_DIM_1D_Z)
1384 
1385  for (int k=dkl; k<=depos_order+2-dku; k++) {
1386  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1387  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
1388  }
1389  for (int k=dkl; k<=depos_order+2-dku; k++) {
1390  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1391  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
1392  }
1393  amrex::Real sdzk = 0._rt;
1394  for (int k=dkl; k<=depos_order+1-dku; k++) {
1395  sdzk += wqz*(sz_old[k] - sz_new[k]);
1396  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
1397  }
1398 #endif
1399  }
1400  );
1401 }
1402 
1429 template <int depos_order>
1430 void doVillasenorDepositionShapeNImplicit (const amrex::ParticleReal * const xp_n,
1431  const amrex::ParticleReal * const yp_n,
1432  const amrex::ParticleReal * const zp_n,
1433  const GetParticlePosition<PIdx>& GetPosition,
1434  const amrex::ParticleReal * const wp,
1435  [[maybe_unused]]const amrex::ParticleReal * const uxp_n,
1436  [[maybe_unused]]const amrex::ParticleReal * const uyp_n,
1437  [[maybe_unused]]const amrex::ParticleReal * const uzp_n,
1438  [[maybe_unused]]const amrex::ParticleReal * const uxp_nph,
1439  [[maybe_unused]]const amrex::ParticleReal * const uyp_nph,
1440  [[maybe_unused]]const amrex::ParticleReal * const uzp_nph,
1441  const int * const ion_lev,
1442  const amrex::Array4<amrex::Real>& Jx_arr,
1443  const amrex::Array4<amrex::Real>& Jy_arr,
1444  const amrex::Array4<amrex::Real>& Jz_arr,
1445  const long np_to_deposit,
1446  const amrex::Real dt,
1447  const std::array<amrex::Real, 3>& dx,
1448  const std::array<amrex::Real, 3> xyzmin,
1449  const amrex::Dim3 lo,
1450  const amrex::Real q,
1451  const int n_rz_azimuthal_modes)
1452 {
1453  using namespace amrex;
1454 #if !defined(WARPX_DIM_RZ)
1455  ignore_unused(n_rz_azimuthal_modes);
1456 #endif
1457 
1458  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
1459  // (do_ionization=1)
1460  bool const do_ionization = ion_lev;
1461 #if !defined(WARPX_DIM_1D_Z)
1462  Real const dxi = 1.0_rt / dx[0];
1463 #endif
1464 #if !defined(WARPX_DIM_1D_Z)
1465  Real const xmin = xyzmin[0];
1466 #endif
1467 #if defined(WARPX_DIM_3D)
1468  Real const dyi = 1.0_rt / dx[1];
1469  Real const ymin = xyzmin[1];
1470 #endif
1471  Real const dzi = 1.0_rt / dx[2];
1472  Real const zmin = xyzmin[2];
1473 
1474 #if defined(WARPX_DIM_3D)
1475  Real const invvol = 1.0_rt / (dx[0]*dx[1]*dx[2]);
1476 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1477  Real const invvol = 1.0_rt / (dx[0]*dx[2]);
1478 #elif defined(WARPX_DIM_1D_Z)
1479  Real const invvol = 1.0_rt / (dx[2]);
1480 #endif
1481 
1482 #if !defined(WARPX_DIM_1D_Z)
1483  Real constexpr one_third = 1.0_rt / 3.0_rt;
1484  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
1485 #endif
1486 
1487  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
1489  np_to_deposit,
1490  [=] AMREX_GPU_DEVICE (long const ip) {
1491 
1492 #if !defined(WARPX_DIM_3D)
1493  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
1494 
1495  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
1496  // The uxp,uyp,uzp are the velocities at time level n+1/2
1497  const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
1498  const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
1499  const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
1500  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
1501  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
1502  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
1503 #endif
1504 
1505  // wqx, wqy wqz are particle current in each direction
1506  Real wq = q*wp[ip];
1507  if (do_ionization){
1508  wq *= ion_lev[ip];
1509  }
1510 
1511  ParticleReal xp_nph, yp_nph, zp_nph;
1512  GetPosition(ip, xp_nph, yp_nph, zp_nph);
1513 
1514 #if !defined(WARPX_DIM_1D_Z)
1515  ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n[ip];
1516 #else
1517  ignore_unused(xp_n);
1518 #endif
1519 #if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
1520  ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n[ip];
1521 #else
1522  ignore_unused(yp_n);
1523 #endif
1524  ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n[ip];
1525 
1526  // computes current and old position in grid units
1527 #if defined(WARPX_DIM_RZ)
1528  amrex::Real const xp_new = xp_np1;
1529  amrex::Real const yp_new = yp_np1;
1530  amrex::Real const xp_mid = xp_nph;
1531  amrex::Real const yp_mid = yp_nph;
1532  amrex::Real const xp_old = xp_n[ip];
1533  amrex::Real const yp_old = yp_n[ip];
1534  amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
1535  amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
1536  amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
1537  amrex::Real costheta_mid, sintheta_mid;
1538  if (rp_mid > 0._rt) {
1539  costheta_mid = xp_mid/rp_mid;
1540  sintheta_mid = yp_mid/rp_mid;
1541  } else {
1542  costheta_mid = 1._rt;
1543  sintheta_mid = 0._rt;
1544  }
1545  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
1546 
1547  // Keep these double to avoid bug in single precision
1548  double const x_new = (rp_new - xmin)*dxi;
1549  double const x_old = (rp_old - xmin)*dxi;
1550  amrex::Real const vx = (rp_new - rp_old)/dt;
1551  amrex::Real const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
1552 #elif defined(WARPX_DIM_XZ)
1553  // Keep these double to avoid bug in single precision
1554  double const x_new = (xp_np1 - xmin)*dxi;
1555  double const x_old = (xp_n[ip] - xmin)*dxi;
1556  amrex::Real const vx = (xp_np1 - xp_n[ip])/dt;
1557  amrex::Real const vy = uyp_nph[ip]*gaminv;
1558 #elif defined(WARPX_DIM_1D_Z)
1559  amrex::Real const vx = uxp_nph[ip]*gaminv;
1560  amrex::Real const vy = uyp_nph[ip]*gaminv;
1561 #elif defined(WARPX_DIM_3D)
1562  // Keep these double to avoid bug in single precision
1563  double const x_new = (xp_np1 - xmin)*dxi;
1564  double const x_old = (xp_n[ip] - xmin)*dxi;
1565  double const y_new = (yp_np1 - ymin)*dyi;
1566  double const y_old = (yp_n[ip] - ymin)*dyi;
1567  amrex::Real const vx = (xp_np1 - xp_n[ip])/dt;
1568  amrex::Real const vy = (yp_np1 - yp_n[ip])/dt;
1569 #endif
1570 
1571  // Keep these double to avoid bug in single precision
1572  double const z_new = (zp_np1 - zmin)*dzi;
1573  double const z_old = (zp_n[ip] - zmin)*dzi;
1574  amrex::Real const vz = (zp_np1 - zp_n[ip])/dt;
1575 
1576  // Define velocity kernals to deposit
1577  amrex::Real const wqx = wq*vx*invvol;
1578  amrex::Real const wqy = wq*vy*invvol;
1579  amrex::Real const wqz = wq*vz*invvol;
1580 
1581  // 1) Determine the number of segments.
1582  // 2) Loop over segments and deposit current.
1583 
1584  // cell crossings are defined at cell edges if depos_order is odd
1585  // cell crossings are defined at cell centers if depos_order is even
1586 
1587  int num_segments = 1;
1588  double shift = 0.0;
1589  if ( (depos_order % 2) == 0 ) { shift = 0.5; }
1590 
1591 #if defined(WARPX_DIM_3D)
1592 
1593  // compute cell crossings in X-direction
1594  const auto i_old = static_cast<int>(x_old-shift);
1595  const auto i_new = static_cast<int>(x_new-shift);
1596  const int cell_crossings_x = std::abs(i_new-i_old);
1597  num_segments += cell_crossings_x;
1598 
1599  // compute cell crossings in Y-direction
1600  const auto j_old = static_cast<int>(y_old-shift);
1601  const auto j_new = static_cast<int>(y_new-shift);
1602  const int cell_crossings_y = std::abs(j_new-j_old);
1603  num_segments += cell_crossings_y;
1604 
1605  // compute cell crossings in Z-direction
1606  const auto k_old = static_cast<int>(z_old-shift);
1607  const auto k_new = static_cast<int>(z_new-shift);
1608  const int cell_crossings_z = std::abs(k_new-k_old);
1609  num_segments += cell_crossings_z;
1610 
1611  // need to assert that the number of cell crossings in each direction
1612  // is within the range permitted by the number of guard cells
1613  // e.g., if (num_segments > 7) ...
1614 
1615  // compute total change in particle position and the initial cell
1616  // locations in each direction used to find the position at cell crossings.
1617  const double dxp = x_new - x_old;
1618  const double dyp = y_new - y_old;
1619  const double dzp = z_new - z_old;
1620  const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
1621  const auto dirY_sign = static_cast<double>(dyp < 0. ? -1. : 1.);
1622  const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1623  double Xcell = 0., Ycell = 0., Zcell = 0.;
1624  if (num_segments > 1) {
1625  Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
1626  Ycell = static_cast<double>(j_old) + shift + 0.5*(1.-dirY_sign);
1627  Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1628  }
1629 
1630  // loop over the number of segments and deposit
1631  const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1632  const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1633  double dxp_seg, dyp_seg, dzp_seg;
1634  double x0_new, y0_new, z0_new;
1635  double x0_old = x_old;
1636  double y0_old = y_old;
1637  double z0_old = z_old;
1638 
1639  for (int ns=0; ns<num_segments; ns++) {
1640 
1641  if (ns == num_segments-1) { // final segment
1642 
1643  x0_new = x_new;
1644  y0_new = y_new;
1645  z0_new = z_new;
1646  dxp_seg = x0_new - x0_old;
1647  dyp_seg = y0_new - y0_old;
1648  dzp_seg = z0_new - z0_old;
1649 
1650  }
1651  else {
1652 
1653  x0_new = Xcell + dirX_sign;
1654  y0_new = Ycell + dirY_sign;
1655  z0_new = Zcell + dirZ_sign;
1656  dxp_seg = x0_new - x0_old;
1657  dyp_seg = y0_new - y0_old;
1658  dzp_seg = z0_new - z0_old;
1659 
1660  if ( (dyp == 0. || std::abs(dxp_seg) < std::abs(dxp/dyp*dyp_seg))
1661  && (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) ) {
1662  Xcell = x0_new;
1663  dyp_seg = dyp/dxp*dxp_seg;
1664  dzp_seg = dzp/dxp*dxp_seg;
1665  y0_new = y0_old + dyp_seg;
1666  z0_new = z0_old + dzp_seg;
1667  }
1668  else if (dzp == 0. || std::abs(dyp_seg) < std::abs(dyp/dzp*dzp_seg)) {
1669  Ycell = y0_new;
1670  dxp_seg = dxp/dyp*dyp_seg;
1671  dzp_seg = dzp/dyp*dyp_seg;
1672  x0_new = x0_old + dxp_seg;
1673  z0_new = z0_old + dzp_seg;
1674  }
1675  else {
1676  Zcell = z0_new;
1677  dxp_seg = dxp/dzp*dzp_seg;
1678  dyp_seg = dyp/dzp*dzp_seg;
1679  x0_new = x0_old + dxp_seg;
1680  y0_new = y0_old + dyp_seg;
1681  }
1682 
1683  }
1684 
1685  // compute the segment factors (each equal to dt_seg/dt for nonzero dxp, dyp, or dzp)
1686  const auto seg_factor_x = static_cast<double>(dxp == 0. ? 1. : dxp_seg/dxp);
1687  const auto seg_factor_y = static_cast<double>(dyp == 0. ? 1. : dyp_seg/dyp);
1688  const auto seg_factor_z = static_cast<double>(dzp == 0. ? 1. : dzp_seg/dzp);
1689 
1690  // compute cell-based weights using the average segment position
1691  double sx_cell[depos_order] = {0.};
1692  double sy_cell[depos_order] = {0.};
1693  double sz_cell[depos_order] = {0.};
1694  double const x0_bar = (x0_new + x0_old)/2.0;
1695  double const y0_bar = (y0_new + y0_old)/2.0;
1696  double const z0_bar = (z0_new + z0_old)/2.0;
1697  const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1698  const int j0_cell = compute_shape_factor_cell( sy_cell, y0_bar-0.5 );
1699  const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1700 
1701  if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1702  const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1703  double sx_old_cell[depos_order] = {0.};
1704  double sx_new_cell[depos_order] = {0.};
1705  double sy_old_cell[depos_order] = {0.};
1706  double sy_new_cell[depos_order] = {0.};
1707  double sz_old_cell[depos_order] = {0.};
1708  double sz_new_cell[depos_order] = {0.};
1709  const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1710  const int j0_cell_2 = compute_shape_factors_cell( sy_old_cell, sy_new_cell, y0_old-0.5, y0_new-0.5 );
1711  const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1712  ignore_unused(i0_cell_2, j0_cell_2, k0_cell_2);
1713  for (int m=0; m<depos_order; m++) {
1714  sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1715  sy_cell[m] = (4.0*sy_cell[m] + sy_old_cell[m] + sy_new_cell[m])/6.0;
1716  sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1717  }
1718  }
1719 
1720  // compute node-based weights using the old and new segment positions
1721  double sx_old_node[depos_order+1] = {0.};
1722  double sx_new_node[depos_order+1] = {0.};
1723  double sy_old_node[depos_order+1] = {0.};
1724  double sy_new_node[depos_order+1] = {0.};
1725  double sz_old_node[depos_order+1] = {0.};
1726  double sz_new_node[depos_order+1] = {0.};
1727  const int i0_node = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1728  const int j0_node = compute_shape_factors_node( sy_old_node, sy_new_node, y0_old, y0_new );
1729  const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1730 
1731  // deposit Jx for this segment
1732  amrex::Real this_Jx;
1733  for (int i=0; i<=depos_order-1; i++) {
1734  for (int j=0; j<=depos_order; j++) {
1735  for (int k=0; k<=depos_order; k++) {
1736  this_Jx = wqx*sx_cell[i]*( sy_old_node[j]*sz_old_node[k]*one_third
1737  + sy_old_node[j]*sz_new_node[k]*one_sixth
1738  + sy_new_node[j]*sz_old_node[k]*one_sixth
1739  + sy_new_node[j]*sz_new_node[k]*one_third )*seg_factor_x;
1740  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+j0_node+j, lo.z+k0_node+k), this_Jx);
1741  }
1742  }
1743  }
1744 
1745  // deposit Jy for this segment
1746  amrex::Real this_Jy;
1747  for (int i=0; i<=depos_order; i++) {
1748  for (int j=0; j<=depos_order-1; j++) {
1749  for (int k=0; k<=depos_order; k++) {
1750  this_Jy = wqy*sy_cell[j]*( sx_old_node[i]*sz_old_node[k]*one_third
1751  + sx_old_node[i]*sz_new_node[k]*one_sixth
1752  + sx_new_node[i]*sz_old_node[k]*one_sixth
1753  + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;
1754  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+j0_cell+j, lo.z+k0_node+k), this_Jy);
1755  }
1756  }
1757  }
1758 
1759  // deposit Jz for this segment
1760  amrex::Real this_Jz;
1761  for (int i=0; i<=depos_order; i++) {
1762  for (int j=0; j<=depos_order; j++) {
1763  for (int k=0; k<=depos_order-1; k++) {
1764  this_Jz = wqz*sz_cell[k]*( sx_old_node[i]*sy_old_node[j]*one_third
1765  + sx_old_node[i]*sy_new_node[j]*one_sixth
1766  + sx_new_node[i]*sy_old_node[j]*one_sixth
1767  + sx_new_node[i]*sy_new_node[j]*one_third )*seg_factor_z;
1768  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+j0_node+j, lo.z+k0_cell+k), this_Jz);
1769  }
1770  }
1771  }
1772 
1773  // update old segment values
1774  if (ns < num_segments-1) {
1775  x0_old = x0_new;
1776  y0_old = y0_new;
1777  z0_old = z0_new;
1778  }
1779 
1780  } // end loop over segments
1781 
1782 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1783 
1784  // compute cell crossings in X-direction
1785  const auto i_old = static_cast<int>(x_old-shift);
1786  const auto i_new = static_cast<int>(x_new-shift);
1787  const int cell_crossings_x = std::abs(i_new-i_old);
1788  num_segments += cell_crossings_x;
1789 
1790  // compute cell crossings in Z-direction
1791  const auto k_old = static_cast<int>(z_old-shift);
1792  const auto k_new = static_cast<int>(z_new-shift);
1793  const int cell_crossings_z = std::abs(k_new-k_old);
1794  num_segments += cell_crossings_z;
1795 
1796  // need to assert that the number of cell crossings in each direction
1797  // is within the range permitted by the number of guard cells
1798  // e.g., if (num_segments > 5) ...
1799 
1800  // compute total change in particle position and the initial cell
1801  // locations in each direction used to find the position at cell crossings.
1802  const double dxp = x_new - x_old;
1803  const double dzp = z_new - z_old;
1804  const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
1805  const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1806  double Xcell = 0., Zcell = 0.;
1807  if (num_segments > 1) {
1808  Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
1809  Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1810  }
1811 
1812  // loop over the number of segments and deposit
1813  const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1814  const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1815  double dxp_seg, dzp_seg;
1816  double x0_new, z0_new;
1817  double x0_old = x_old;
1818  double z0_old = z_old;
1819 
1820  for (int ns=0; ns<num_segments; ns++) {
1821 
1822  if (ns == num_segments-1) { // final segment
1823 
1824  x0_new = x_new;
1825  z0_new = z_new;
1826  dxp_seg = x0_new - x0_old;
1827  dzp_seg = z0_new - z0_old;
1828 
1829  }
1830  else {
1831 
1832  x0_new = Xcell + dirX_sign;
1833  z0_new = Zcell + dirZ_sign;
1834  dxp_seg = x0_new - x0_old;
1835  dzp_seg = z0_new - z0_old;
1836 
1837  if (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) {
1838  Xcell = x0_new;
1839  dzp_seg = dzp/dxp*dxp_seg;
1840  z0_new = z0_old + dzp_seg;
1841  }
1842  else {
1843  Zcell = z0_new;
1844  dxp_seg = dxp/dzp*dzp_seg;
1845  x0_new = x0_old + dxp_seg;
1846  }
1847 
1848  }
1849 
1850  // compute the segment factors (each equal to dt_seg/dt for nonzero dxp, or dzp)
1851  const auto seg_factor_x = static_cast<double>(dxp == 0. ? 1. : dxp_seg/dxp);
1852  const auto seg_factor_z = static_cast<double>(dzp == 0. ? 1. : dzp_seg/dzp);
1853 
1854  // compute cell-based weights using the average segment position
1855  double sx_cell[depos_order] = {0.};
1856  double sz_cell[depos_order] = {0.};
1857  double const x0_bar = (x0_new + x0_old)/2.0;
1858  double const z0_bar = (z0_new + z0_old)/2.0;
1859  const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1860  const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1861 
1862  if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1863  const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1864  double sx_old_cell[depos_order] = {0.};
1865  double sx_new_cell[depos_order] = {0.};
1866  double sz_old_cell[depos_order] = {0.};
1867  double sz_new_cell[depos_order] = {0.};
1868  const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1869  const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1870  ignore_unused(i0_cell_2, k0_cell_2);
1871  for (int m=0; m<depos_order; m++) {
1872  sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1873  sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1874  }
1875  }
1876 
1877  // compute node-based weights using the old and new segment positions
1878  double sx_old_node[depos_order+1] = {0.};
1879  double sx_new_node[depos_order+1] = {0.};
1880  double sz_old_node[depos_order+1] = {0.};
1881  double sz_new_node[depos_order+1] = {0.};
1882  const int i0_node = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1883  const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1884 
1885  // deposit Jx for this segment
1886  amrex::Real this_Jx;
1887  for (int i=0; i<=depos_order-1; i++) {
1888  for (int k=0; k<=depos_order; k++) {
1889  this_Jx = wqx*sx_cell[i]*(sz_old_node[k] + sz_new_node[k])/2.0_rt*seg_factor_x;
1890  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+k0_node+k, 0, 0), this_Jx);
1891 #if defined(WARPX_DIM_RZ)
1892  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1893  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1894  // The factor 2 comes from the normalization of the modes
1895  const Complex djr_cmplx = 2._rt*this_Jx*xy_mid;
1896  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+k0_node+k, 0, 2*imode-1), djr_cmplx.real());
1897  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+k0_node+k, 0, 2*imode), djr_cmplx.imag());
1898  xy_mid = xy_mid*xy_mid0;
1899  }
1900 #endif
1901  }
1902  }
1903 
1904  // deposit out-of-plane Jy for this segment
1905  const auto seg_factor_y = std::min(seg_factor_x,seg_factor_z);
1906  amrex::Real this_Jy;
1907  for (int i=0; i<=depos_order; i++) {
1908  for (int k=0; k<=depos_order; k++) {
1909  this_Jy = wqy*( sx_old_node[i]*sz_old_node[k]*one_third
1910  + sx_old_node[i]*sz_new_node[k]*one_sixth
1911  + sx_new_node[i]*sz_old_node[k]*one_sixth
1912  + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;
1913  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+k0_node+k, 0, 0), this_Jy);
1914 #if defined(WARPX_DIM_RZ)
1915  Complex xy_mid = xy_mid0;
1916  // Throughout the following loop, xy_ takes the value e^{i m theta_}
1917  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1918  // The factor 2 comes from the normalization of the modes
1919  const Complex djy_cmplx = 2._rt*this_Jy*xy_mid;
1920  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+k0_node+k, 0, 2*imode-1), djy_cmplx.real());
1921  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+k0_node+k, 0, 2*imode), djy_cmplx.imag());
1922  xy_mid = xy_mid*xy_mid0;
1923  }
1924 #endif
1925  }
1926  }
1927 
1928  // deposit Jz for this segment
1929  amrex::Real this_Jz;
1930  for (int i=0; i<=depos_order; i++) {
1931  for (int k=0; k<=depos_order-1; k++) {
1932  this_Jz = wqz*sz_cell[k]*(sx_old_node[i] + sx_new_node[i])/2.0_rt*seg_factor_z;
1933  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+k0_cell+k, 0, 0), this_Jz);
1934 #if defined(WARPX_DIM_RZ)
1935  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1936  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1937  // The factor 2 comes from the normalization of the modes
1938  const Complex djz_cmplx = 2._rt*this_Jz*xy_mid;
1939  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+k0_cell+k, 0, 2*imode-1), djz_cmplx.real());
1940  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+k0_cell+k, 0, 2*imode), djz_cmplx.imag());
1941  xy_mid = xy_mid*xy_mid0;
1942  }
1943 #endif
1944  }
1945  }
1946 
1947  // update old segment values
1948  if (ns < num_segments-1) {
1949  x0_old = x0_new;
1950  z0_old = z0_new;
1951  }
1952 
1953  } // end loop over segments
1954 
1955 #elif defined(WARPX_DIM_1D_Z)
1956 
1957  // compute cell crossings in Z-direction
1958  const auto k_old = static_cast<int>(z_old-shift);
1959  const auto k_new = static_cast<int>(z_new-shift);
1960  const int cell_crossings_z = std::abs(k_new-k_old);
1961  num_segments += cell_crossings_z;
1962 
1963  // need to assert that the number of cell crossings in each direction
1964  // is within the range permitted by the number of guard cells
1965  // e.g., if (num_segments > 3) ...
1966 
1967  // compute dzp and the initial cell location used to find the cell crossings.
1968  double const dzp = z_new - z_old;
1969  const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1970  double Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1971 
1972  // loop over the number of segments and deposit
1973  const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1974  const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1975  double dzp_seg;
1976  double z0_new;
1977  double z0_old = z_old;
1978 
1979  for (int ns=0; ns<num_segments; ns++) {
1980 
1981  if (ns == num_segments-1) { // final segment
1982  z0_new = z_new;
1983  dzp_seg = z0_new - z0_old;
1984  }
1985  else {
1986  Zcell = Zcell + dirZ_sign;
1987  z0_new = Zcell;
1988  dzp_seg = z0_new - z0_old;
1989  }
1990 
1991  // compute the segment factor (equal to dt_seg/dt for nonzero dzp)
1992  const auto seg_factor = static_cast<double>(dzp == 0. ? 1. : dzp_seg/dzp);
1993 
1994  // compute cell-based weights using the average segment position
1995  double sz_cell[depos_order] = {0.};
1996  double const z0_bar = (z0_new + z0_old)/2.0;
1997  const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1998 
1999  if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
2000  const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
2001  double sz_old_cell[depos_order] = {0.};
2002  double sz_new_cell[depos_order] = {0.};
2003  const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
2004  ignore_unused(k0_cell_2);
2005  for (int m=0; m<depos_order; m++) {
2006  sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
2007  }
2008  }
2009 
2010  // compute node-based weights using the old and new segment positions
2011  double sz_old_node[depos_order+1] = {0.};
2012  double sz_new_node[depos_order+1] = {0.};
2013  const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
2014 
2015  // deposit out-of-plane Jx and Jy for this segment
2016  for (int k=0; k<=depos_order; k++) {
2017  const amrex::Real weight = 0.5_rt*(sz_old_node[k] + sz_new_node[k])*seg_factor;
2018  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k0_node+k, 0, 0), wqx*weight);
2019  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k0_node+k, 0, 0), wqy*weight);
2020  }
2021 
2022  // deposit Jz for this segment
2023  for (int k=0; k<=depos_order-1; k++) {
2024  const amrex::Real this_Jz = wqz*sz_cell[k]*seg_factor;
2025  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k0_cell+k, 0, 0), this_Jz);
2026  }
2027 
2028  // update old segment values
2029  if (ns < num_segments-1) {
2030  z0_old = z0_new;
2031  }
2032 
2033  }
2034 
2035 #endif
2036  }
2037  );
2038 }
2039 
2066 template <int depos_order>
2068  const amrex::ParticleReal* const wp,
2069  const amrex::ParticleReal* const uxp,
2070  const amrex::ParticleReal* const uyp,
2071  const amrex::ParticleReal* const uzp,
2072  const int* const ion_lev,
2073  amrex::FArrayBox& Dx_fab,
2074  amrex::FArrayBox& Dy_fab,
2075  amrex::FArrayBox& Dz_fab,
2076  long np_to_deposit,
2077  amrex::Real dt,
2078  amrex::Real relative_time,
2079  const std::array<amrex::Real,3>& dx,
2080  const std::array<amrex::Real,3>& xyzmin,
2081  amrex::Dim3 lo,
2082  amrex::Real q,
2083  int n_rz_azimuthal_modes)
2084 {
2085  using namespace amrex::literals;
2086 
2087 #if defined(WARPX_DIM_RZ)
2088  amrex::ignore_unused(GetPosition,
2089  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
2090  np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes);
2091  WARPX_ABORT_WITH_MESSAGE("Vay deposition not implemented in RZ geometry");
2092 #endif
2093 
2094 #if defined(WARPX_DIM_1D_Z)
2095  amrex::ignore_unused(GetPosition,
2096  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
2097  np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes);
2098  WARPX_ABORT_WITH_MESSAGE("Vay deposition not implemented in cartesian 1D geometry");
2099 #endif
2100 
2101 #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
2102  amrex::ignore_unused(n_rz_azimuthal_modes);
2103 
2104  // If ion_lev is a null pointer, then do_ionization=0, else do_ionization=1
2105  const bool do_ionization = ion_lev;
2106 
2107  // Inverse cell volume in each direction
2108  const amrex::Real dxi = 1._rt / dx[0];
2109  const amrex::Real dzi = 1._rt / dx[2];
2110 #if defined(WARPX_DIM_3D)
2111  const amrex::Real dyi = 1._rt / dx[1];
2112 #endif
2113 
2114  // Inverse of time step
2115  const amrex::Real invdt = 1._rt / dt;
2116 
2117  // Total inverse cell volume
2118 #if defined(WARPX_DIM_XZ)
2119  const amrex::Real invvol = dxi * dzi;
2120 #elif defined(WARPX_DIM_3D)
2121  const amrex::Real invvol = dxi * dyi * dzi;
2122 #endif
2123 
2124  // Lower bound of physical domain in each direction
2125  const amrex::Real xmin = xyzmin[0];
2126  const amrex::Real zmin = xyzmin[2];
2127 #if defined(WARPX_DIM_3D)
2128  const amrex::Real ymin = xyzmin[1];
2129 #endif
2130 
2131  // Allocate temporary arrays
2132 #if defined(WARPX_DIM_3D)
2133  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dy_fab.box() && Dx_fab.box() == Dz_fab.box());
2134  amrex::FArrayBox temp_fab{Dx_fab.box(), 4};
2135 #elif defined(WARPX_DIM_XZ)
2136  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dz_fab.box());
2137  amrex::FArrayBox temp_fab{Dx_fab.box(), 2};
2138 #endif
2139  temp_fab.setVal<amrex::RunOn::Device>(0._rt);
2140  amrex::Array4<amrex::Real> const& temp_arr = temp_fab.array();
2141 
2142  // Inverse of light speed squared
2143  const amrex::Real invcsq = 1._rt / (PhysConst::c * PhysConst::c);
2144 
2145  // Arrays where D will be stored
2146  amrex::Array4<amrex::Real> const& Dx_arr = Dx_fab.array();
2147  amrex::Array4<amrex::Real> const& Dy_arr = Dy_fab.array();
2148  amrex::Array4<amrex::Real> const& Dz_arr = Dz_fab.array();
2149 
2150  // Loop over particles and deposit (Dx,Dy,Dz) into Dx_fab, Dy_fab and Dz_fab
2151  amrex::ParallelFor(np_to_deposit, [=] AMREX_GPU_DEVICE (long ip)
2152  {
2153  // Inverse of Lorentz factor gamma
2154  const amrex::Real invgam = 1._rt / std::sqrt(1._rt + uxp[ip] * uxp[ip] * invcsq
2155  + uyp[ip] * uyp[ip] * invcsq
2156  + uzp[ip] * uzp[ip] * invcsq);
2157  // Product of particle charges and weights
2158  amrex::Real wq = q * wp[ip];
2159  if (do_ionization) { wq *= ion_lev[ip]; }
2160 
2161  // Current particle positions (in physical units)
2162  amrex::ParticleReal xp, yp, zp;
2163  GetPosition(ip, xp, yp, zp);
2164 
2165  // Particle velocities
2166  const amrex::Real vx = uxp[ip] * invgam;
2167  const amrex::Real vy = uyp[ip] * invgam;
2168  const amrex::Real vz = uzp[ip] * invgam;
2169 
2170  // Modify the particle position to match the time of the deposition
2171  xp += relative_time * vx;
2172  yp += relative_time * vy;
2173  zp += relative_time * vz;
2174 
2175  // Particle current densities
2176 #if defined(WARPX_DIM_XZ)
2177  const amrex::Real wqy = wq * vy * invvol;
2178 #endif
2179 
2180  // Current and old particle positions in grid units
2181  // Keep these double to avoid bug in single precision.
2182  double const x_new = (xp - xmin + 0.5_rt*dt*vx) * dxi;
2183  double const x_old = (xp - xmin - 0.5_rt*dt*vx) * dxi;
2184 #if defined(WARPX_DIM_3D)
2185  // Keep these double to avoid bug in single precision.
2186  double const y_new = (yp - ymin + 0.5_rt*dt*vy) * dyi;
2187  double const y_old = (yp - ymin - 0.5_rt*dt*vy) * dyi;
2188 #endif
2189  // Keep these double to avoid bug in single precision.
2190  double const z_new = (zp - zmin + 0.5_rt*dt*vz) * dzi;
2191  double const z_old = (zp - zmin - 0.5_rt*dt*vz) * dzi;
2192 
2193  // Shape factor arrays for current and old positions (nodal)
2194  // Keep these double to avoid bug in single precision.
2195  double sx_new[depos_order+1] = {0.};
2196  double sx_old[depos_order+1] = {0.};
2197 #if defined(WARPX_DIM_3D)
2198  // Keep these double to avoid bug in single precision.
2199  double sy_new[depos_order+1] = {0.};
2200  double sy_old[depos_order+1] = {0.};
2201 #endif
2202  // Keep these double to avoid bug in single precision.
2203  double sz_new[depos_order+1] = {0.};
2204  double sz_old[depos_order+1] = {0.};
2205 
2206  // Compute shape factors for current positions
2207 
2208  // i_new leftmost grid point in x that the particle touches
2209  // sx_new shape factor along x for the centering of each current
2210  Compute_shape_factor< depos_order > const compute_shape_factor;
2211  const int i_new = compute_shape_factor(sx_new, x_new);
2212 #if defined(WARPX_DIM_3D)
2213  // j_new leftmost grid point in y that the particle touches
2214  // sy_new shape factor along y for the centering of each current
2215  const int j_new = compute_shape_factor(sy_new, y_new);
2216 #endif
2217  // k_new leftmost grid point in z that the particle touches
2218  // sz_new shape factor along z for the centering of each current
2219  const int k_new = compute_shape_factor(sz_new, z_new);
2220 
2221  // Compute shape factors for old positions
2222 
2223  // i_old leftmost grid point in x that the particle touches
2224  // sx_old shape factor along x for the centering of each current
2225  const int i_old = compute_shape_factor(sx_old, x_old);
2226 #if defined(WARPX_DIM_3D)
2227  // j_old leftmost grid point in y that the particle touches
2228  // sy_old shape factor along y for the centering of each current
2229  const int j_old = compute_shape_factor(sy_old, y_old);
2230 #endif
2231  // k_old leftmost grid point in z that the particle touches
2232  // sz_old shape factor along z for the centering of each current
2233  const int k_old = compute_shape_factor(sz_old, z_old);
2234 
2235  // Deposit current into Dx_arr, Dy_arr and Dz_arr
2236 #if defined(WARPX_DIM_XZ)
2237 
2238  for (int k=0; k<=depos_order; k++) {
2239  for (int i=0; i<=depos_order; i++) {
2240 
2241  // Re-casting sx_new and sz_new from double to amrex::Real so that
2242  // Atomic::Add has consistent types in its argument
2243  auto const sxn_szn = static_cast<amrex::Real>(sx_new[i] * sz_new[k]);
2244  auto const sxo_szn = static_cast<amrex::Real>(sx_old[i] * sz_new[k]);
2245  auto const sxn_szo = static_cast<amrex::Real>(sx_new[i] * sz_old[k]);
2246  auto const sxo_szo = static_cast<amrex::Real>(sx_old[i] * sz_old[k]);
2247 
2248  if (i_new == i_old && k_new == k_old) {
2249  // temp arrays for Dx and Dz
2250  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2251  wq * invvol * invdt * (sxn_szn - sxo_szo));
2252 
2253  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 1),
2254  wq * invvol * invdt * (sxn_szo - sxo_szn));
2255 
2256  // Dy
2257  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2258  wqy * 0.25_rt * (sxn_szn + sxn_szo + sxo_szn + sxo_szo));
2259  } else {
2260  // temp arrays for Dx and Dz
2261  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2262  wq * invvol * invdt * sxn_szn);
2263 
2264  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
2265  - wq * invvol * invdt * sxo_szo);
2266 
2267  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 1),
2268  wq * invvol * invdt * sxn_szo);
2269 
2270  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 1),
2271  - wq * invvol * invdt * sxo_szn);
2272 
2273  // Dy
2274  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
2275  wqy * 0.25_rt * sxn_szn);
2276 
2277  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 0),
2278  wqy * 0.25_rt * sxn_szo);
2279 
2280  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 0),
2281  wqy * 0.25_rt * sxo_szn);
2282 
2283  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
2284  wqy * 0.25_rt * sxo_szo);
2285  }
2286 
2287  }
2288  }
2289 
2290 #elif defined(WARPX_DIM_3D)
2291 
2292  for (int k=0; k<=depos_order; k++) {
2293  for (int j=0; j<=depos_order; j++) {
2294 
2295  auto const syn_szn = static_cast<amrex::Real>(sy_new[j] * sz_new[k]);
2296  auto const syo_szn = static_cast<amrex::Real>(sy_old[j] * sz_new[k]);
2297  auto const syn_szo = static_cast<amrex::Real>(sy_new[j] * sz_old[k]);
2298  auto const syo_szo = static_cast<amrex::Real>(sy_old[j] * sz_old[k]);
2299 
2300  for (int i=0; i<=depos_order; i++) {
2301 
2302  auto const sxn_syn_szn = static_cast<amrex::Real>(sx_new[i]) * syn_szn;
2303  auto const sxo_syn_szn = static_cast<amrex::Real>(sx_old[i]) * syn_szn;
2304  auto const sxn_syo_szn = static_cast<amrex::Real>(sx_new[i]) * syo_szn;
2305  auto const sxo_syo_szn = static_cast<amrex::Real>(sx_old[i]) * syo_szn;
2306  auto const sxn_syn_szo = static_cast<amrex::Real>(sx_new[i]) * syn_szo;
2307  auto const sxo_syn_szo = static_cast<amrex::Real>(sx_old[i]) * syn_szo;
2308  auto const sxn_syo_szo = static_cast<amrex::Real>(sx_new[i]) * syo_szo;
2309  auto const sxo_syo_szo = static_cast<amrex::Real>(sx_old[i]) * syo_szo;
2310 
2311  if (i_new == i_old && j_new == j_old && k_new == k_old) {
2312  // temp arrays for Dx, Dy and Dz
2313  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
2314  wq * invvol * invdt * (sxn_syn_szn - sxo_syo_szo));
2315 
2316  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 1),
2317  wq * invvol * invdt * (sxn_syn_szo - sxo_syo_szn));
2318 
2319  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 2),
2320  wq * invvol * invdt * (sxn_syo_szn - sxo_syn_szo));
2321 
2322  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 3),
2323  wq * invvol * invdt * (sxo_syn_szn - sxn_syo_szo));
2324  } else {
2325  // temp arrays for Dx, Dy and Dz
2326  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
2327  wq * invvol * invdt * sxn_syn_szn);
2328 
2329  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_old + k, 0),
2330  - wq * invvol * invdt * sxo_syo_szo);
2331 
2332  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_old + k, 1),
2333  wq * invvol * invdt * sxn_syn_szo);
2334 
2335  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_new + k, 1),
2336  - wq * invvol * invdt * sxo_syo_szn);
2337 
2338  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_new + k, 2),
2339  wq * invvol * invdt * sxn_syo_szn);
2340 
2341  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_old + k, 2),
2342  - wq * invvol * invdt * sxo_syn_szo);
2343 
2344  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_new + k, 3),
2345  wq * invvol * invdt * sxo_syn_szn);
2346 
2347  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_old + k, 3),
2348  - wq * invvol * invdt * sxn_syo_szo);
2349  }
2350  }
2351  }
2352  }
2353 #endif
2354  } );
2355 
2356 #if defined(WARPX_DIM_3D)
2357  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
2358  {
2359  const amrex::Real t_a = temp_arr(i,j,k,0);
2360  const amrex::Real t_b = temp_arr(i,j,k,1);
2361  const amrex::Real t_c = temp_arr(i,j,k,2);
2362  const amrex::Real t_d = temp_arr(i,j,k,3);
2363  Dx_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b + t_c - 2._rt*t_d);
2364  Dy_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b - 2._rt*t_c + t_d);
2365  Dz_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a - 2._rt*t_b + t_c + t_d);
2366  });
2367 #elif defined(WARPX_DIM_XZ)
2368  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
2369  {
2370  const amrex::Real t_a = temp_arr(i,j,0,0);
2371  const amrex::Real t_b = temp_arr(i,j,0,1);
2372  Dx_arr(i,j,0) += (0.5_rt)*(t_a + t_b);
2373  Dz_arr(i,j,0) += (0.5_rt)*(t_a - t_b);
2374  });
2375 #endif
2376  // Synchronize so that temp_fab can be safely deallocated in its destructor
2378 
2379 #endif // #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
2380 }
2381 #endif // WARPX_CURRENTDEPOSITION_H_
#define AMREX_ALWAYS_ASSERT(EX)
#define AMREX_INLINE
#define AMREX_GPU_DEVICE
#define AMREX_GPU_HOST_DEVICE
#define AMREX_D_DECL(a, b, c)
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doDepositionShapeNKernel(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, const amrex::ParticleReal wq, const amrex::ParticleReal vx, const amrex::ParticleReal vy, const amrex::ParticleReal vz, amrex::Array4< amrex::Real > const &jx_arr, amrex::Array4< amrex::Real > const &jy_arr, amrex::Array4< amrex::Real > const &jz_arr, amrex::IntVect const &jx_type, amrex::IntVect const &jy_type, amrex::IntVect const &jz_type, const amrex::Real relative_time, AMREX_D_DECL(const amrex::Real dzi, const amrex::Real dxi, const amrex::Real dyi), AMREX_D_DECL(const amrex::Real zmin, const amrex::Real xmin, const amrex::Real ymin), const amrex::Real invvol, const amrex::Dim3 lo, const int n_rz_azimuthal_modes)
Kernel for the direct current deposition for thread thread_num.
Definition: CurrentDeposition.H:49
void doDepositionShapeNImplicit(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp_n, const amrex::ParticleReal *const uyp_n, const amrex::ParticleReal *const uzp_n, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes)
Direct current deposition for thread thread_num for the implicit scheme The only difference from doDe...
Definition: CurrentDeposition.H:401
void doEsirkepovDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, std::array< amrex::Real, 3 > xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes)
Esirkepov Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:708
void doVayDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &Dx_fab, amrex::FArrayBox &Dy_fab, amrex::FArrayBox &Dz_fab, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes)
Vay current deposition (Vay et al, 2013) for thread thread_num: deposit D in real space and store the...
Definition: CurrentDeposition.H:2067
void doDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, const amrex::DenseBins< WarpXParticleContainer::ParticleTileType::ParticleTileDataType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size)
Current Deposition for thread thread_num using shared memory.
Definition: CurrentDeposition.H:519
void doDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes)
Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:292
void doVillasenorDepositionShapeNImplicit(const amrex::ParticleReal *const xp_n, const amrex::ParticleReal *const yp_n, const amrex::ParticleReal *const zp_n, const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, [[maybe_unused]]const amrex::ParticleReal *const uxp_n, [[maybe_unused]]const amrex::ParticleReal *const uyp_n, [[maybe_unused]]const amrex::ParticleReal *const uzp_n, [[maybe_unused]]const amrex::ParticleReal *const uxp_nph, [[maybe_unused]]const amrex::ParticleReal *const uyp_nph, [[maybe_unused]]const amrex::ParticleReal *const uzp_nph, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_deposit, const amrex::Real dt, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes)
Villasenor and Buneman Current Deposition for thread thread_num for implicit scheme....
Definition: CurrentDeposition.H:1430
void doChargeConservingDepositionShapeNImplicit(const amrex::ParticleReal *const xp_n, const amrex::ParticleReal *const yp_n, const amrex::ParticleReal *const zp_n, const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, [[maybe_unused]]const amrex::ParticleReal *const uxp_n, [[maybe_unused]]const amrex::ParticleReal *const uyp_n, [[maybe_unused]]const amrex::ParticleReal *const uzp_n, [[maybe_unused]]const amrex::ParticleReal *const uxp_nph, [[maybe_unused]]const amrex::ParticleReal *const uyp_nph, [[maybe_unused]]const amrex::ParticleReal *const uzp_nph, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_deposit, const amrex::Real dt, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes)
Esirkepov Current Deposition for thread thread_num for implicit scheme The difference from doEsirkepo...
Definition: CurrentDeposition.H:1057
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition: TextMsg.H:15
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
amrex::GpuComplex< amrex::Real > Complex
Definition: WarpX_Complex.H:22
NODE
static amrex::IntVect shared_tilesize
tileSize to use for shared current deposition operations
Definition: WarpX.H:237
static int shared_mem_current_tpb
number of threads to use per block in shared deposition
Definition: WarpX.H:234
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
AMREX_GPU_HOST_DEVICE Box & grow(int i) noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
static constexpr auto c
vacuum speed of light [m/s]
Definition: constant.H:44
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
constexpr int iz
constexpr int iy
constexpr int ix
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box convert(const Box &b, const IntVect &typ) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
void launch(T const &n, L &&f) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box shift(const Box &b, int dir, int nzones) noexcept
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
float dt
Definition: stencil.py:442
tuple dx
lab frame
Definition: stencil.py:429
xmin
Definition: stencil.py:424
Definition: ShapeFactors.H:168
Definition: ShapeFactors.H:29
Definition: ShapeFactors.H:95
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept