WarpX
CurrentDeposition.H
Go to the documentation of this file.
1 /* Copyright 2019 Axel Huebl, David Grote, Maxence Thevenet
2  * Remi Lehe, Weiqun Zhang, Michael Rowan
3  *
4  * This file is part of WarpX.
5  *
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef CURRENTDEPOSITION_H_
9 #define CURRENTDEPOSITION_H_
10 
14 #include "Particles/ShapeFactors.H"
15 #include "Utils/TextMsg.H"
17 #include "Utils/WarpXConst.H"
18 #ifdef WARPX_DIM_RZ
19 # include "Utils/WarpX_Complex.H"
20 #endif
21 
22 #include "WarpX.H" // todo: remove include and pass globals as args
23 
24 #include <AMReX.H>
25 #include <AMReX_Arena.H>
26 #include <AMReX_Array4.H>
27 #include <AMReX_REAL.H>
28 
47 template <int depos_order>
49 void doDepositionShapeNKernel(const amrex::ParticleReal xp,
50  const amrex::ParticleReal yp,
51  const amrex::ParticleReal zp,
52  const amrex::ParticleReal wq,
53  const amrex::ParticleReal vx,
54  const amrex::ParticleReal vy,
55  const amrex::ParticleReal vz,
56  amrex::Array4<amrex::Real> const& jx_arr,
57  amrex::Array4<amrex::Real> const& jy_arr,
58  amrex::Array4<amrex::Real> const& jz_arr,
59  amrex::IntVect const& jx_type,
60  amrex::IntVect const& jy_type,
61  amrex::IntVect const& jz_type,
62  const amrex::Real relative_time,
63  AMREX_D_DECL(const amrex::Real dzi,
64  const amrex::Real dxi,
65  const amrex::Real dyi),
66  AMREX_D_DECL(const amrex::Real zmin,
67  const amrex::Real xmin,
68  const amrex::Real ymin),
69  const amrex::Real invvol,
70  const amrex::Dim3 lo,
71  const int n_rz_azimuthal_modes)
72 {
73  using namespace amrex::literals;
74 #if !defined(WARPX_DIM_RZ)
75  amrex::ignore_unused(n_rz_azimuthal_modes);
76 #endif
77 #if defined(WARPX_DIM_1D_Z)
78  amrex::ignore_unused(xp, yp);
79 #endif
80 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
82 #endif
83 
84  constexpr int zdir = WARPX_ZINDEX;
85  constexpr int NODE = amrex::IndexType::NODE;
86  constexpr int CELL = amrex::IndexType::CELL;
87 
88  // wqx, wqy wqz are particle current in each direction
89 #if defined(WARPX_DIM_RZ)
90  // In RZ, wqx is actually wqr, and wqy is wqtheta
91  // Convert to cylindrical at the mid point
92  const amrex::Real xpmid = xp + relative_time*vx;
93  const amrex::Real ypmid = yp + relative_time*vy;
94  const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
95  amrex::Real costheta;
96  amrex::Real sintheta;
97  if (rpmid > 0._rt) {
98  costheta = xpmid/rpmid;
99  sintheta = ypmid/rpmid;
100  } else {
101  costheta = 1._rt;
102  sintheta = 0._rt;
103  }
104  const Complex xy0 = Complex{costheta, sintheta};
105  const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
106  const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
107 #else
108  const amrex::Real wqx = wq*invvol*vx;
109  const amrex::Real wqy = wq*invvol*vy;
110 #endif
111  const amrex::Real wqz = wq*invvol*vz;
112 
113  // --- Compute shape factors
114  Compute_shape_factor< depos_order > const compute_shape_factor;
115 #if (AMREX_SPACEDIM >= 2)
116  // x direction
117  // Get particle position after 1/2 push back in position
118 #if defined(WARPX_DIM_RZ)
119  // Keep these double to avoid bug in single precision
120  const double xmid = (rpmid - xmin)*dxi;
121 #else
122  const double xmid = ((xp - xmin) + relative_time*vx)*dxi;
123 #endif
124  // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
125  // sx_j[xyz] shape factor along x for the centering of each current
126  // There are only two possible centerings, node or cell centered, so at most only two shape factor
127  // arrays will be needed.
128  // Keep these double to avoid bug in single precision
129  double sx_node[depos_order + 1] = {0.};
130  double sx_cell[depos_order + 1] = {0.};
131  int j_node = 0;
132  int j_cell = 0;
133  if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
134  j_node = compute_shape_factor(sx_node, xmid);
135  }
136  if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
137  j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
138  }
139 
140  amrex::Real sx_jx[depos_order + 1] = {0._rt};
141  amrex::Real sx_jy[depos_order + 1] = {0._rt};
142  amrex::Real sx_jz[depos_order + 1] = {0._rt};
143  for (int ix=0; ix<=depos_order; ix++)
144  {
145  sx_jx[ix] = ((jx_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
146  sx_jy[ix] = ((jy_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
147  sx_jz[ix] = ((jz_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
148  }
149 
150  int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
151  int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
152  int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
153 #endif //AMREX_SPACEDIM >= 2
154 
155 #if defined(WARPX_DIM_3D)
156  // y direction
157  // Keep these double to avoid bug in single precision
158  const double ymid = ((yp - ymin) + relative_time*vy)*dyi;
159  double sy_node[depos_order + 1] = {0.};
160  double sy_cell[depos_order + 1] = {0.};
161  int k_node = 0;
162  int k_cell = 0;
163  if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
164  k_node = compute_shape_factor(sy_node, ymid);
165  }
166  if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
167  k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
168  }
169  amrex::Real sy_jx[depos_order + 1] = {0._rt};
170  amrex::Real sy_jy[depos_order + 1] = {0._rt};
171  amrex::Real sy_jz[depos_order + 1] = {0._rt};
172  for (int iy=0; iy<=depos_order; iy++)
173  {
174  sy_jx[iy] = ((jx_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
175  sy_jy[iy] = ((jy_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
176  sy_jz[iy] = ((jz_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
177  }
178  int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
179  int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
180  int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
181 #endif
182 
183  // z direction
184  // Keep these double to avoid bug in single precision
185  const double zmid = ((zp - zmin) + relative_time*vz)*dzi;
186  double sz_node[depos_order + 1] = {0.};
187  double sz_cell[depos_order + 1] = {0.};
188  int l_node = 0;
189  int l_cell = 0;
190  if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
191  l_node = compute_shape_factor(sz_node, zmid);
192  }
193  if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
194  l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
195  }
196  amrex::Real sz_jx[depos_order + 1] = {0._rt};
197  amrex::Real sz_jy[depos_order + 1] = {0._rt};
198  amrex::Real sz_jz[depos_order + 1] = {0._rt};
199  for (int iz=0; iz<=depos_order; iz++)
200  {
201  sz_jx[iz] = ((jx_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
202  sz_jy[iz] = ((jy_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
203  sz_jz[iz] = ((jz_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
204  }
205  int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
206  int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
207  int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
208 
209  // Deposit current into jx_arr, jy_arr and jz_arr
210 #if defined(WARPX_DIM_1D_Z)
211  for (int iz=0; iz<=depos_order; iz++){
213  &jx_arr(lo.x+l_jx+iz, 0, 0, 0),
214  sz_jx[iz]*wqx);
216  &jy_arr(lo.x+l_jy+iz, 0, 0, 0),
217  sz_jy[iz]*wqy);
219  &jz_arr(lo.x+l_jz+iz, 0, 0, 0),
220  sz_jz[iz]*wqz);
221  }
222 #endif
223 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
224  for (int iz=0; iz<=depos_order; iz++){
225  for (int ix=0; ix<=depos_order; ix++){
227  &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0),
228  sx_jx[ix]*sz_jx[iz]*wqx);
230  &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0),
231  sx_jy[ix]*sz_jy[iz]*wqy);
233  &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0),
234  sx_jz[ix]*sz_jz[iz]*wqz);
235 #if defined(WARPX_DIM_RZ)
236  Complex xy = xy0; // Note that xy is equal to e^{i m theta}
237  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
238  // The factor 2 on the weighting comes from the normalization of the modes
239  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode-1), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.real());
240  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode ), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.imag());
241  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode-1), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.real());
242  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode ), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.imag());
243  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode-1), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.real());
244  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode ), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.imag());
245  xy = xy*xy0;
246  }
247 #endif
248  }
249  }
250 #elif defined(WARPX_DIM_3D)
251  for (int iz=0; iz<=depos_order; iz++){
252  for (int iy=0; iy<=depos_order; iy++){
253  for (int ix=0; ix<=depos_order; ix++){
255  &jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz),
256  sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
258  &jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz),
259  sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
261  &jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz),
262  sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
263  }
264  }
265  }
266 #endif
267 }
268 
293 template <int depos_order>
295  const amrex::ParticleReal * const wp,
296  const amrex::ParticleReal * const uxp,
297  const amrex::ParticleReal * const uyp,
298  const amrex::ParticleReal * const uzp,
299  const int* ion_lev,
300  amrex::FArrayBox& jx_fab,
301  amrex::FArrayBox& jy_fab,
302  amrex::FArrayBox& jz_fab,
303  long np_to_deposit,
304  amrex::Real relative_time,
305  const std::array<amrex::Real,3>& dx,
306  const std::array<amrex::Real,3>& xyzmin,
307  amrex::Dim3 lo,
308  amrex::Real q,
309  int n_rz_azimuthal_modes,
310  amrex::Real* cost,
311  long load_balance_costs_update_algo)
312 {
313  using namespace amrex::literals;
314 
315 #if !defined(WARPX_DIM_RZ)
316  amrex::ignore_unused(n_rz_azimuthal_modes);
317 #endif
318 
319 #if !defined(AMREX_USE_GPU)
320  amrex::ignore_unused(cost, load_balance_costs_update_algo);
321 #endif
322 
323  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
324  // (do_ionization=1)
325  const bool do_ionization = ion_lev;
326  const amrex::Real dzi = 1.0_rt/dx[2];
327 #if defined(WARPX_DIM_1D_Z)
328  const amrex::Real invvol = dzi;
329 #endif
330 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
331  const amrex::Real dxi = 1.0_rt/dx[0];
332  const amrex::Real invvol = dxi*dzi;
333 #elif defined(WARPX_DIM_3D)
334  const amrex::Real dxi = 1.0_rt/dx[0];
335  const amrex::Real dyi = 1.0_rt/dx[1];
336  const amrex::Real invvol = dxi*dyi*dzi;
337 #endif
338 
339 #if (AMREX_SPACEDIM >= 2)
340  const amrex::Real xmin = xyzmin[0];
341 #endif
342 #if defined(WARPX_DIM_3D)
343  const amrex::Real ymin = xyzmin[1];
344 #endif
345  const amrex::Real zmin = xyzmin[2];
346 
347  const amrex::Real clightsq = 1.0_rt/PhysConst::c/PhysConst::c;
348 
349  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
350  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
351  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
352  amrex::IntVect const jx_type = jx_fab.box().type();
353  amrex::IntVect const jy_type = jy_fab.box().type();
354  amrex::IntVect const jz_type = jz_fab.box().type();
355 
356  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
357 #if defined(WARPX_USE_GPUCLOCK)
358  amrex::Real* cost_real = nullptr;
359  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
360  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
361  *cost_real = 0._rt;
362  }
363 #endif
365  np_to_deposit,
366  [=] AMREX_GPU_DEVICE (long ip) {
367 #if defined(WARPX_USE_GPUCLOCK)
368  const auto KernelTimer = ablastr::parallelization::KernelTimer(
369  cost && (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock),
370  cost_real);
371 #endif
372 
373  amrex::ParticleReal xp, yp, zp;
374  GetPosition(ip, xp, yp, zp);
375 
376  // --- Get particle quantities
377  const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
378  + uyp[ip]*uyp[ip]*clightsq
379  + uzp[ip]*uzp[ip]*clightsq);
380  const amrex::Real vx = uxp[ip]*gaminv;
381  const amrex::Real vy = uyp[ip]*gaminv;
382  const amrex::Real vz = uzp[ip]*gaminv;
383 
384  amrex::Real wq = q*wp[ip];
385  if (do_ionization){
386  wq *= ion_lev[ip];
387  }
388 
389  doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
390  jx_type, jy_type, jz_type,
391  relative_time,
392  AMREX_D_DECL(dzi, dxi, dyi),
393  AMREX_D_DECL(zmin, xmin, ymin),
394  invvol, lo, n_rz_azimuthal_modes);
395 
396  }
397  );
398 #if defined(WARPX_USE_GPUCLOCK)
399  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
401  *cost += *cost_real;
402  amrex::The_Managed_Arena()->free(cost_real);
403  }
404 #endif
405 }
406 
430 template <int depos_order>
432  const amrex::ParticleReal * const wp,
433  const amrex::ParticleReal * const uxp_n,
434  const amrex::ParticleReal * const uyp_n,
435  const amrex::ParticleReal * const uzp_n,
436  const amrex::ParticleReal * const uxp,
437  const amrex::ParticleReal * const uyp,
438  const amrex::ParticleReal * const uzp,
439  const int * const ion_lev,
440  amrex::FArrayBox& jx_fab,
441  amrex::FArrayBox& jy_fab,
442  amrex::FArrayBox& jz_fab,
443  const long np_to_deposit,
444  const std::array<amrex::Real,3>& dx,
445  const std::array<amrex::Real,3>& xyzmin,
446  const amrex::Dim3 lo,
447  const amrex::Real q,
448  const int n_rz_azimuthal_modes,
449  amrex::Real* cost,
450  const long load_balance_costs_update_algo)
451 {
452  using namespace amrex::literals;
453 #if !defined(WARPX_DIM_RZ)
454  amrex::ignore_unused(n_rz_azimuthal_modes);
455 #endif
456 
457 #if !defined(AMREX_USE_GPU)
458  amrex::ignore_unused(cost, load_balance_costs_update_algo);
459 #endif
460 
461  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
462  // (do_ionization=1)
463  const bool do_ionization = ion_lev;
464  const amrex::Real dzi = 1.0_rt/dx[2];
465 #if defined(WARPX_DIM_1D_Z)
466  const amrex::Real invvol = dzi;
467 #endif
468 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
469  const amrex::Real dxi = 1.0_rt/dx[0];
470  const amrex::Real invvol = dxi*dzi;
471 #elif defined(WARPX_DIM_3D)
472  const amrex::Real dxi = 1.0_rt/dx[0];
473  const amrex::Real dyi = 1.0_rt/dx[1];
474  const amrex::Real invvol = dxi*dyi*dzi;
475 #endif
476 
477 #if (AMREX_SPACEDIM >= 2)
478  const amrex::Real xmin = xyzmin[0];
479 #endif
480 #if defined(WARPX_DIM_3D)
481  const amrex::Real ymin = xyzmin[1];
482 #endif
483  const amrex::Real zmin = xyzmin[2];
484 
485  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
486  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
487  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
488  amrex::IntVect const jx_type = jx_fab.box().type();
489  amrex::IntVect const jy_type = jy_fab.box().type();
490  amrex::IntVect const jz_type = jz_fab.box().type();
491 
492  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
493 #if defined(WARPX_USE_GPUCLOCK)
494  amrex::Real* cost_real = nullptr;
495  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
496  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
497  *cost_real = 0._rt;
498  }
499 #endif
501  np_to_deposit,
502  [=] AMREX_GPU_DEVICE (long ip) {
503 #if defined(WARPX_USE_GPUCLOCK)
504  const auto KernelTimer = ablastr::parallelization::KernelTimer(
505  cost && (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock),
506  cost_real);
507 #endif
508 
509  amrex::ParticleReal xp, yp, zp;
510  GetPosition(ip, xp, yp, zp);
511 
512  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
513 
514  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
515  // The uxp,uyp,uzp are the velocities at time level n+1/2
516  const amrex::ParticleReal uxp_np1 = 2._prt*uxp[ip] - uxp_n[ip];
517  const amrex::ParticleReal uyp_np1 = 2._prt*uyp[ip] - uyp_n[ip];
518  const amrex::ParticleReal uzp_np1 = 2._prt*uzp[ip] - uzp_n[ip];
519  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
520  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
521  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
522 
523  const amrex::Real vx = uxp[ip]*gaminv;
524  const amrex::Real vy = uyp[ip]*gaminv;
525  const amrex::Real vz = uzp[ip]*gaminv;
526 
527  amrex::Real wq = q*wp[ip];
528  if (do_ionization){
529  wq *= ion_lev[ip];
530  }
531 
532  const amrex::Real relative_time = 0._rt;
533  doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
534  jx_type, jy_type, jz_type,
535  relative_time,
536  AMREX_D_DECL(dzi, dxi, dyi),
537  AMREX_D_DECL(zmin, xmin, ymin),
538  invvol, lo, n_rz_azimuthal_modes);
539 
540  }
541  );
542 #if defined(WARPX_USE_GPUCLOCK)
543  if(cost && load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
545  *cost += *cost_real;
546  amrex::The_Managed_Arena()->free(cost_real);
547  }
548 #endif
549 }
550 
576 template <int depos_order>
578  const amrex::ParticleReal * const wp,
579  const amrex::ParticleReal * const uxp,
580  const amrex::ParticleReal * const uyp,
581  const amrex::ParticleReal * const uzp,
582  const int* ion_lev,
583  amrex::FArrayBox& jx_fab,
584  amrex::FArrayBox& jy_fab,
585  amrex::FArrayBox& jz_fab,
586  long np_to_deposit,
587  const amrex::Real relative_time,
588  const std::array<amrex::Real,3>& dx,
589  const std::array<amrex::Real,3>& xyzmin,
590  amrex::Dim3 lo,
591  amrex::Real q,
592  int n_rz_azimuthal_modes,
593  amrex::Real* cost,
594  long load_balance_costs_update_algo,
596  const amrex::Box& box,
597  const amrex::Geometry& geom,
598  const amrex::IntVect& a_tbox_max_size)
599 {
600  using namespace amrex::literals;
601 
602 #if defined(AMREX_USE_HIP) || defined(AMREX_USE_CUDA)
603  using namespace amrex;
604 
605  auto permutation = a_bins.permutationPtr();
606 
607  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
608  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
609  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
610  amrex::IntVect const jx_type = jx_fab.box().type();
611  amrex::IntVect const jy_type = jy_fab.box().type();
612  amrex::IntVect const jz_type = jz_fab.box().type();
613 
614  constexpr int zdir = WARPX_ZINDEX;
615  constexpr int NODE = amrex::IndexType::NODE;
616  constexpr int CELL = amrex::IndexType::CELL;
617 
618  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
619 #if defined(WARPX_USE_GPUCLOCK)
620  amrex::Real* cost_real = nullptr;
621  if (cost && load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
622  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
623  *cost_real = 0._rt;
624  }
625 #endif
626  const auto dxiarr = geom.InvCellSizeArray();
627  const auto plo = geom.ProbLoArray();
628  const auto domain = geom.Domain();
629 
630  amrex::Box sample_tbox(IntVect(AMREX_D_DECL(0,0,0)), a_tbox_max_size - 1);
631  sample_tbox.grow(depos_order);
632 
633  amrex::Box sample_tbox_x = convert(sample_tbox, jx_type);
634  amrex::Box sample_tbox_y = convert(sample_tbox, jy_type);
635  amrex::Box sample_tbox_z = convert(sample_tbox, jz_type);
636 
637  const auto npts = amrex::max(sample_tbox_x.numPts(), sample_tbox_y.numPts(), sample_tbox_z.numPts());
638 
639  const int nblocks = a_bins.numBins();
640  const int threads_per_block = WarpX::shared_mem_current_tpb;
641  const auto offsets_ptr = a_bins.offsetsPtr();
642 
643  const std::size_t shared_mem_bytes = npts*sizeof(amrex::Real);
644  const amrex::IntVect bin_size = WarpX::shared_tilesize;
645  const std::size_t max_shared_mem_bytes = amrex::Gpu::Device::sharedMemPerBlock();
646  WARPX_ALWAYS_ASSERT_WITH_MESSAGE(shared_mem_bytes <= max_shared_mem_bytes,
647  "Tile size too big for GPU shared memory current deposition");
648 
649  amrex::ignore_unused(np_to_deposit);
650  // Launch one thread-block per bin
652  nblocks, threads_per_block, shared_mem_bytes, amrex::Gpu::gpuStream(),
653  [=] AMREX_GPU_DEVICE () noexcept {
654 #if defined(WARPX_USE_GPUCLOCK)
655  const auto KernelTimer = ablastr::parallelization::KernelTimer(
656  cost && (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock),
657  cost_real);
658 #endif
659  const int bin_id = blockIdx.x;
660  const unsigned int bin_start = offsets_ptr[bin_id];
661  const unsigned int bin_stop = offsets_ptr[bin_id+1];
662 
663  if (bin_start == bin_stop) { return; /*this bin has no particles*/ }
664 
665  // These boxes define the index space for the shared memory buffers
666  amrex::Box buffer_box;
667  {
668  ParticleReal xp, yp, zp;
669  GetPosition(permutation[bin_start], xp, yp, zp);
670 #if defined(WARPX_DIM_3D)
671  IntVect iv = IntVect(int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
672  int( amrex::Math::floor((yp-plo[1]) * dxiarr[1]) ),
673  int( amrex::Math::floor((zp-plo[2]) * dxiarr[2]) ));
674 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
675  IntVect iv = IntVect(int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
676  int( amrex::Math::floor((zp-plo[1]) * dxiarr[1]) ));
677 #elif defined(WARPX_DIM_1D_Z)
678  IntVect iv = IntVect(int( amrex::Math::floor((zp-plo[0]) * dxiarr[0]) ));
679 #endif
680  iv += domain.smallEnd();
681  getTileIndex(iv, box, true, bin_size, buffer_box);
682  }
683 
684  buffer_box.grow(depos_order);
685  Box tbox_x = convert(buffer_box, jx_type);
686  Box tbox_y = convert(buffer_box, jy_type);
687  Box tbox_z = convert(buffer_box, jz_type);
688 
690  amrex::Real* const shared = gsm.dataPtr();
691 
692  amrex::Array4<amrex::Real> const jx_buff(shared,
693  amrex::begin(tbox_x), amrex::end(tbox_x), 1);
694  amrex::Array4<amrex::Real> const jy_buff(shared,
695  amrex::begin(tbox_y), amrex::end(tbox_y), 1);
696  amrex::Array4<amrex::Real> const jz_buff(shared,
697  amrex::begin(tbox_z), amrex::end(tbox_z), 1);
698 
699  // Zero-initialize the temporary array in shared memory
700  volatile amrex::Real* vs = shared;
701  for (int i = threadIdx.x; i < npts; i += blockDim.x){
702  vs[i] = 0.0;
703  }
704  __syncthreads();
705  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
706  {
707  const unsigned int ip = permutation[ip_orig];
708  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_buff, jx_type,
709  relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes,
710  ip, zdir, NODE, CELL, 0);
711  }
712 
713  __syncthreads();
714  addLocalToGlobal(tbox_x, jx_arr, jx_buff);
715  for (int i = threadIdx.x; i < npts; i += blockDim.x){
716  vs[i] = 0.0;
717  }
718 
719  __syncthreads();
720  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
721  {
722  const unsigned int ip = permutation[ip_orig];
723  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jy_buff, jy_type,
724  relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes,
725  ip, zdir, NODE, CELL, 1);
726  }
727 
728  __syncthreads();
729  addLocalToGlobal(tbox_y, jy_arr, jy_buff);
730  for (int i = threadIdx.x; i < npts; i += blockDim.x){
731  vs[i] = 0.0;
732  }
733 
734  __syncthreads();
735  for (unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
736  {
737  const unsigned int ip = permutation[ip_orig];
738  depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jz_buff, jz_type,
739  relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes,
740  ip, zdir, NODE, CELL, 2);
741  }
742 
743  __syncthreads();
744  addLocalToGlobal(tbox_z, jz_arr, jz_buff);
745  });
746 #if defined(WARPX_USE_GPUCLOCK)
747  if (cost && load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
749  *cost += *cost_real;
750  amrex::The_Managed_Arena()->free(cost_real);
751  }
752 #endif
753 #else // not using hip/cuda
754  // Note, you should never reach this part of the code. This funcion cannot be called unless
755  // using HIP/CUDA, and those things are checked prior
756  //don't use any args
757  ignore_unused(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes, cost, load_balance_costs_update_algo, a_bins, box, geom, a_tbox_max_size);
758  WARPX_ABORT_WITH_MESSAGE("Shared memory only implemented for HIP/CUDA");
759 #endif
760 }
761 
788 template <int depos_order>
790  const amrex::ParticleReal * const wp,
791  const amrex::ParticleReal * const uxp,
792  const amrex::ParticleReal * const uyp,
793  const amrex::ParticleReal * const uzp,
794  const int* ion_lev,
795  const amrex::Array4<amrex::Real>& Jx_arr,
796  const amrex::Array4<amrex::Real>& Jy_arr,
797  const amrex::Array4<amrex::Real>& Jz_arr,
798  long np_to_deposit,
799  amrex::Real dt,
800  amrex::Real relative_time,
801  const std::array<amrex::Real,3>& dx,
802  std::array<amrex::Real, 3> xyzmin,
803  amrex::Dim3 lo,
804  amrex::Real q,
805  int n_rz_azimuthal_modes,
806  amrex::Real * const cost,
807  long load_balance_costs_update_algo)
808 {
809  using namespace amrex;
810  using namespace amrex::literals;
811 
812 #if !defined(WARPX_DIM_RZ)
813  ignore_unused(n_rz_azimuthal_modes);
814 #endif
815 
816 #if !defined(AMREX_USE_GPU)
817  amrex::ignore_unused(cost, load_balance_costs_update_algo);
818 #endif
819 
820  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
821  // (do_ionization=1)
822  bool const do_ionization = ion_lev;
823 #if !defined(WARPX_DIM_1D_Z)
824  Real const dxi = 1.0_rt / dx[0];
825 #endif
826 #if !defined(WARPX_DIM_1D_Z)
827  Real const xmin = xyzmin[0];
828 #endif
829 #if defined(WARPX_DIM_3D)
830  Real const dyi = 1.0_rt / dx[1];
831  Real const ymin = xyzmin[1];
832 #endif
833  Real const dzi = 1.0_rt / dx[2];
834  Real const zmin = xyzmin[2];
835 
836 #if defined(WARPX_DIM_3D)
837  Real const invdtdx = 1.0_rt / (dt*dx[1]*dx[2]);
838  Real const invdtdy = 1.0_rt / (dt*dx[0]*dx[2]);
839  Real const invdtdz = 1.0_rt / (dt*dx[0]*dx[1]);
840 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
841  Real const invdtdx = 1.0_rt / (dt*dx[2]);
842  Real const invdtdz = 1.0_rt / (dt*dx[0]);
843  Real const invvol = 1.0_rt / (dx[0]*dx[2]);
844 #elif defined(WARPX_DIM_1D_Z)
845  Real const invdtdz = 1.0_rt / (dt*dx[0]);
846  Real const invvol = 1.0_rt / (dx[2]);
847 #endif
848 
849 #if defined(WARPX_DIM_RZ)
850  Complex const I = Complex{0._rt, 1._rt};
851 #endif
852 
853  Real const clightsq = 1.0_rt / ( PhysConst::c * PhysConst::c );
854 #if !defined(WARPX_DIM_1D_Z)
855  Real constexpr one_third = 1.0_rt / 3.0_rt;
856  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
857 #endif
858 
859  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
860 #if defined(WARPX_USE_GPUCLOCK)
861  amrex::Real* cost_real = nullptr;
862  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
863  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
864  *cost_real = 0._rt;
865  }
866 #endif
868  np_to_deposit,
869  [=] AMREX_GPU_DEVICE (long const ip) {
870 #if defined(WARPX_USE_GPUCLOCK)
871  const auto KernelTimer = ablastr::parallelization::KernelTimer(
872  cost && (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock),
873  cost_real);
874 #endif
875 
876  // --- Get particle quantities
877  Real const gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
878  + uyp[ip]*uyp[ip]*clightsq
879  + uzp[ip]*uzp[ip]*clightsq);
880 
881  // wqx, wqy wqz are particle current in each direction
882  Real wq = q*wp[ip];
883  if (do_ionization){
884  wq *= ion_lev[ip];
885  }
886 
887  ParticleReal xp, yp, zp;
888  GetPosition(ip, xp, yp, zp);
889 
890 #if !defined(WARPX_DIM_1D_Z)
891  Real const wqx = wq*invdtdx;
892 #endif
893 #if defined(WARPX_DIM_3D)
894  Real const wqy = wq*invdtdy;
895 #endif
896  Real const wqz = wq*invdtdz;
897 
898  // computes current and old position in grid units
899 #if defined(WARPX_DIM_RZ)
900  Real const xp_new = xp + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv;
901  Real const yp_new = yp + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv;
902  Real const xp_mid = xp_new - 0.5_rt*dt*uxp[ip]*gaminv;
903  Real const yp_mid = yp_new - 0.5_rt*dt*uyp[ip]*gaminv;
904  Real const xp_old = xp_new - dt*uxp[ip]*gaminv;
905  Real const yp_old = yp_new - dt*uyp[ip]*gaminv;
906  Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
907  Real const rp_mid = std::sqrt(xp_mid*xp_mid + yp_mid*yp_mid);
908  Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
909  Real costheta_new, sintheta_new;
910  if (rp_new > 0._rt) {
911  costheta_new = xp_new/rp_new;
912  sintheta_new = yp_new/rp_new;
913  } else {
914  costheta_new = 1._rt;
915  sintheta_new = 0._rt;
916  }
917  amrex::Real costheta_mid, sintheta_mid;
918  if (rp_mid > 0._rt) {
919  costheta_mid = xp_mid/rp_mid;
920  sintheta_mid = yp_mid/rp_mid;
921  } else {
922  costheta_mid = 1._rt;
923  sintheta_mid = 0._rt;
924  }
925  amrex::Real costheta_old, sintheta_old;
926  if (rp_old > 0._rt) {
927  costheta_old = xp_old/rp_old;
928  sintheta_old = yp_old/rp_old;
929  } else {
930  costheta_old = 1._rt;
931  sintheta_old = 0._rt;
932  }
933  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
934  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
935  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
936  // Keep these double to avoid bug in single precision
937  double const x_new = (rp_new - xmin)*dxi;
938  double const x_old = (rp_old - xmin)*dxi;
939 #else
940 #if !defined(WARPX_DIM_1D_Z)
941  // Keep these double to avoid bug in single precision
942  double const x_new = (xp - xmin + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv)*dxi;
943  double const x_old = x_new - dt*dxi*uxp[ip]*gaminv;
944 #endif
945 #endif
946 #if defined(WARPX_DIM_3D)
947  // Keep these double to avoid bug in single precision
948  double const y_new = (yp - ymin + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv)*dyi;
949  double const y_old = y_new - dt*dyi*uyp[ip]*gaminv;
950 #endif
951  // Keep these double to avoid bug in single precision
952  double const z_new = (zp - zmin + (relative_time + 0.5_rt*dt)*uzp[ip]*gaminv)*dzi;
953  double const z_old = z_new - dt*dzi*uzp[ip]*gaminv;
954 
955 #if defined(WARPX_DIM_RZ)
956  Real const vy = (-uxp[ip]*sintheta_mid + uyp[ip]*costheta_mid)*gaminv;
957 #elif defined(WARPX_DIM_XZ)
958  Real const vy = uyp[ip]*gaminv;
959 #elif defined(WARPX_DIM_1D_Z)
960  Real const vx = uxp[ip]*gaminv;
961  Real const vy = uyp[ip]*gaminv;
962 #endif
963 
964  // Shape factor arrays
965  // Note that there are extra values above and below
966  // to possibly hold the factor for the old particle
967  // which can be at a different grid location.
968  // Keep these double to avoid bug in single precision
969 #if !defined(WARPX_DIM_1D_Z)
970  double sx_new[depos_order + 3] = {0.};
971  double sx_old[depos_order + 3] = {0.};
972 #endif
973 #if defined(WARPX_DIM_3D)
974  // Keep these double to avoid bug in single precision
975  double sy_new[depos_order + 3] = {0.};
976  double sy_old[depos_order + 3] = {0.};
977 #endif
978  // Keep these double to avoid bug in single precision
979  double sz_new[depos_order + 3] = {0.};
980  double sz_old[depos_order + 3] = {0.};
981 
982  // --- Compute shape factors
983  // Compute shape factors for position as they are now and at old positions
984  // [ijk]_new: leftmost grid point that the particle touches
985  const Compute_shape_factor< depos_order > compute_shape_factor;
986  const Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
987 
988 #if !defined(WARPX_DIM_1D_Z)
989  const int i_new = compute_shape_factor(sx_new+1, x_new);
990  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
991 #endif
992 #if defined(WARPX_DIM_3D)
993  const int j_new = compute_shape_factor(sy_new+1, y_new);
994  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
995 #endif
996  const int k_new = compute_shape_factor(sz_new+1, z_new);
997  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
998 
999  // computes min/max positions of current contributions
1000 #if !defined(WARPX_DIM_1D_Z)
1001  int dil = 1, diu = 1;
1002  if (i_old < i_new) { dil = 0; }
1003  if (i_old > i_new) { diu = 0; }
1004 #endif
1005 #if defined(WARPX_DIM_3D)
1006  int djl = 1, dju = 1;
1007  if (j_old < j_new) { djl = 0; }
1008  if (j_old > j_new) { dju = 0; }
1009 #endif
1010  int dkl = 1, dku = 1;
1011  if (k_old < k_new) { dkl = 0; }
1012  if (k_old > k_new) { dku = 0; }
1013 
1014 #if defined(WARPX_DIM_3D)
1015 
1016  for (int k=dkl; k<=depos_order+2-dku; k++) {
1017  for (int j=djl; j<=depos_order+2-dju; j++) {
1018  amrex::Real sdxi = 0._rt;
1019  for (int i=dil; i<=depos_order+1-diu; i++) {
1020  sdxi += wqx*(sx_old[i] - sx_new[i])*(
1021  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
1022  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
1023  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
1024  }
1025  }
1026  }
1027  for (int k=dkl; k<=depos_order+2-dku; k++) {
1028  for (int i=dil; i<=depos_order+2-diu; i++) {
1029  amrex::Real sdyj = 0._rt;
1030  for (int j=djl; j<=depos_order+1-dju; j++) {
1031  sdyj += wqy*(sy_old[j] - sy_new[j])*(
1032  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1033  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1034  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
1035  }
1036  }
1037  }
1038  for (int j=djl; j<=depos_order+2-dju; j++) {
1039  for (int i=dil; i<=depos_order+2-diu; i++) {
1040  amrex::Real sdzk = 0._rt;
1041  for (int k=dkl; k<=depos_order+1-dku; k++) {
1042  sdzk += wqz*(sz_old[k] - sz_new[k])*(
1043  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
1044  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
1045  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
1046  }
1047  }
1048  }
1049 
1050 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1051 
1052  for (int k=dkl; k<=depos_order+2-dku; k++) {
1053  amrex::Real sdxi = 0._rt;
1054  for (int i=dil; i<=depos_order+1-diu; i++) {
1055  sdxi += wqx*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
1056  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
1057 #if defined(WARPX_DIM_RZ)
1058  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1059  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1060  // The factor 2 comes from the normalization of the modes
1061  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
1062  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
1063  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
1064  xy_mid = xy_mid*xy_mid0;
1065  }
1066 #endif
1067  }
1068  }
1069  for (int k=dkl; k<=depos_order+2-dku; k++) {
1070  for (int i=dil; i<=depos_order+2-diu; i++) {
1071  Real const sdyj = wq*vy*invvol*(
1072  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1073  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1074  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
1075 #if defined(WARPX_DIM_RZ)
1076  Complex xy_new = xy_new0;
1077  Complex xy_mid = xy_mid0;
1078  Complex xy_old = xy_old0;
1079  // Throughout the following loop, xy_ takes the value e^{i m theta_}
1080  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1081  // The factor 2 comes from the normalization of the modes
1082  // The minus sign comes from the different convention with respect to Davidson et al.
1083  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode
1084  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
1085  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
1086  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
1087  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
1088  xy_new = xy_new*xy_new0;
1089  xy_mid = xy_mid*xy_mid0;
1090  xy_old = xy_old*xy_old0;
1091  }
1092 #endif
1093  }
1094  }
1095  for (int i=dil; i<=depos_order+2-diu; i++) {
1096  Real sdzk = 0._rt;
1097  for (int k=dkl; k<=depos_order+1-dku; k++) {
1098  sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
1099  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
1100 #if defined(WARPX_DIM_RZ)
1101  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1102  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1103  // The factor 2 comes from the normalization of the modes
1104  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1105  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
1106  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
1107  xy_mid = xy_mid*xy_mid0;
1108  }
1109 #endif
1110  }
1111  }
1112 #elif defined(WARPX_DIM_1D_Z)
1113 
1114  for (int k=dkl; k<=depos_order+2-dku; k++) {
1115  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1116  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
1117  }
1118  for (int k=dkl; k<=depos_order+2-dku; k++) {
1119  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1120  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
1121  }
1122  amrex::Real sdzk = 0._rt;
1123  for (int k=dkl; k<=depos_order+1-dku; k++) {
1124  sdzk += wqz*(sz_old[k] - sz_new[k]);
1125  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
1126  }
1127 #endif
1128  }
1129  );
1130 #if defined(WARPX_USE_GPUCLOCK)
1131  if(cost && load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
1133  *cost += *cost_real;
1134  amrex::The_Managed_Arena()->free(cost_real);
1135  }
1136 #endif
1137 }
1138 
1163 template <int depos_order>
1164 void doChargeConservingDepositionShapeNImplicit (const amrex::ParticleReal * const xp_n,
1165  const amrex::ParticleReal * const yp_n,
1166  const amrex::ParticleReal * const zp_n,
1167  const GetParticlePosition<PIdx>& GetPosition,
1168  const amrex::ParticleReal * const wp,
1169  [[maybe_unused]]const amrex::ParticleReal * const uxp_n,
1170  [[maybe_unused]]const amrex::ParticleReal * const uyp_n,
1171  [[maybe_unused]]const amrex::ParticleReal * const uzp_n,
1172  [[maybe_unused]]const amrex::ParticleReal * const uxp_nph,
1173  [[maybe_unused]]const amrex::ParticleReal * const uyp_nph,
1174  [[maybe_unused]]const amrex::ParticleReal * const uzp_nph,
1175  const int * const ion_lev,
1176  const amrex::Array4<amrex::Real>& Jx_arr,
1177  const amrex::Array4<amrex::Real>& Jy_arr,
1178  const amrex::Array4<amrex::Real>& Jz_arr,
1179  const long np_to_deposit,
1180  const amrex::Real dt,
1181  const std::array<amrex::Real,3>& dx,
1182  const std::array<amrex::Real, 3> xyzmin,
1183  const amrex::Dim3 lo,
1184  const amrex::Real q,
1185  const int n_rz_azimuthal_modes,
1186  amrex::Real * const cost,
1187  const long load_balance_costs_update_algo)
1188 {
1189  using namespace amrex;
1190 #if !defined(WARPX_DIM_RZ)
1191  ignore_unused(n_rz_azimuthal_modes);
1192 #endif
1193 
1194 #if !defined(AMREX_USE_GPU)
1195  amrex::ignore_unused(cost, load_balance_costs_update_algo);
1196 #endif
1197 
1198  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
1199  // (do_ionization=1)
1200  bool const do_ionization = ion_lev;
1201 #if !defined(WARPX_DIM_1D_Z)
1202  Real const dxi = 1.0_rt / dx[0];
1203 #endif
1204 #if !defined(WARPX_DIM_1D_Z)
1205  Real const xmin = xyzmin[0];
1206 #endif
1207 #if defined(WARPX_DIM_3D)
1208  Real const dyi = 1.0_rt / dx[1];
1209  Real const ymin = xyzmin[1];
1210 #endif
1211  Real const dzi = 1.0_rt / dx[2];
1212  Real const zmin = xyzmin[2];
1213 
1214 #if defined(WARPX_DIM_3D)
1215  Real const invdtdx = 1.0_rt / (dt*dx[1]*dx[2]);
1216  Real const invdtdy = 1.0_rt / (dt*dx[0]*dx[2]);
1217  Real const invdtdz = 1.0_rt / (dt*dx[0]*dx[1]);
1218 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1219  Real const invdtdx = 1.0_rt / (dt*dx[2]);
1220  Real const invdtdz = 1.0_rt / (dt*dx[0]);
1221  Real const invvol = 1.0_rt / (dx[0]*dx[2]);
1222 #elif defined(WARPX_DIM_1D_Z)
1223  Real const invdtdz = 1.0_rt / (dt*dx[0]);
1224  Real const invvol = 1.0_rt / (dx[2]);
1225 #endif
1226 
1227 #if defined(WARPX_DIM_RZ)
1228  Complex const I = Complex{0._rt, 1._rt};
1229 #endif
1230 
1231 #if !defined(WARPX_DIM_1D_Z)
1232  Real constexpr one_third = 1.0_rt / 3.0_rt;
1233  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
1234 #endif
1235 
1236  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
1237 #if defined(WARPX_USE_GPUCLOCK)
1238  amrex::Real* cost_real = nullptr;
1239  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
1240  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
1241  *cost_real = 0._rt;
1242  }
1243 #endif
1245  np_to_deposit,
1246  [=] AMREX_GPU_DEVICE (long const ip) {
1247 #if defined(WARPX_USE_GPUCLOCK)
1248  const auto KernelTimer = ablastr::parallelization::KernelTimer(
1249  cost && (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock),
1250  cost_real);
1251 #endif
1252 
1253 #if !defined(WARPX_DIM_3D)
1254  constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
1255 
1256  // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
1257  // The uxp,uyp,uzp are the velocities at time level n+1/2
1258  const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
1259  const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
1260  const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
1261  const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
1262  const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
1263  const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
1264 #endif
1265 
1266  // wqx, wqy wqz are particle current in each direction
1267  Real wq = q*wp[ip];
1268  if (do_ionization){
1269  wq *= ion_lev[ip];
1270  }
1271 
1272  ParticleReal xp_nph, yp_nph, zp_nph;
1273  GetPosition(ip, xp_nph, yp_nph, zp_nph);
1274 
1275 #if !defined(WARPX_DIM_1D_Z)
1276  ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n[ip];
1277 #else
1278  ignore_unused(xp_n);
1279 #endif
1280 #if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
1281  ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n[ip];
1282 #else
1283  ignore_unused(yp_n);
1284 #endif
1285  ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n[ip];
1286 
1287 #if !defined(WARPX_DIM_1D_Z)
1288  amrex::Real const wqx = wq*invdtdx;
1289 #endif
1290 #if defined(WARPX_DIM_3D)
1291  amrex::Real const wqy = wq*invdtdy;
1292 #endif
1293  amrex::Real const wqz = wq*invdtdz;
1294 
1295  // computes current and old position in grid units
1296 #if defined(WARPX_DIM_RZ)
1297  amrex::Real const xp_new = xp_np1;
1298  amrex::Real const yp_new = yp_np1;
1299  amrex::Real const xp_mid = xp_nph;
1300  amrex::Real const yp_mid = yp_nph;
1301  amrex::Real const xp_old = xp_n[ip];
1302  amrex::Real const yp_old = yp_n[ip];
1303  amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
1304  amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
1305  amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
1306  amrex::Real costheta_new, sintheta_new;
1307  if (rp_new > 0._rt) {
1308  costheta_new = xp_new/rp_new;
1309  sintheta_new = yp_new/rp_new;
1310  } else {
1311  costheta_new = 1._rt;
1312  sintheta_new = 0._rt;
1313  }
1314  amrex::Real costheta_mid, sintheta_mid;
1315  if (rp_mid > 0._rt) {
1316  costheta_mid = xp_mid/rp_mid;
1317  sintheta_mid = yp_mid/rp_mid;
1318  } else {
1319  costheta_mid = 1._rt;
1320  sintheta_mid = 0._rt;
1321  }
1322  amrex::Real costheta_old, sintheta_old;
1323  if (rp_old > 0._rt) {
1324  costheta_old = xp_old/rp_old;
1325  sintheta_old = yp_old/rp_old;
1326  } else {
1327  costheta_old = 1._rt;
1328  sintheta_old = 0._rt;
1329  }
1330  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
1331  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
1332  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
1333  // Keep these double to avoid bug in single precision
1334  double const x_new = (rp_new - xmin)*dxi;
1335  double const x_old = (rp_old - xmin)*dxi;
1336 #else
1337 #if !defined(WARPX_DIM_1D_Z)
1338  // Keep these double to avoid bug in single precision
1339  double const x_new = (xp_np1 - xmin)*dxi;
1340  double const x_old = (xp_n[ip] - xmin)*dxi;
1341 #endif
1342 #endif
1343 #if defined(WARPX_DIM_3D)
1344  // Keep these double to avoid bug in single precision
1345  double const y_new = (yp_np1 - ymin)*dyi;
1346  double const y_old = (yp_n[ip] - ymin)*dyi;
1347 #endif
1348  // Keep these double to avoid bug in single precision
1349  double const z_new = (zp_np1 - zmin)*dzi;
1350  double const z_old = (zp_n[ip] - zmin)*dzi;
1351 
1352 #if defined(WARPX_DIM_RZ)
1353  amrex::Real const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
1354 #elif defined(WARPX_DIM_XZ)
1355  amrex::Real const vy = uyp_nph[ip]*gaminv;
1356 #elif defined(WARPX_DIM_1D_Z)
1357  amrex::Real const vx = uxp_nph[ip]*gaminv;
1358  amrex::Real const vy = uyp_nph[ip]*gaminv;
1359 #endif
1360 
1361  // Shape factor arrays
1362  // Note that there are extra values above and below
1363  // to possibly hold the factor for the old particle
1364  // which can be at a different grid location.
1365  // Keep these double to avoid bug in single precision
1366 #if !defined(WARPX_DIM_1D_Z)
1367  double sx_new[depos_order + 3] = {0.};
1368  double sx_old[depos_order + 3] = {0.};
1369 #endif
1370 #if defined(WARPX_DIM_3D)
1371  // Keep these double to avoid bug in single precision
1372  double sy_new[depos_order + 3] = {0.};
1373  double sy_old[depos_order + 3] = {0.};
1374 #endif
1375  // Keep these double to avoid bug in single precision
1376  double sz_new[depos_order + 3] = {0.};
1377  double sz_old[depos_order + 3] = {0.};
1378 
1379  // --- Compute shape factors
1380  // Compute shape factors for position as they are now and at old positions
1381  // [ijk]_new: leftmost grid point that the particle touches
1382  Compute_shape_factor< depos_order > compute_shape_factor;
1383  Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
1384 
1385 #if !defined(WARPX_DIM_1D_Z)
1386  const int i_new = compute_shape_factor(sx_new+1, x_new);
1387  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
1388 #endif
1389 #if defined(WARPX_DIM_3D)
1390  const int j_new = compute_shape_factor(sy_new+1, y_new);
1391  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
1392 #endif
1393  const int k_new = compute_shape_factor(sz_new+1, z_new);
1394  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
1395 
1396  // computes min/max positions of current contributions
1397 #if !defined(WARPX_DIM_1D_Z)
1398  int dil = 1, diu = 1;
1399  if (i_old < i_new) { dil = 0; }
1400  if (i_old > i_new) { diu = 0; }
1401 #endif
1402 #if defined(WARPX_DIM_3D)
1403  int djl = 1, dju = 1;
1404  if (j_old < j_new) { djl = 0; }
1405  if (j_old > j_new) { dju = 0; }
1406 #endif
1407  int dkl = 1, dku = 1;
1408  if (k_old < k_new) { dkl = 0; }
1409  if (k_old > k_new) { dku = 0; }
1410 
1411 #if defined(WARPX_DIM_3D)
1412 
1413  for (int k=dkl; k<=depos_order+2-dku; k++) {
1414  for (int j=djl; j<=depos_order+2-dju; j++) {
1415  amrex::Real sdxi = 0._rt;
1416  for (int i=dil; i<=depos_order+1-diu; i++) {
1417  sdxi += wqx*(sx_old[i] - sx_new[i])*(
1418  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
1419  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
1420  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
1421  }
1422  }
1423  }
1424  for (int k=dkl; k<=depos_order+2-dku; k++) {
1425  for (int i=dil; i<=depos_order+2-diu; i++) {
1426  amrex::Real sdyj = 0._rt;
1427  for (int j=djl; j<=depos_order+1-dju; j++) {
1428  sdyj += wqy*(sy_old[j] - sy_new[j])*(
1429  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1430  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1431  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
1432  }
1433  }
1434  }
1435  for (int j=djl; j<=depos_order+2-dju; j++) {
1436  for (int i=dil; i<=depos_order+2-diu; i++) {
1437  amrex::Real sdzk = 0._rt;
1438  for (int k=dkl; k<=depos_order+1-dku; k++) {
1439  sdzk += wqz*(sz_old[k] - sz_new[k])*(
1440  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
1441  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
1442  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
1443  }
1444  }
1445  }
1446 
1447 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1448 
1449  for (int k=dkl; k<=depos_order+2-dku; k++) {
1450  amrex::Real sdxi = 0._rt;
1451  for (int i=dil; i<=depos_order+1-diu; i++) {
1452  sdxi += wqx*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
1453  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
1454 #if defined(WARPX_DIM_RZ)
1455  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1456  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1457  // The factor 2 comes from the normalization of the modes
1458  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
1459  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
1460  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
1461  xy_mid = xy_mid*xy_mid0;
1462  }
1463 #endif
1464  }
1465  }
1466  for (int k=dkl; k<=depos_order+2-dku; k++) {
1467  for (int i=dil; i<=depos_order+2-diu; i++) {
1468  Real const sdyj = wq*vy*invvol*(
1469  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
1470  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
1471  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
1472 #if defined(WARPX_DIM_RZ)
1473  Complex xy_new = xy_new0;
1474  Complex xy_mid = xy_mid0;
1475  Complex xy_old = xy_old0;
1476  // Throughout the following loop, xy_ takes the value e^{i m theta_}
1477  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1478  // The factor 2 comes from the normalization of the modes
1479  // The minus sign comes from the different convention with respect to Davidson et al.
1480  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode
1481  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
1482  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
1483  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
1484  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
1485  xy_new = xy_new*xy_new0;
1486  xy_mid = xy_mid*xy_mid0;
1487  xy_old = xy_old*xy_old0;
1488  }
1489 #endif
1490  }
1491  }
1492  for (int i=dil; i<=depos_order+2-diu; i++) {
1493  Real sdzk = 0._rt;
1494  for (int k=dkl; k<=depos_order+1-dku; k++) {
1495  sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
1496  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
1497 #if defined(WARPX_DIM_RZ)
1498  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
1499  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1500  // The factor 2 comes from the normalization of the modes
1501  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1502  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
1503  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
1504  xy_mid = xy_mid*xy_mid0;
1505  }
1506 #endif
1507  }
1508  }
1509 #elif defined(WARPX_DIM_1D_Z)
1510 
1511  for (int k=dkl; k<=depos_order+2-dku; k++) {
1512  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1513  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
1514  }
1515  for (int k=dkl; k<=depos_order+2-dku; k++) {
1516  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1517  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
1518  }
1519  amrex::Real sdzk = 0._rt;
1520  for (int k=dkl; k<=depos_order+1-dku; k++) {
1521  sdzk += wqz*(sz_old[k] - sz_new[k]);
1522  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
1523  }
1524 #endif
1525  }
1526  );
1527 #if defined(WARPX_USE_GPUCLOCK)
1528  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
1530  *cost += *cost_real;
1531  amrex::The_Managed_Arena()->free(cost_real);
1532  }
1533 #endif
1534 }
1535 
1565 template <int depos_order>
1567  const amrex::ParticleReal* const wp,
1568  const amrex::ParticleReal* const uxp,
1569  const amrex::ParticleReal* const uyp,
1570  const amrex::ParticleReal* const uzp,
1571  const int* const ion_lev,
1572  amrex::FArrayBox& Dx_fab,
1573  amrex::FArrayBox& Dy_fab,
1574  amrex::FArrayBox& Dz_fab,
1575  long np_to_deposit,
1576  amrex::Real dt,
1577  amrex::Real relative_time,
1578  const std::array<amrex::Real,3>& dx,
1579  const std::array<amrex::Real,3>& xyzmin,
1580  amrex::Dim3 lo,
1581  amrex::Real q,
1582  int n_rz_azimuthal_modes,
1583  amrex::Real* cost,
1584  long load_balance_costs_update_algo)
1585 {
1586  using namespace amrex::literals;
1587 
1588 #if defined(WARPX_DIM_RZ)
1589  amrex::ignore_unused(GetPosition,
1590  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
1591  np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes);
1592  WARPX_ABORT_WITH_MESSAGE("Vay deposition not implemented in RZ geometry");
1593 #endif
1594 
1595 #if defined(WARPX_DIM_1D_Z)
1596  amrex::ignore_unused(GetPosition,
1597  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
1598  np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes);
1599  WARPX_ABORT_WITH_MESSAGE("Vay deposition not implemented in cartesian 1D geometry");
1600 #endif
1601 
1602 #if !defined(WARPX_USE_GPUCLOCK)
1603  amrex::ignore_unused(cost, load_balance_costs_update_algo);
1604 #endif
1605 
1606 #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
1607  amrex::ignore_unused(n_rz_azimuthal_modes);
1608 
1609  // If ion_lev is a null pointer, then do_ionization=0, else do_ionization=1
1610  const bool do_ionization = ion_lev;
1611 
1612  // Inverse cell volume in each direction
1613  const amrex::Real dxi = 1._rt / dx[0];
1614  const amrex::Real dzi = 1._rt / dx[2];
1615 #if defined(WARPX_DIM_3D)
1616  const amrex::Real dyi = 1._rt / dx[1];
1617 #endif
1618 
1619  // Inverse of time step
1620  const amrex::Real invdt = 1._rt / dt;
1621 
1622  // Total inverse cell volume
1623 #if defined(WARPX_DIM_XZ)
1624  const amrex::Real invvol = dxi * dzi;
1625 #elif defined(WARPX_DIM_3D)
1626  const amrex::Real invvol = dxi * dyi * dzi;
1627 #endif
1628 
1629  // Lower bound of physical domain in each direction
1630  const amrex::Real xmin = xyzmin[0];
1631  const amrex::Real zmin = xyzmin[2];
1632 #if defined(WARPX_DIM_3D)
1633  const amrex::Real ymin = xyzmin[1];
1634 #endif
1635 
1636  // Allocate temporary arrays
1637 #if defined(WARPX_DIM_3D)
1638  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dy_fab.box() && Dx_fab.box() == Dz_fab.box());
1639  amrex::FArrayBox temp_fab{Dx_fab.box(), 4};
1640 #elif defined(WARPX_DIM_XZ)
1641  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dz_fab.box());
1642  amrex::FArrayBox temp_fab{Dx_fab.box(), 2};
1643 #endif
1644  temp_fab.setVal<amrex::RunOn::Device>(0._rt);
1645  amrex::Array4<amrex::Real> const& temp_arr = temp_fab.array();
1646 
1647  // Inverse of light speed squared
1648  const amrex::Real invcsq = 1._rt / (PhysConst::c * PhysConst::c);
1649 
1650  // Arrays where D will be stored
1651  amrex::Array4<amrex::Real> const& Dx_arr = Dx_fab.array();
1652  amrex::Array4<amrex::Real> const& Dy_arr = Dy_fab.array();
1653  amrex::Array4<amrex::Real> const& Dz_arr = Dz_fab.array();
1654 
1655  // Loop over particles and deposit (Dx,Dy,Dz) into Dx_fab, Dy_fab and Dz_fab
1656 #if defined(WARPX_USE_GPUCLOCK)
1657  amrex::Real* cost_real = nullptr;
1658  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
1659  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
1660  *cost_real = 0._rt;
1661  }
1662 #endif
1663  amrex::ParallelFor(np_to_deposit, [=] AMREX_GPU_DEVICE (long ip)
1664  {
1665 #if defined(WARPX_USE_GPUCLOCK)
1666  const auto KernelTimer = ablastr::parallelization::KernelTimer(
1667  cost && (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock),
1668  cost_real);
1669 #endif
1670 
1671  // Inverse of Lorentz factor gamma
1672  const amrex::Real invgam = 1._rt / std::sqrt(1._rt + uxp[ip] * uxp[ip] * invcsq
1673  + uyp[ip] * uyp[ip] * invcsq
1674  + uzp[ip] * uzp[ip] * invcsq);
1675  // Product of particle charges and weights
1676  amrex::Real wq = q * wp[ip];
1677  if (do_ionization) { wq *= ion_lev[ip]; }
1678 
1679  // Current particle positions (in physical units)
1680  amrex::ParticleReal xp, yp, zp;
1681  GetPosition(ip, xp, yp, zp);
1682 
1683  // Particle velocities
1684  const amrex::Real vx = uxp[ip] * invgam;
1685  const amrex::Real vy = uyp[ip] * invgam;
1686  const amrex::Real vz = uzp[ip] * invgam;
1687 
1688  // Modify the particle position to match the time of the deposition
1689  xp += relative_time * vx;
1690  yp += relative_time * vy;
1691  zp += relative_time * vz;
1692 
1693  // Particle current densities
1694 #if defined(WARPX_DIM_XZ)
1695  const amrex::Real wqy = wq * vy * invvol;
1696 #endif
1697 
1698  // Current and old particle positions in grid units
1699  // Keep these double to avoid bug in single precision.
1700  double const x_new = (xp - xmin + 0.5_rt*dt*vx) * dxi;
1701  double const x_old = (xp - xmin - 0.5_rt*dt*vx) * dxi;
1702 #if defined(WARPX_DIM_3D)
1703  // Keep these double to avoid bug in single precision.
1704  double const y_new = (yp - ymin + 0.5_rt*dt*vy) * dyi;
1705  double const y_old = (yp - ymin - 0.5_rt*dt*vy) * dyi;
1706 #endif
1707  // Keep these double to avoid bug in single precision.
1708  double const z_new = (zp - zmin + 0.5_rt*dt*vz) * dzi;
1709  double const z_old = (zp - zmin - 0.5_rt*dt*vz) * dzi;
1710 
1711  // Shape factor arrays for current and old positions (nodal)
1712  // Keep these double to avoid bug in single precision.
1713  double sx_new[depos_order+1] = {0.};
1714  double sx_old[depos_order+1] = {0.};
1715 #if defined(WARPX_DIM_3D)
1716  // Keep these double to avoid bug in single precision.
1717  double sy_new[depos_order+1] = {0.};
1718  double sy_old[depos_order+1] = {0.};
1719 #endif
1720  // Keep these double to avoid bug in single precision.
1721  double sz_new[depos_order+1] = {0.};
1722  double sz_old[depos_order+1] = {0.};
1723 
1724  // Compute shape factors for current positions
1725 
1726  // i_new leftmost grid point in x that the particle touches
1727  // sx_new shape factor along x for the centering of each current
1728  Compute_shape_factor< depos_order > const compute_shape_factor;
1729  const int i_new = compute_shape_factor(sx_new, x_new);
1730 #if defined(WARPX_DIM_3D)
1731  // j_new leftmost grid point in y that the particle touches
1732  // sy_new shape factor along y for the centering of each current
1733  const int j_new = compute_shape_factor(sy_new, y_new);
1734 #endif
1735  // k_new leftmost grid point in z that the particle touches
1736  // sz_new shape factor along z for the centering of each current
1737  const int k_new = compute_shape_factor(sz_new, z_new);
1738 
1739  // Compute shape factors for old positions
1740 
1741  // i_old leftmost grid point in x that the particle touches
1742  // sx_old shape factor along x for the centering of each current
1743  const int i_old = compute_shape_factor(sx_old, x_old);
1744 #if defined(WARPX_DIM_3D)
1745  // j_old leftmost grid point in y that the particle touches
1746  // sy_old shape factor along y for the centering of each current
1747  const int j_old = compute_shape_factor(sy_old, y_old);
1748 #endif
1749  // k_old leftmost grid point in z that the particle touches
1750  // sz_old shape factor along z for the centering of each current
1751  const int k_old = compute_shape_factor(sz_old, z_old);
1752 
1753  // Deposit current into Dx_arr, Dy_arr and Dz_arr
1754 #if defined(WARPX_DIM_XZ)
1755 
1756  for (int k=0; k<=depos_order; k++) {
1757  for (int i=0; i<=depos_order; i++) {
1758 
1759  // Re-casting sx_new and sz_new from double to amrex::Real so that
1760  // Atomic::Add has consistent types in its argument
1761  auto const sxn_szn = static_cast<amrex::Real>(sx_new[i] * sz_new[k]);
1762  auto const sxo_szn = static_cast<amrex::Real>(sx_old[i] * sz_new[k]);
1763  auto const sxn_szo = static_cast<amrex::Real>(sx_new[i] * sz_old[k]);
1764  auto const sxo_szo = static_cast<amrex::Real>(sx_old[i] * sz_old[k]);
1765 
1766  if (i_new == i_old && k_new == k_old) {
1767  // temp arrays for Dx and Dz
1768  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
1769  wq * invvol * invdt * (sxn_szn - sxo_szo));
1770 
1771  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 1),
1772  wq * invvol * invdt * (sxn_szo - sxo_szn));
1773 
1774  // Dy
1775  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
1776  wqy * 0.25_rt * (sxn_szn + sxn_szo + sxo_szn + sxo_szo));
1777  } else {
1778  // temp arrays for Dx and Dz
1779  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
1780  wq * invvol * invdt * sxn_szn);
1781 
1782  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
1783  - wq * invvol * invdt * sxo_szo);
1784 
1785  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 1),
1786  wq * invvol * invdt * sxn_szo);
1787 
1788  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 1),
1789  - wq * invvol * invdt * sxo_szn);
1790 
1791  // Dy
1792  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
1793  wqy * 0.25_rt * sxn_szn);
1794 
1795  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 0),
1796  wqy * 0.25_rt * sxn_szo);
1797 
1798  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 0),
1799  wqy * 0.25_rt * sxo_szn);
1800 
1801  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
1802  wqy * 0.25_rt * sxo_szo);
1803  }
1804 
1805  }
1806  }
1807 
1808 #elif defined(WARPX_DIM_3D)
1809 
1810  for (int k=0; k<=depos_order; k++) {
1811  for (int j=0; j<=depos_order; j++) {
1812 
1813  auto const syn_szn = static_cast<amrex::Real>(sy_new[j] * sz_new[k]);
1814  auto const syo_szn = static_cast<amrex::Real>(sy_old[j] * sz_new[k]);
1815  auto const syn_szo = static_cast<amrex::Real>(sy_new[j] * sz_old[k]);
1816  auto const syo_szo = static_cast<amrex::Real>(sy_old[j] * sz_old[k]);
1817 
1818  for (int i=0; i<=depos_order; i++) {
1819 
1820  auto const sxn_syn_szn = static_cast<amrex::Real>(sx_new[i]) * syn_szn;
1821  auto const sxo_syn_szn = static_cast<amrex::Real>(sx_old[i]) * syn_szn;
1822  auto const sxn_syo_szn = static_cast<amrex::Real>(sx_new[i]) * syo_szn;
1823  auto const sxo_syo_szn = static_cast<amrex::Real>(sx_old[i]) * syo_szn;
1824  auto const sxn_syn_szo = static_cast<amrex::Real>(sx_new[i]) * syn_szo;
1825  auto const sxo_syn_szo = static_cast<amrex::Real>(sx_old[i]) * syn_szo;
1826  auto const sxn_syo_szo = static_cast<amrex::Real>(sx_new[i]) * syo_szo;
1827  auto const sxo_syo_szo = static_cast<amrex::Real>(sx_old[i]) * syo_szo;
1828 
1829  if (i_new == i_old && j_new == j_old && k_new == k_old) {
1830  // temp arrays for Dx, Dy and Dz
1831  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
1832  wq * invvol * invdt * (sxn_syn_szn - sxo_syo_szo));
1833 
1834  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 1),
1835  wq * invvol * invdt * (sxn_syn_szo - sxo_syo_szn));
1836 
1837  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 2),
1838  wq * invvol * invdt * (sxn_syo_szn - sxo_syn_szo));
1839 
1840  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 3),
1841  wq * invvol * invdt * (sxo_syn_szn - sxn_syo_szo));
1842  } else {
1843  // temp arrays for Dx, Dy and Dz
1844  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
1845  wq * invvol * invdt * sxn_syn_szn);
1846 
1847  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_old + k, 0),
1848  - wq * invvol * invdt * sxo_syo_szo);
1849 
1850  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_old + k, 1),
1851  wq * invvol * invdt * sxn_syn_szo);
1852 
1853  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_new + k, 1),
1854  - wq * invvol * invdt * sxo_syo_szn);
1855 
1856  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_new + k, 2),
1857  wq * invvol * invdt * sxn_syo_szn);
1858 
1859  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_old + k, 2),
1860  - wq * invvol * invdt * sxo_syn_szo);
1861 
1862  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_new + k, 3),
1863  wq * invvol * invdt * sxo_syn_szn);
1864 
1865  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_old + k, 3),
1866  - wq * invvol * invdt * sxn_syo_szo);
1867  }
1868  }
1869  }
1870  }
1871 #endif
1872  } );
1873 
1874 #if defined(WARPX_DIM_3D)
1875  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
1876  {
1877  const amrex::Real t_a = temp_arr(i,j,k,0);
1878  const amrex::Real t_b = temp_arr(i,j,k,1);
1879  const amrex::Real t_c = temp_arr(i,j,k,2);
1880  const amrex::Real t_d = temp_arr(i,j,k,3);
1881  Dx_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b + t_c - 2._rt*t_d);
1882  Dy_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b - 2._rt*t_c + t_d);
1883  Dz_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a - 2._rt*t_b + t_c + t_d);
1884  });
1885 #elif defined(WARPX_DIM_XZ)
1886  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
1887  {
1888  const amrex::Real t_a = temp_arr(i,j,0,0);
1889  const amrex::Real t_b = temp_arr(i,j,0,1);
1890  Dx_arr(i,j,0) += (0.5_rt)*(t_a + t_b);
1891  Dz_arr(i,j,0) += (0.5_rt)*(t_a - t_b);
1892  });
1893 #endif
1894  // Synchronize so that temp_fab can be safely deallocated in its destructor
1896 
1897 
1898 # if defined(WARPX_USE_GPUCLOCK)
1899  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
1901  *cost += *cost_real;
1902  amrex::The_Managed_Arena()->free(cost_real);
1903  }
1904 # endif
1905 #endif // #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
1906 }
1907 #endif // CURRENTDEPOSITION_H_
#define AMREX_ALWAYS_ASSERT(EX)
#define AMREX_INLINE
#define AMREX_GPU_DEVICE
#define AMREX_GPU_HOST_DEVICE
#define AMREX_D_DECL(a, b, c)
void doDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo)
Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:294
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doDepositionShapeNKernel(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, const amrex::ParticleReal wq, const amrex::ParticleReal vx, const amrex::ParticleReal vy, const amrex::ParticleReal vz, amrex::Array4< amrex::Real > const &jx_arr, amrex::Array4< amrex::Real > const &jy_arr, amrex::Array4< amrex::Real > const &jz_arr, amrex::IntVect const &jx_type, amrex::IntVect const &jy_type, amrex::IntVect const &jz_type, const amrex::Real relative_time, AMREX_D_DECL(const amrex::Real dzi, const amrex::Real dxi, const amrex::Real dyi), AMREX_D_DECL(const amrex::Real zmin, const amrex::Real xmin, const amrex::Real ymin), const amrex::Real invvol, const amrex::Dim3 lo, const int n_rz_azimuthal_modes)
Kernel for the direct current deposition for thread thread_num.
Definition: CurrentDeposition.H:49
void doDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo, const amrex::DenseBins< WarpXParticleContainer::ParticleType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size)
Current Deposition for thread thread_num using shared memory.
Definition: CurrentDeposition.H:577
void doEsirkepovDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, std::array< amrex::Real, 3 > xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *const cost, long load_balance_costs_update_algo)
Esirkepov Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:789
void doChargeConservingDepositionShapeNImplicit(const amrex::ParticleReal *const xp_n, const amrex::ParticleReal *const yp_n, const amrex::ParticleReal *const zp_n, const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, [[maybe_unused]]const amrex::ParticleReal *const uxp_n, [[maybe_unused]]const amrex::ParticleReal *const uyp_n, [[maybe_unused]]const amrex::ParticleReal *const uzp_n, [[maybe_unused]]const amrex::ParticleReal *const uxp_nph, [[maybe_unused]]const amrex::ParticleReal *const uyp_nph, [[maybe_unused]]const amrex::ParticleReal *const uzp_nph, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_deposit, const amrex::Real dt, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *const cost, const long load_balance_costs_update_algo)
Esirkepov Current Deposition for thread thread_num for implicit scheme The difference from doEsirkepo...
Definition: CurrentDeposition.H:1164
void doVayDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &Dx_fab, amrex::FArrayBox &Dy_fab, amrex::FArrayBox &Dz_fab, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo)
Vay current deposition (Vay et al, 2013) for thread thread_num: deposit D in real space and store the...
Definition: CurrentDeposition.H:1566
void doDepositionShapeNImplicit(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp_n, const amrex::ParticleReal *const uyp_n, const amrex::ParticleReal *const uzp_n, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Direct current deposition for thread thread_num for the implicit scheme The only difference from doDe...
Definition: CurrentDeposition.H:431
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition: TextMsg.H:15
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
amrex::GpuComplex< amrex::Real > Complex
Definition: WarpX_Complex.H:22
NODE
static amrex::IntVect shared_tilesize
tileSize to use for shared current deposition operations
Definition: WarpX.H:242
static int shared_mem_current_tpb
number of threads to use per block in shared deposition
Definition: WarpX.H:239
Defines a timer object to be used on GPU; measures summed thread cycles.
Definition: KernelTimer.H:27
virtual void free(void *pt)=0
virtual void * alloc(std::size_t sz)=0
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
AMREX_GPU_HOST_DEVICE Box & grow(int i) noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
static constexpr auto c
vacuum speed of light [m/s]
Definition: constant.H:44
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box convert(const Box &b, const IntVect &typ) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
void launch(T const &n, L &&f) noexcept
Arena * The_Managed_Arena()
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
float dt
Definition: stencil.py:442
tuple dx
lab frame
Definition: stencil.py:429
xmin
Definition: stencil.py:424
Definition: ShapeFactors.H:29
Definition: ShapeFactors.H:84
@ GpuClock
Definition: WarpXAlgorithmSelection.H:136
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept