WarpX
CurrentDeposition.H
Go to the documentation of this file.
1 /* Copyright 2019 Axel Huebl, David Grote, Maxence Thevenet
2  * Remi Lehe, Weiqun Zhang, Michael Rowan
3  *
4  * This file is part of WarpX.
5  *
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef CURRENTDEPOSITION_H_
9 #define CURRENTDEPOSITION_H_
10 
13 #include "Particles/ShapeFactors.H"
15 #include "Utils/WarpXConst.H"
16 #ifdef WARPX_DIM_RZ
17 # include "Utils/WarpX_Complex.H"
18 #endif
19 
20 #include <AMReX.H>
21 #include <AMReX_Arena.H>
22 #include <AMReX_Array4.H>
23 #include <AMReX_REAL.H>
24 
25 using namespace amrex::literals;
26 
51 template <int depos_order>
52 void doDepositionShapeN(const GetParticlePosition& GetPosition,
53  const amrex::ParticleReal * const wp,
54  const amrex::ParticleReal * const uxp,
55  const amrex::ParticleReal * const uyp,
56  const amrex::ParticleReal * const uzp,
57  const int * const ion_lev,
58  amrex::FArrayBox& jx_fab,
59  amrex::FArrayBox& jy_fab,
60  amrex::FArrayBox& jz_fab,
61  const long np_to_depose,
62  const amrex::Real relative_time,
63  const std::array<amrex::Real,3>& dx,
64  const std::array<amrex::Real,3>& xyzmin,
65  const amrex::Dim3 lo,
66  const amrex::Real q,
67  const int n_rz_azimuthal_modes,
68  amrex::Real* cost,
69  const long load_balance_costs_update_algo)
70 {
71 #if !defined(WARPX_DIM_RZ)
72  amrex::ignore_unused(n_rz_azimuthal_modes);
73 #endif
74 
75 #if !defined(AMREX_USE_GPU)
76  amrex::ignore_unused(cost, load_balance_costs_update_algo);
77 #endif
78 
79  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
80  // (do_ionization=1)
81  const bool do_ionization = ion_lev;
82  const amrex::Real dzi = 1.0_rt/dx[2];
83 #if defined(WARPX_DIM_1D_Z)
84  const amrex::Real invvol = dzi;
85 #endif
86 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
87  const amrex::Real dxi = 1.0_rt/dx[0];
88  const amrex::Real invvol = dxi*dzi;
89 #elif defined(WARPX_DIM_3D)
90  const amrex::Real dxi = 1.0_rt/dx[0];
91  const amrex::Real dyi = 1.0_rt/dx[1];
92  const amrex::Real invvol = dxi*dyi*dzi;
93 #endif
94 
95 #if (AMREX_SPACEDIM >= 2)
96  const amrex::Real xmin = xyzmin[0];
97 #endif
98 #if defined(WARPX_DIM_3D)
99  const amrex::Real ymin = xyzmin[1];
100 #endif
101  const amrex::Real zmin = xyzmin[2];
102 
103  const amrex::Real clightsq = 1.0_rt/PhysConst::c/PhysConst::c;
104 
105  amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
106  amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
107  amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
108  amrex::IntVect const jx_type = jx_fab.box().type();
109  amrex::IntVect const jy_type = jy_fab.box().type();
110  amrex::IntVect const jz_type = jz_fab.box().type();
111 
112  constexpr int zdir = WARPX_ZINDEX;
113  constexpr int NODE = amrex::IndexType::NODE;
114  constexpr int CELL = amrex::IndexType::CELL;
115 
116  // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
117 #if defined(WARPX_USE_GPUCLOCK)
118  amrex::Real* cost_real = nullptr;
119  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
120  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
121  *cost_real = 0._rt;
122  }
123 #endif
125  np_to_depose,
126  [=] AMREX_GPU_DEVICE (long ip) {
127 #if defined(WARPX_USE_GPUCLOCK)
128  KernelTimer kernelTimer(cost && load_balance_costs_update_algo
130 #endif
131 
132  // --- Get particle quantities
133  const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
134  + uyp[ip]*uyp[ip]*clightsq
135  + uzp[ip]*uzp[ip]*clightsq);
136  amrex::Real wq = q*wp[ip];
137  if (do_ionization){
138  wq *= ion_lev[ip];
139  }
140 
141  amrex::ParticleReal xp, yp, zp;
142  GetPosition(ip, xp, yp, zp);
143 
144  const amrex::Real vx = uxp[ip]*gaminv;
145  const amrex::Real vy = uyp[ip]*gaminv;
146  const amrex::Real vz = uzp[ip]*gaminv;
147  // wqx, wqy wqz are particle current in each direction
148 #if defined(WARPX_DIM_RZ)
149  // In RZ, wqx is actually wqr, and wqy is wqtheta
150  // Convert to cylinderical at the mid point
151  const amrex::Real xpmid = xp + relative_time*vx;
152  const amrex::Real ypmid = yp + relative_time*vy;
153  const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
154  amrex::Real costheta;
155  amrex::Real sintheta;
156  if (rpmid > 0._rt) {
157  costheta = xpmid/rpmid;
158  sintheta = ypmid/rpmid;
159  } else {
160  costheta = 1._rt;
161  sintheta = 0._rt;
162  }
163  const Complex xy0 = Complex{costheta, sintheta};
164  const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
165  const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
166 #else
167  const amrex::Real wqx = wq*invvol*vx;
168  const amrex::Real wqy = wq*invvol*vy;
169 #endif
170  const amrex::Real wqz = wq*invvol*vz;
171 
172  // --- Compute shape factors
173  Compute_shape_factor< depos_order > const compute_shape_factor;
174 #if (AMREX_SPACEDIM >= 2)
175  // x direction
176  // Get particle position after 1/2 push back in position
177 #if defined(WARPX_DIM_RZ)
178  // Keep these double to avoid bug in single precision
179  const double xmid = (rpmid - xmin)*dxi;
180 #else
181  const double xmid = ((xp - xmin) + relative_time*vx)*dxi;
182 #endif
183  // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
184  // sx_j[xyz] shape factor along x for the centering of each current
185  // There are only two possible centerings, node or cell centered, so at most only two shape factor
186  // arrays will be needed.
187  // Keep these double to avoid bug in single precision
188  double sx_node[depos_order + 1] = {0.};
189  double sx_cell[depos_order + 1] = {0.};
190  int j_node = 0;
191  int j_cell = 0;
192  if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
193  j_node = compute_shape_factor(sx_node, xmid);
194  }
195  if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
196  j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
197  }
198 
199  amrex::Real sx_jx[depos_order + 1] = {0._rt};
200  amrex::Real sx_jy[depos_order + 1] = {0._rt};
201  amrex::Real sx_jz[depos_order + 1] = {0._rt};
202  for (int ix=0; ix<=depos_order; ix++)
203  {
204  sx_jx[ix] = ((jx_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
205  sx_jy[ix] = ((jy_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
206  sx_jz[ix] = ((jz_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
207  }
208 
209  int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
210  int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
211  int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
212 #endif //AMREX_SPACEDIM >= 2
213 
214 #if defined(WARPX_DIM_3D)
215  // y direction
216  // Keep these double to avoid bug in single precision
217  const double ymid = ((yp - ymin) + relative_time*vy)*dyi;
218  double sy_node[depos_order + 1] = {0.};
219  double sy_cell[depos_order + 1] = {0.};
220  int k_node = 0;
221  int k_cell = 0;
222  if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
223  k_node = compute_shape_factor(sy_node, ymid);
224  }
225  if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
226  k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
227  }
228  amrex::Real sy_jx[depos_order + 1] = {0._rt};
229  amrex::Real sy_jy[depos_order + 1] = {0._rt};
230  amrex::Real sy_jz[depos_order + 1] = {0._rt};
231  for (int iy=0; iy<=depos_order; iy++)
232  {
233  sy_jx[iy] = ((jx_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
234  sy_jy[iy] = ((jy_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
235  sy_jz[iy] = ((jz_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
236  }
237  int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
238  int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
239  int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
240 #endif
241 
242  // z direction
243  // Keep these double to avoid bug in single precision
244  const double zmid = ((zp - zmin) + relative_time*vz)*dzi;
245  double sz_node[depos_order + 1] = {0.};
246  double sz_cell[depos_order + 1] = {0.};
247  int l_node = 0;
248  int l_cell = 0;
249  if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
250  l_node = compute_shape_factor(sz_node, zmid);
251  }
252  if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
253  l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
254  }
255  amrex::Real sz_jx[depos_order + 1] = {0._rt};
256  amrex::Real sz_jy[depos_order + 1] = {0._rt};
257  amrex::Real sz_jz[depos_order + 1] = {0._rt};
258  for (int iz=0; iz<=depos_order; iz++)
259  {
260  sz_jx[iz] = ((jx_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
261  sz_jy[iz] = ((jy_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
262  sz_jz[iz] = ((jz_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
263  }
264  int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
265  int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
266  int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
267 
268  // Deposit current into jx_arr, jy_arr and jz_arr
269 #if defined(WARPX_DIM_1D_Z)
270  for (int iz=0; iz<=depos_order; iz++){
272  &jx_arr(lo.x+l_jx+iz, 0, 0, 0),
273  sz_jx[iz]*wqx);
275  &jy_arr(lo.x+l_jy+iz, 0, 0, 0),
276  sz_jy[iz]*wqy);
278  &jz_arr(lo.x+l_jz+iz, 0, 0, 0),
279  sz_jz[iz]*wqz);
280  }
281 #endif
282 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
283  for (int iz=0; iz<=depos_order; iz++){
284  for (int ix=0; ix<=depos_order; ix++){
286  &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0),
287  sx_jx[ix]*sz_jx[iz]*wqx);
289  &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0),
290  sx_jy[ix]*sz_jy[iz]*wqy);
292  &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0),
293  sx_jz[ix]*sz_jz[iz]*wqz);
294 #if defined(WARPX_DIM_RZ)
295  Complex xy = xy0; // Note that xy is equal to e^{i m theta}
296  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
297  // The factor 2 on the weighting comes from the normalization of the modes
298  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode-1), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.real());
299  amrex::Gpu::Atomic::AddNoRet( &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 2*imode ), 2._rt*sx_jx[ix]*sz_jx[iz]*wqx*xy.imag());
300  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode-1), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.real());
301  amrex::Gpu::Atomic::AddNoRet( &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 2*imode ), 2._rt*sx_jy[ix]*sz_jy[iz]*wqy*xy.imag());
302  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode-1), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.real());
303  amrex::Gpu::Atomic::AddNoRet( &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 2*imode ), 2._rt*sx_jz[ix]*sz_jz[iz]*wqz*xy.imag());
304  xy = xy*xy0;
305  }
306 #endif
307  }
308  }
309 #elif defined(WARPX_DIM_3D)
310  for (int iz=0; iz<=depos_order; iz++){
311  for (int iy=0; iy<=depos_order; iy++){
312  for (int ix=0; ix<=depos_order; ix++){
314  &jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz),
315  sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
317  &jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz),
318  sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
320  &jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz),
321  sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
322  }
323  }
324  }
325 #endif
326  }
327  );
328 #if defined(WARPX_USE_GPUCLOCK)
329  if(cost && load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
331  *cost += *cost_real;
332  amrex::The_Managed_Arena()->free(cost_real);
333  }
334 #endif
335 }
336 
363 template <int depos_order>
365  const amrex::ParticleReal * const wp,
366  const amrex::ParticleReal * const uxp,
367  const amrex::ParticleReal * const uyp,
368  const amrex::ParticleReal * const uzp,
369  const int * const ion_lev,
370  const amrex::Array4<amrex::Real>& Jx_arr,
371  const amrex::Array4<amrex::Real>& Jy_arr,
372  const amrex::Array4<amrex::Real>& Jz_arr,
373  const long np_to_depose,
374  const amrex::Real dt,
375  const amrex::Real relative_time,
376  const std::array<amrex::Real,3>& dx,
377  const std::array<amrex::Real, 3> xyzmin,
378  const amrex::Dim3 lo,
379  const amrex::Real q,
380  const int n_rz_azimuthal_modes,
381  amrex::Real * const cost,
382  const long load_balance_costs_update_algo)
383 {
384  using namespace amrex;
385 #if !defined(WARPX_DIM_RZ)
386  ignore_unused(n_rz_azimuthal_modes);
387 #endif
388 
389 #if !defined(AMREX_USE_GPU)
390  amrex::ignore_unused(cost, load_balance_costs_update_algo);
391 #endif
392 
393  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
394  // (do_ionization=1)
395  bool const do_ionization = ion_lev;
396 #if !defined(WARPX_DIM_1D_Z)
397  Real const dxi = 1.0_rt / dx[0];
398 #endif
399 #if !defined(WARPX_DIM_1D_Z)
400  Real const xmin = xyzmin[0];
401 #endif
402 #if defined(WARPX_DIM_3D)
403  Real const dyi = 1.0_rt / dx[1];
404  Real const ymin = xyzmin[1];
405 #endif
406  Real const dzi = 1.0_rt / dx[2];
407  Real const zmin = xyzmin[2];
408 
409 #if defined(WARPX_DIM_3D)
410  Real const invdtdx = 1.0_rt / (dt*dx[1]*dx[2]);
411  Real const invdtdy = 1.0_rt / (dt*dx[0]*dx[2]);
412  Real const invdtdz = 1.0_rt / (dt*dx[0]*dx[1]);
413 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
414  Real const invdtdx = 1.0_rt / (dt*dx[2]);
415  Real const invdtdz = 1.0_rt / (dt*dx[0]);
416  Real const invvol = 1.0_rt / (dx[0]*dx[2]);
417 #elif defined(WARPX_DIM_1D_Z)
418  Real const invdtdz = 1.0_rt / (dt*dx[0]);
419  Real const invvol = 1.0_rt / (dx[2]);
420 #endif
421 
422 #if defined(WARPX_DIM_RZ)
423  Complex const I = Complex{0._rt, 1._rt};
424 #endif
425 
426  Real const clightsq = 1.0_rt / ( PhysConst::c * PhysConst::c );
427 #if !defined(WARPX_DIM_1D_Z)
428  Real constexpr one_third = 1.0_rt / 3.0_rt;
429  Real constexpr one_sixth = 1.0_rt / 6.0_rt;
430 #endif
431 
432  // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
433 #if defined(WARPX_USE_GPUCLOCK)
434  amrex::Real* cost_real = nullptr;
435  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
436  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
437  *cost_real = 0._rt;
438  }
439 #endif
441  np_to_depose,
442  [=] AMREX_GPU_DEVICE (long const ip) {
443 #if defined(WARPX_USE_GPUCLOCK)
444  KernelTimer kernelTimer(cost && load_balance_costs_update_algo
446 #endif
447 
448  // --- Get particle quantities
449  Real const gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
450  + uyp[ip]*uyp[ip]*clightsq
451  + uzp[ip]*uzp[ip]*clightsq);
452 
453  // wqx, wqy wqz are particle current in each direction
454  Real wq = q*wp[ip];
455  if (do_ionization){
456  wq *= ion_lev[ip];
457  }
458 
459  ParticleReal xp, yp, zp;
460  GetPosition(ip, xp, yp, zp);
461 
462 #if !defined(WARPX_DIM_1D_Z)
463  Real const wqx = wq*invdtdx;
464 #endif
465 #if defined(WARPX_DIM_3D)
466  Real const wqy = wq*invdtdy;
467 #endif
468  Real const wqz = wq*invdtdz;
469 
470  // computes current and old position in grid units
471 #if defined(WARPX_DIM_RZ)
472  Real const xp_new = xp + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv;
473  Real const yp_new = yp + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv;
474  Real const xp_mid = xp_new - 0.5_rt*dt*uxp[ip]*gaminv;
475  Real const yp_mid = yp_new - 0.5_rt*dt*uyp[ip]*gaminv;
476  Real const xp_old = xp_new - dt*uxp[ip]*gaminv;
477  Real const yp_old = yp_new - dt*uyp[ip]*gaminv;
478  Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
479  Real const rp_mid = std::sqrt(xp_mid*xp_mid + yp_mid*yp_mid);
480  Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
481  Real costheta_new, sintheta_new;
482  if (rp_new > 0._rt) {
483  costheta_new = xp_new/rp_new;
484  sintheta_new = yp_new/rp_new;
485  } else {
486  costheta_new = 1._rt;
487  sintheta_new = 0._rt;
488  }
489  amrex::Real costheta_mid, sintheta_mid;
490  if (rp_mid > 0._rt) {
491  costheta_mid = xp_mid/rp_mid;
492  sintheta_mid = yp_mid/rp_mid;
493  } else {
494  costheta_mid = 1._rt;
495  sintheta_mid = 0._rt;
496  }
497  amrex::Real costheta_old, sintheta_old;
498  if (rp_old > 0._rt) {
499  costheta_old = xp_old/rp_old;
500  sintheta_old = yp_old/rp_old;
501  } else {
502  costheta_old = 1._rt;
503  sintheta_old = 0._rt;
504  }
505  const Complex xy_new0 = Complex{costheta_new, sintheta_new};
506  const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
507  const Complex xy_old0 = Complex{costheta_old, sintheta_old};
508  // Keep these double to avoid bug in single precision
509  double const x_new = (rp_new - xmin)*dxi;
510  double const x_old = (rp_old - xmin)*dxi;
511 #else
512 #if !defined(WARPX_DIM_1D_Z)
513  // Keep these double to avoid bug in single precision
514  double const x_new = (xp - xmin + (relative_time + 0.5_rt*dt)*uxp[ip]*gaminv)*dxi;
515  double const x_old = x_new - dt*dxi*uxp[ip]*gaminv;
516 #endif
517 #endif
518 #if defined(WARPX_DIM_3D)
519  // Keep these double to avoid bug in single precision
520  double const y_new = (yp - ymin + (relative_time + 0.5_rt*dt)*uyp[ip]*gaminv)*dyi;
521  double const y_old = y_new - dt*dyi*uyp[ip]*gaminv;
522 #endif
523  // Keep these double to avoid bug in single precision
524  double const z_new = (zp - zmin + (relative_time + 0.5_rt*dt)*uzp[ip]*gaminv)*dzi;
525  double const z_old = z_new - dt*dzi*uzp[ip]*gaminv;
526 
527 #if defined(WARPX_DIM_RZ)
528  Real const vy = (-uxp[ip]*sintheta_mid + uyp[ip]*costheta_mid)*gaminv;
529 #elif defined(WARPX_DIM_XZ)
530  Real const vy = uyp[ip]*gaminv;
531 #elif defined(WARPX_DIM_1D_Z)
532  Real const vx = uxp[ip]*gaminv;
533  Real const vy = uyp[ip]*gaminv;
534 #endif
535 
536  // Shape factor arrays
537  // Note that there are extra values above and below
538  // to possibly hold the factor for the old particle
539  // which can be at a different grid location.
540  // Keep these double to avoid bug in single precision
541 #if !defined(WARPX_DIM_1D_Z)
542  double sx_new[depos_order + 3] = {0.};
543  double sx_old[depos_order + 3] = {0.};
544 #endif
545 #if defined(WARPX_DIM_3D)
546  // Keep these double to avoid bug in single precision
547  double sy_new[depos_order + 3] = {0.};
548  double sy_old[depos_order + 3] = {0.};
549 #endif
550  // Keep these double to avoid bug in single precision
551  double sz_new[depos_order + 3] = {0.};
552  double sz_old[depos_order + 3] = {0.};
553 
554  // --- Compute shape factors
555  // Compute shape factors for position as they are now and at old positions
556  // [ijk]_new: leftmost grid point that the particle touches
557  Compute_shape_factor< depos_order > compute_shape_factor;
558  Compute_shifted_shape_factor< depos_order > compute_shifted_shape_factor;
559 
560 #if !defined(WARPX_DIM_1D_Z)
561  const int i_new = compute_shape_factor(sx_new+1, x_new);
562  const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
563 #endif
564 #if defined(WARPX_DIM_3D)
565  const int j_new = compute_shape_factor(sy_new+1, y_new);
566  const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
567 #endif
568  const int k_new = compute_shape_factor(sz_new+1, z_new);
569  const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
570 
571  // computes min/max positions of current contributions
572 #if !defined(WARPX_DIM_1D_Z)
573  int dil = 1, diu = 1;
574  if (i_old < i_new) dil = 0;
575  if (i_old > i_new) diu = 0;
576 #endif
577 #if defined(WARPX_DIM_3D)
578  int djl = 1, dju = 1;
579  if (j_old < j_new) djl = 0;
580  if (j_old > j_new) dju = 0;
581 #endif
582  int dkl = 1, dku = 1;
583  if (k_old < k_new) dkl = 0;
584  if (k_old > k_new) dku = 0;
585 
586 #if defined(WARPX_DIM_3D)
587 
588  for (int k=dkl; k<=depos_order+2-dku; k++) {
589  for (int j=djl; j<=depos_order+2-dju; j++) {
590  amrex::Real sdxi = 0._rt;
591  for (int i=dil; i<=depos_order+1-diu; i++) {
592  sdxi += wqx*(sx_old[i] - sx_new[i])*(
593  one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
594  +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
595  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
596  }
597  }
598  }
599  for (int k=dkl; k<=depos_order+2-dku; k++) {
600  for (int i=dil; i<=depos_order+2-diu; i++) {
601  amrex::Real sdyj = 0._rt;
602  for (int j=djl; j<=depos_order+1-dju; j++) {
603  sdyj += wqy*(sy_old[j] - sy_new[j])*(
604  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
605  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
606  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
607  }
608  }
609  }
610  for (int j=djl; j<=depos_order+2-dju; j++) {
611  for (int i=dil; i<=depos_order+2-diu; i++) {
612  amrex::Real sdzk = 0._rt;
613  for (int k=dkl; k<=depos_order+1-dku; k++) {
614  sdzk += wqz*(sz_old[k] - sz_new[k])*(
615  one_third*(sx_new[i]*sy_new[j] + sx_old[i]*sy_old[j])
616  +one_sixth*(sx_new[i]*sy_old[j] + sx_old[i]*sy_new[j]));
617  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
618  }
619  }
620  }
621 
622 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
623 
624  for (int k=dkl; k<=depos_order+2-dku; k++) {
625  amrex::Real sdxi = 0._rt;
626  for (int i=dil; i<=depos_order+1-diu; i++) {
627  sdxi += wqx*(sx_old[i] - sx_new[i])*0.5_rt*(sz_new[k] + sz_old[k]);
628  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdxi);
629 #if defined(WARPX_DIM_RZ)
630  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
631  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
632  // The factor 2 comes from the normalization of the modes
633  const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
634  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
635  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
636  xy_mid = xy_mid*xy_mid0;
637  }
638 #endif
639  }
640  }
641  for (int k=dkl; k<=depos_order+2-dku; k++) {
642  for (int i=dil; i<=depos_order+2-diu; i++) {
643  Real const sdyj = wq*vy*invvol*(
644  one_third*(sx_new[i]*sz_new[k] + sx_old[i]*sz_old[k])
645  +one_sixth*(sx_new[i]*sz_old[k] + sx_old[i]*sz_new[k]));
646  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdyj);
647 #if defined(WARPX_DIM_RZ)
648  Complex xy_new = xy_new0;
649  Complex xy_mid = xy_mid0;
650  Complex xy_old = xy_old0;
651  // Throughout the following loop, xy_ takes the value e^{i m theta_}
652  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
653  // The factor 2 comes from the normalization of the modes
654  // The minus sign comes from the different convention with respect to Davidson et al.
655  const Complex djt_cmplx = -2._rt * I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode
656  *(Complex(sx_new[i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
657  + Complex(sx_old[i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
658  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
659  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
660  xy_new = xy_new*xy_new0;
661  xy_mid = xy_mid*xy_mid0;
662  xy_old = xy_old*xy_old0;
663  }
664 #endif
665  }
666  }
667  for (int i=dil; i<=depos_order+2-diu; i++) {
668  Real sdzk = 0._rt;
669  for (int k=dkl; k<=depos_order+1-dku; k++) {
670  sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[i] + sx_old[i]);
671  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 0), sdzk);
672 #if defined(WARPX_DIM_RZ)
673  Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
674  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
675  // The factor 2 comes from the normalization of the modes
676  const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
677  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
678  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
679  xy_mid = xy_mid*xy_mid0;
680  }
681 #endif
682  }
683  }
684 #elif defined(WARPX_DIM_1D_Z)
685 
686  for (int k=dkl; k<=depos_order+2-dku; k++) {
687  amrex::Real const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
688  amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+k_new-1+k, 0, 0, 0), sdxi);
689  }
690  for (int k=dkl; k<=depos_order+2-dku; k++) {
691  amrex::Real const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
692  amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+k_new-1+k, 0, 0, 0), sdyj);
693  }
694  amrex::Real sdzk = 0._rt;
695  for (int k=dkl; k<=depos_order+1-dku; k++) {
696  sdzk += wqz*(sz_old[k] - sz_new[k]);
697  amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+k_new-1+k, 0, 0, 0), sdzk);
698  }
699 #endif
700  }
701  );
702 #if defined(WARPX_USE_GPUCLOCK)
703  if(cost && load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
705  *cost += *cost_real;
706  amrex::The_Managed_Arena()->free(cost_real);
707  }
708 #endif
709 }
710 
740 template <int depos_order>
742  const amrex::ParticleReal* const wp,
743  const amrex::ParticleReal* const uxp,
744  const amrex::ParticleReal* const uyp,
745  const amrex::ParticleReal* const uzp,
746  const int* const ion_lev,
747  amrex::FArrayBox& Dx_fab,
748  amrex::FArrayBox& Dy_fab,
749  amrex::FArrayBox& Dz_fab,
750  const long np_to_depose,
751  const amrex::Real dt,
752  const amrex::Real relative_time,
753  const std::array<amrex::Real,3>& dx,
754  const std::array<amrex::Real,3>& xyzmin,
755  const amrex::Dim3 lo,
756  const amrex::Real q,
757  const int n_rz_azimuthal_modes,
758  amrex::Real* cost,
759  const long load_balance_costs_update_algo)
760 {
761 #if defined(WARPX_DIM_RZ)
762  amrex::ignore_unused(GetPosition,
763  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
764  np_to_depose, dt, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes);
765  amrex::Abort("Vay deposition not implemented in RZ geometry");
766 #endif
767 
768 #if defined(WARPX_DIM_1D_Z)
769  amrex::ignore_unused(GetPosition,
770  wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
771  np_to_depose, dt, relative_time, dx, xyzmin, lo, q, n_rz_azimuthal_modes);
772  amrex::Abort("Vay deposition not implemented in cartesian 1D geometry");
773 #endif
774 
775 #if !defined(AMREX_USE_GPU)
776  amrex::ignore_unused(cost, load_balance_costs_update_algo);
777 #endif
778 
779 #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
780  amrex::ignore_unused(n_rz_azimuthal_modes);
781 
782  // If ion_lev is a null pointer, then do_ionization=0, else do_ionization=1
783  const bool do_ionization = ion_lev;
784 
785  // Inverse cell volume in each direction
786  const amrex::Real dxi = 1._rt / dx[0];
787  const amrex::Real dzi = 1._rt / dx[2];
788 #if defined(WARPX_DIM_3D)
789  const amrex::Real dyi = 1._rt / dx[1];
790 #endif
791 
792  // Inverse of time step
793  const amrex::Real invdt = 1._rt / dt;
794 
795  // Total inverse cell volume
796 #if defined(WARPX_DIM_XZ)
797  const amrex::Real invvol = dxi * dzi;
798 #elif defined(WARPX_DIM_3D)
799  const amrex::Real invvol = dxi * dyi * dzi;
800 #endif
801 
802  // Lower bound of physical domain in each direction
803  const amrex::Real xmin = xyzmin[0];
804  const amrex::Real zmin = xyzmin[2];
805 #if defined(WARPX_DIM_3D)
806  const amrex::Real ymin = xyzmin[1];
807 #endif
808 
809  // Allocate temporary arrays
810 #if defined(WARPX_DIM_3D)
811  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dy_fab.box() && Dx_fab.box() == Dz_fab.box());
812  amrex::FArrayBox temp_fab{Dx_fab.box(), 4};
813 #elif defined(WARPX_DIM_XZ)
814  AMREX_ALWAYS_ASSERT(Dx_fab.box() == Dz_fab.box());
815  amrex::FArrayBox temp_fab{Dx_fab.box(), 2};
816 #endif
817  temp_fab.setVal<amrex::RunOn::Device>(0._rt);
818  amrex::Array4<amrex::Real> const& temp_arr = temp_fab.array();
819 
820  // Inverse of light speed squared
821  const amrex::Real invcsq = 1._rt / (PhysConst::c * PhysConst::c);
822 
823  // Arrays where D will be stored
824  amrex::Array4<amrex::Real> const& Dx_arr = Dx_fab.array();
825  amrex::Array4<amrex::Real> const& Dy_arr = Dy_fab.array();
826  amrex::Array4<amrex::Real> const& Dz_arr = Dz_fab.array();
827 
828  // Loop over particles and deposit (Dx,Dy,Dz) into Dx_fab, Dy_fab and Dz_fab
829 #if defined(WARPX_USE_GPUCLOCK)
830  amrex::Real* cost_real = nullptr;
831  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
832  cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real));
833  *cost_real = 0._rt;
834  }
835 #endif
836  amrex::ParallelFor(np_to_depose, [=] AMREX_GPU_DEVICE (long ip)
837  {
838 #if defined(WARPX_USE_GPUCLOCK)
839  KernelTimer kernelTimer(cost && load_balance_costs_update_algo
841 #endif
842 
843  // Inverse of Lorentz factor gamma
844  const amrex::Real invgam = 1._rt / std::sqrt(1._rt + uxp[ip] * uxp[ip] * invcsq
845  + uyp[ip] * uyp[ip] * invcsq
846  + uzp[ip] * uzp[ip] * invcsq);
847  // Product of particle charges and weights
848  amrex::Real wq = q * wp[ip];
849  if (do_ionization) wq *= ion_lev[ip];
850 
851  // Current particle positions (in physical units)
852  amrex::ParticleReal xp, yp, zp;
853  GetPosition(ip, xp, yp, zp);
854 
855  // Particle velocities
856  const amrex::Real vx = uxp[ip] * invgam;
857  const amrex::Real vy = uyp[ip] * invgam;
858  const amrex::Real vz = uzp[ip] * invgam;
859 
860  // Modify the particle position to match the time of the deposition
861  xp += relative_time * vx;
862  yp += relative_time * vy;
863  zp += relative_time * vz;
864 
865  // Particle current densities
866 #if defined(WARPX_DIM_XZ)
867  const amrex::Real wqy = wq * vy * invvol;
868 #endif
869 
870  // Current and old particle positions in grid units
871  // Keep these double to avoid bug in single precision.
872  double const x_new = (xp - xmin + 0.5_rt*dt*vx) * dxi;
873  double const x_old = (xp - xmin - 0.5_rt*dt*vx) * dxi;
874 #if defined(WARPX_DIM_3D)
875  // Keep these double to avoid bug in single precision.
876  double const y_new = (yp - ymin + 0.5_rt*dt*vy) * dyi;
877  double const y_old = (yp - ymin - 0.5_rt*dt*vy) * dyi;
878 #endif
879  // Keep these double to avoid bug in single precision.
880  double const z_new = (zp - zmin + 0.5_rt*dt*vz) * dzi;
881  double const z_old = (zp - zmin - 0.5_rt*dt*vz) * dzi;
882 
883  // Shape factor arrays for current and old positions (nodal)
884  // Keep these double to avoid bug in single precision.
885  double sx_new[depos_order+1] = {0.};
886  double sx_old[depos_order+1] = {0.};
887 #if defined(WARPX_DIM_3D)
888  // Keep these double to avoid bug in single precision.
889  double sy_new[depos_order+1] = {0.};
890  double sy_old[depos_order+1] = {0.};
891 #endif
892  // Keep these double to avoid bug in single precision.
893  double sz_new[depos_order+1] = {0.};
894  double sz_old[depos_order+1] = {0.};
895 
896  // Compute shape factors for current positions
897 
898  // i_new leftmost grid point in x that the particle touches
899  // sx_new shape factor along x for the centering of each current
900  Compute_shape_factor< depos_order > const compute_shape_factor;
901  const int i_new = compute_shape_factor(sx_new, x_new);
902 #if defined(WARPX_DIM_3D)
903  // j_new leftmost grid point in y that the particle touches
904  // sy_new shape factor along y for the centering of each current
905  const int j_new = compute_shape_factor(sy_new, y_new);
906 #endif
907  // k_new leftmost grid point in z that the particle touches
908  // sz_new shape factor along z for the centering of each current
909  const int k_new = compute_shape_factor(sz_new, z_new);
910 
911  // Compute shape factors for old positions
912 
913  // i_old leftmost grid point in x that the particle touches
914  // sx_old shape factor along x for the centering of each current
915  const int i_old = compute_shape_factor(sx_old, x_old);
916 #if defined(WARPX_DIM_3D)
917  // j_old leftmost grid point in y that the particle touches
918  // sy_old shape factor along y for the centering of each current
919  const int j_old = compute_shape_factor(sy_old, y_old);
920 #endif
921  // k_old leftmost grid point in z that the particle touches
922  // sz_old shape factor along z for the centering of each current
923  const int k_old = compute_shape_factor(sz_old, z_old);
924 
925  // Deposit current into Dx_arr, Dy_arr and Dz_arr
926 #if defined(WARPX_DIM_XZ)
927 
928  for (int k=0; k<=depos_order; k++) {
929  for (int i=0; i<=depos_order; i++) {
930 
931  // Re-casting sx_new and sz_new from double to amrex::Real so that
932  // Atomic::Add has consistent types in its argument
933  auto const sxn_szn = static_cast<amrex::Real>(sx_new[i] * sz_new[k]);
934  auto const sxo_szn = static_cast<amrex::Real>(sx_old[i] * sz_new[k]);
935  auto const sxn_szo = static_cast<amrex::Real>(sx_new[i] * sz_old[k]);
936  auto const sxo_szo = static_cast<amrex::Real>(sx_old[i] * sz_old[k]);
937 
938  if (i_new == i_old && k_new == k_old) {
939  // temp arrays for Dx and Dz
940  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
941  wq * invvol * invdt * (sxn_szn - sxo_szo));
942 
943  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 1),
944  wq * invvol * invdt * (sxn_szo - sxo_szn));
945 
946  // Dy
947  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
948  wqy * 0.25_rt * (sxn_szn + sxn_szo + sxo_szn + sxo_szo));
949  } else {
950  // temp arrays for Dx and Dz
951  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
952  wq * invvol * invdt * sxn_szn);
953 
954  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
955  - wq * invvol * invdt * sxo_szo);
956 
957  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 1),
958  wq * invvol * invdt * sxn_szo);
959 
960  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 1),
961  - wq * invvol * invdt * sxo_szn);
962 
963  // Dy
964  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_new + k, 0, 0),
965  wqy * 0.25_rt * sxn_szn);
966 
967  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_new + i, lo.y + k_old + k, 0, 0),
968  wqy * 0.25_rt * sxn_szo);
969 
970  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_new + k, 0, 0),
971  wqy * 0.25_rt * sxo_szn);
972 
973  amrex::Gpu::Atomic::AddNoRet(&Dy_arr(lo.x + i_old + i, lo.y + k_old + k, 0, 0),
974  wqy * 0.25_rt * sxo_szo);
975  }
976 
977  }
978  }
979 
980 #elif defined(WARPX_DIM_3D)
981 
982  for (int k=0; k<=depos_order; k++) {
983  for (int j=0; j<=depos_order; j++) {
984 
985  auto const syn_szn = static_cast<amrex::Real>(sy_new[j] * sz_new[k]);
986  auto const syo_szn = static_cast<amrex::Real>(sy_old[j] * sz_new[k]);
987  auto const syn_szo = static_cast<amrex::Real>(sy_new[j] * sz_old[k]);
988  auto const syo_szo = static_cast<amrex::Real>(sy_old[j] * sz_old[k]);
989 
990  for (int i=0; i<=depos_order; i++) {
991 
992  auto const sxn_syn_szn = static_cast<amrex::Real>(sx_new[i]) * syn_szn;
993  auto const sxo_syn_szn = static_cast<amrex::Real>(sx_old[i]) * syn_szn;
994  auto const sxn_syo_szn = static_cast<amrex::Real>(sx_new[i]) * syo_szn;
995  auto const sxo_syo_szn = static_cast<amrex::Real>(sx_old[i]) * syo_szn;
996  auto const sxn_syn_szo = static_cast<amrex::Real>(sx_new[i]) * syn_szo;
997  auto const sxo_syn_szo = static_cast<amrex::Real>(sx_old[i]) * syn_szo;
998  auto const sxn_syo_szo = static_cast<amrex::Real>(sx_new[i]) * syo_szo;
999  auto const sxo_syo_szo = static_cast<amrex::Real>(sx_old[i]) * syo_szo;
1000 
1001  if (i_new == i_old && j_new == j_old && k_new == k_old) {
1002  // temp arrays for Dx, Dy and Dz
1003  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
1004  wq * invvol * invdt * (sxn_syn_szn - sxo_syo_szo));
1005 
1006  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 1),
1007  wq * invvol * invdt * (sxn_syn_szo - sxo_syo_szn));
1008 
1009  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 2),
1010  wq * invvol * invdt * (sxn_syo_szn - sxo_syn_szo));
1011 
1012  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 3),
1013  wq * invvol * invdt * (sxo_syn_szn - sxn_syo_szo));
1014  } else {
1015  // temp arrays for Dx, Dy and Dz
1016  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_new + k, 0),
1017  wq * invvol * invdt * sxn_syn_szn);
1018 
1019  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_old + k, 0),
1020  - wq * invvol * invdt * sxo_syo_szo);
1021 
1022  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_new + j, lo.z + k_old + k, 1),
1023  wq * invvol * invdt * sxn_syn_szo);
1024 
1025  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_old + j, lo.z + k_new + k, 1),
1026  - wq * invvol * invdt * sxo_syo_szn);
1027 
1028  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_new + k, 2),
1029  wq * invvol * invdt * sxn_syo_szn);
1030 
1031  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_old + k, 2),
1032  - wq * invvol * invdt * sxo_syn_szo);
1033 
1034  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_old + i, lo.y + j_new + j, lo.z + k_new + k, 3),
1035  wq * invvol * invdt * sxo_syn_szn);
1036 
1037  amrex::Gpu::Atomic::AddNoRet(&temp_arr(lo.x + i_new + i, lo.y + j_old + j, lo.z + k_old + k, 3),
1038  - wq * invvol * invdt * sxn_syo_szo);
1039  }
1040  }
1041  }
1042  }
1043 #endif
1044  } );
1045 
1046 #if defined(WARPX_DIM_3D)
1047  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
1048  {
1049  const amrex::Real t_a = temp_arr(i,j,k,0);
1050  const amrex::Real t_b = temp_arr(i,j,k,1);
1051  const amrex::Real t_c = temp_arr(i,j,k,2);
1052  const amrex::Real t_d = temp_arr(i,j,k,3);
1053  Dx_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b + t_c - 2._rt*t_d);
1054  Dy_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b - 2._rt*t_c + t_d);
1055  Dz_arr(i,j,k) += (1._rt/6._rt)*(2_rt*t_a - 2._rt*t_b + t_c + t_d);
1056  });
1057 #elif defined(WARPX_DIM_XZ)
1058  amrex::ParallelFor(Dx_fab.box(), [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
1059  {
1060  const amrex::Real t_a = temp_arr(i,j,0,0);
1061  const amrex::Real t_b = temp_arr(i,j,0,1);
1062  Dx_arr(i,j,0) += (0.5_rt)*(t_a + t_b);
1063  Dz_arr(i,j,0) += (0.5_rt)*(t_a - t_b);
1064  });
1065 #endif
1066  // Synchronize so that temp_fab can be safely deallocated in its destructor
1068 
1069 
1070 # if defined(WARPX_USE_GPUCLOCK)
1071  if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) {
1073  *cost += *cost_real;
1074  amrex::The_Managed_Arena()->free(cost_real);
1075  }
1076 # endif
1077 #endif // #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
1078 }
1079 #endif // CURRENTDEPOSITION_H_
const Box & box() const noexcept
amrex::GpuComplex< amrex::Real > Complex
Definition: WarpX_Complex.H:22
void doVayDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &Dx_fab, amrex::FArrayBox &Dy_fab, amrex::FArrayBox &Dz_fab, const long np_to_depose, const amrex::Real dt, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Vay current deposition (Vay et al, 2013) for thread thread_num: deposit D in real space and store the...
Definition: CurrentDeposition.H:741
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
static constexpr auto c
vacuum speed of light [m/s]
Definition: constant.H:44
int dx
Definition: compute_domain.py:35
int dt
Definition: Stencil.py:468
void doDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_depose, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:52
void doEsirkepovDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_depose, const amrex::Real dt, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *const cost, const long load_balance_costs_update_algo)
Esirkepov Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:364
Definition: ShapeFactors.H:81
virtual void free(void *pt)=0
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
#define AMREX_ALWAYS_ASSERT(EX)
Definition: ShapeFactors.H:26
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
#define AMREX_GPU_DEVICE
void Abort(const std::string &msg)
Definition: WarpXAlgorithmSelection.H:124
i
Definition: check_interp_points_and_weights.py:174
virtual void * alloc(std::size_t sz)=0
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
Functor that can be used to extract the positions of the macroparticles inside a ParallelFor kernel...
Definition: GetAndSetPosition.H:52
void streamSynchronize() noexcept
Defines a timer object to be used on GPU; measures summed thread cycles.
Definition: KernelTimer.H:23
Arena * The_Managed_Arena()
NODE