WarpX
ChargeDeposition.H
Go to the documentation of this file.
1 /* Copyright 2019 Axel Huebl, Andrew Myers, David Grote, Maxence Thevenet
2  * Weiqun Zhang
3  *
4  * This file is part of WarpX.
5  *
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef WARPX_CHARGEDEPOSITION_H_
9 #define WARPX_CHARGEDEPOSITION_H_
10 
14 #include "Particles/ShapeFactors.H"
16 #ifdef WARPX_DIM_RZ
17 # include "Utils/WarpX_Complex.H"
18 #endif
19 
20 #include <AMReX.H>
21 
22 /* \brief Perform charge deposition on a tile
23  * \param GetPosition A functor for returning the particle position.
24  * \param wp Pointer to array of particle weights.
25  * \param ion_lev Pointer to array of particle ionization level. This is
26  required to have the charge of each macroparticle
27  since q is a scalar. For non-ionizable species,
28  ion_lev is a null pointer.
29  * \param rho_fab FArrayBox of charge density, either full array or tile.
30  * \param np_to_deposit Number of particles for which current is deposited.
31  * \param dx 3D cell size
32  * \param xyzmin Physical lower bounds of domain.
33  * \param lo Index lower bounds of domain.
34  * \param q species charge.
35  * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry.
36  */
37 template <int depos_order>
39  const amrex::ParticleReal * const wp,
40  const int* ion_lev,
41  amrex::FArrayBox& rho_fab,
42  long np_to_deposit,
43  const std::array<amrex::Real, 3>& dx,
44  const std::array<amrex::Real, 3> xyzmin,
45  amrex::Dim3 lo,
46  amrex::Real q,
47  int n_rz_azimuthal_modes)
48 {
49  using namespace amrex;
50 
51  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
52  // (do_ionization=1)
53  const bool do_ionization = ion_lev;
54  const amrex::Real dzi = 1.0_rt/dx[2];
55 #if defined(WARPX_DIM_1D_Z)
56  const amrex::Real invvol = dzi;
57 #endif
58 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
59  const amrex::Real dxi = 1.0_rt/dx[0];
60  const amrex::Real invvol = dxi*dzi;
61 #elif defined(WARPX_DIM_3D)
62  const amrex::Real dxi = 1.0_rt/dx[0];
63  const amrex::Real dyi = 1.0_rt/dx[1];
64  const amrex::Real invvol = dxi*dyi*dzi;
65 #endif
66 
67 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
68  const amrex::Real xmin = xyzmin[0];
69 #endif
70 #if defined(WARPX_DIM_3D)
71  const amrex::Real ymin = xyzmin[1];
72 #endif
73  const amrex::Real zmin = xyzmin[2];
74 
75  amrex::Array4<amrex::Real> const& rho_arr = rho_fab.array();
76  amrex::IntVect const rho_type = rho_fab.box().type();
77 
78  constexpr int NODE = amrex::IndexType::NODE;
79  constexpr int CELL = amrex::IndexType::CELL;
80 
81  // Loop over particles and deposit into rho_fab
83  np_to_deposit,
84  [=] AMREX_GPU_DEVICE (long ip) {
85  // --- Get particle quantities
86  amrex::Real wq = q*wp[ip]*invvol;
87  if (do_ionization){
88  wq *= ion_lev[ip];
89  }
90 
91  amrex::ParticleReal xp, yp, zp;
92  GetPosition(ip, xp, yp, zp);
93 
94  // --- Compute shape factors
95  Compute_shape_factor< depos_order > const compute_shape_factor;
96 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
97  // x direction
98  // Get particle position in grid coordinates
99 #if defined(WARPX_DIM_RZ)
100  const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
101  amrex::Real costheta;
102  amrex::Real sintheta;
103  if (rp > 0.) {
104  costheta = xp/rp;
105  sintheta = yp/rp;
106  } else {
107  costheta = 1._rt;
108  sintheta = 0._rt;
109  }
110  const Complex xy0 = Complex{costheta, sintheta};
111  const amrex::Real x = (rp - xmin)*dxi;
112 #else
113  const amrex::Real x = (xp - xmin)*dxi;
114 #endif
115 
116  // Compute shape factor along x
117  // i: leftmost grid point that the particle touches
118  amrex::Real sx[depos_order + 1] = {0._rt};
119  int i = 0;
120  if (rho_type[0] == NODE) {
121  i = compute_shape_factor(sx, x);
122  } else if (rho_type[0] == CELL) {
123  i = compute_shape_factor(sx, x - 0.5_rt);
124  }
125 #endif //defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
126 #if defined(WARPX_DIM_3D)
127  // y direction
128  const amrex::Real y = (yp - ymin)*dyi;
129  amrex::Real sy[depos_order + 1] = {0._rt};
130  int j = 0;
131  if (rho_type[1] == NODE) {
132  j = compute_shape_factor(sy, y);
133  } else if (rho_type[1] == CELL) {
134  j = compute_shape_factor(sy, y - 0.5_rt);
135  }
136 #endif
137  // z direction
138  const amrex::Real z = (zp - zmin)*dzi;
139  amrex::Real sz[depos_order + 1] = {0._rt};
140  int k = 0;
141  if (rho_type[WARPX_ZINDEX] == NODE) {
142  k = compute_shape_factor(sz, z);
143  } else if (rho_type[WARPX_ZINDEX] == CELL) {
144  k = compute_shape_factor(sz, z - 0.5_rt);
145  }
146 
147  // Deposit charge into rho_arr
148 #if defined(WARPX_DIM_1D_Z)
149  for (int iz=0; iz<=depos_order; iz++){
151  &rho_arr(lo.x+k+iz, 0, 0, 0),
152  sz[iz]*wq);
153  }
154 #endif
155 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
156  for (int iz=0; iz<=depos_order; iz++){
157  for (int ix=0; ix<=depos_order; ix++){
159  &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 0),
160  sx[ix]*sz[iz]*wq);
161 #if defined(WARPX_DIM_RZ)
162  Complex xy = xy0; // Throughout the following loop, xy takes the value e^{i m theta}
163  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
164  // The factor 2 on the weighting comes from the normalization of the modes
165  amrex::Gpu::Atomic::AddNoRet( &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 2*imode-1), 2._rt*sx[ix]*sz[iz]*wq*xy.real());
166  amrex::Gpu::Atomic::AddNoRet( &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 2*imode ), 2._rt*sx[ix]*sz[iz]*wq*xy.imag());
167  xy = xy*xy0;
168  }
169 #endif
170  }
171  }
172 #elif defined(WARPX_DIM_3D)
173  for (int iz=0; iz<=depos_order; iz++){
174  for (int iy=0; iy<=depos_order; iy++){
175  for (int ix=0; ix<=depos_order; ix++){
177  &rho_arr(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
178  sx[ix]*sy[iy]*sz[iz]*wq);
179  }
180  }
181  }
182 #endif
183  }
184  );
185 
186 #ifndef WARPX_DIM_RZ
187  amrex::ignore_unused(n_rz_azimuthal_modes);
188 #endif
189 }
190 
191 /* \brief Perform charge deposition on a tile using shared memory
192  * \param GetPosition A functor for returning the particle position.
193  * \param wp Pointer to array of particle weights.
194  * \param ion_lev Pointer to array of particle ionization level. This is
195  required to have the charge of each macroparticle
196  since q is a scalar. For non-ionizable species,
197  ion_lev is a null pointer.
198  * \param rho_fab FArrayBox of charge density, either full array or tile.
199  * \param ix_type
200  * \param np_to_deposit Number of particles for which charge is deposited.
201  * \param dx 3D cell size
202  * \param xyzmin Physical lower bounds of domain.
203  * \param lo Index lower bounds of domain.
204  * \param q species charge.
205  * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry.
206  * \param a_bins
207  * \param box
208  * \param geom
209  * \param a_tbox_max_size
210  * \param bin_size tile size to use for shared current deposition operations
211  */
212 template <int depos_order>
214  const amrex::ParticleReal * const wp,
215  const int* ion_lev,
216  amrex::FArrayBox& rho_fab,
217  const amrex::IntVect& ix_type,
218  const long np_to_deposit,
219  const std::array<amrex::Real, 3>& dx,
220  const std::array<amrex::Real, 3> xyzmin,
221  const amrex::Dim3 lo,
222  const amrex::Real q,
223  const int n_rz_azimuthal_modes,
225  const amrex::Box& box,
226  const amrex::Geometry& geom,
227  const amrex::IntVect& a_tbox_max_size,
228  const amrex::IntVect bin_size
229  )
230 {
231  using namespace amrex;
232 
233  const auto *permutation = a_bins.permutationPtr();
234 
235 #if !defined(AMREX_USE_GPU)
236  amrex::ignore_unused(ix_type, a_bins, box, geom, a_tbox_max_size, bin_size);
237 #endif
238 
239  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
240  // (do_ionization=1)
241  const bool do_ionization = ion_lev;
242  const amrex::Real dzi = 1.0_rt/dx[2];
243 #if defined(WARPX_DIM_1D_Z)
244  const amrex::Real invvol = dzi;
245 #endif
246 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
247  const amrex::Real dxi = 1.0_rt/dx[0];
248  const amrex::Real invvol = dxi*dzi;
249 #elif defined(WARPX_DIM_3D)
250  const amrex::Real dxi = 1.0_rt/dx[0];
251  const amrex::Real dyi = 1.0_rt/dx[1];
252  const amrex::Real invvol = dxi*dyi*dzi;
253 #endif
254 
255 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
256  const amrex::Real xmin = xyzmin[0];
257 #endif
258 #if defined(WARPX_DIM_3D)
259  const amrex::Real ymin = xyzmin[1];
260 #endif
261  const amrex::Real zmin = xyzmin[2];
262 
263  amrex::Array4<amrex::Real> const& rho_arr = rho_fab.array();
264  auto rho_box = rho_fab.box();
265  amrex::IntVect const rho_type = rho_box.type();
266 
267  constexpr int NODE = amrex::IndexType::NODE;
268  constexpr int CELL = amrex::IndexType::CELL;
269 
270  // Loop over particles and deposit into rho_fab
271 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
272  const auto dxiarr = geom.InvCellSizeArray();
273  const auto plo = geom.ProbLoArray();
274  const auto domain = geom.Domain();
275 
276  const amrex::Box sample_tbox(IntVect(AMREX_D_DECL(0, 0, 0)), a_tbox_max_size - 1);
277  amrex::Box sample_tbox_x = convert(sample_tbox, ix_type);
278 
279  sample_tbox_x.grow(depos_order);
280 
281  const auto npts = sample_tbox_x.numPts();
282 
283  const int nblocks = a_bins.numBins();
284  const auto offsets_ptr = a_bins.offsetsPtr();
285  const int threads_per_block = 256;
286 
287  std::size_t shared_mem_bytes = npts*sizeof(amrex::Real);
288 
289  const std::size_t max_shared_mem_bytes = amrex::Gpu::Device::sharedMemPerBlock();
290 
291  WARPX_ALWAYS_ASSERT_WITH_MESSAGE(shared_mem_bytes <= max_shared_mem_bytes,
292  "Tile size too big for GPU shared memory charge deposition");
293 #endif
294 
295 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
296  amrex::ignore_unused(np_to_deposit);
297  // Loop with one block per tile (the shared memory is allocated on a per-block basis)
298  // The threads within each block loop over the particles of its tile
299  // (Each threads processes a different set of particles.)
301  nblocks, threads_per_block, shared_mem_bytes, amrex::Gpu::gpuStream(),
302  [=] AMREX_GPU_DEVICE () noexcept
303 #else // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
304  amrex::ParallelFor(np_to_deposit, [=] AMREX_GPU_DEVICE (long ip_orig) noexcept
305 #endif
306  {
307 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
308  const int bin_id = blockIdx.x;
309  const unsigned int bin_start = offsets_ptr[bin_id];
310  const unsigned int bin_stop = offsets_ptr[bin_id+1];
311 
312  if (bin_start == bin_stop) { return; }
313 
314  amrex::Box buffer_box;
315  {
316  ParticleReal xp, yp, zp;
317  GetPosition(permutation[bin_start], xp, yp, zp);
318 #if defined(WARPX_DIM_3D)
319  IntVect iv = IntVect(int(amrex::Math::floor((xp-plo[0])*dxiarr[0])),
320  int(amrex::Math::floor((yp-plo[1])*dxiarr[1])),
321  int(amrex::Math::floor((zp-plo[2])*dxiarr[2])));
322 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
323  IntVect iv = IntVect(
324  int(amrex::Math::floor((xp-plo[0])*dxiarr[0])),
325  int(amrex::Math::floor((zp-plo[1])*dxiarr[1])));
326 #elif defined(WARPX_DIM_1D_Z)
327  IntVect iv = IntVect(int(amrex::Math::floor((zp-plo[0])*dxiarr[0])));
328 #endif
329  iv += domain.smallEnd();
330  getTileIndex(iv, box, true, bin_size, buffer_box);
331  }
332 
333  Box tbx = convert( buffer_box, ix_type);
334  tbx.grow(depos_order);
335 
337  amrex::Real* const shared = gsm.dataPtr();
338 
339  amrex::Array4<amrex::Real> buf(shared, amrex::begin(tbx), amrex::end(tbx), 1);
340 
341  // Zero-initialize the temporary array in shared memory
342  volatile amrex::Real* vs = shared;
343  for (int i = threadIdx.x; i < tbx.numPts(); i += blockDim.x) {
344  vs[i] = 0.0;
345  }
346  __syncthreads();
347 #else
348  amrex::Array4<amrex::Real> const &buf = rho_arr;
349 #endif // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
350 
351 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
352  // Loop over macroparticles: each threads loops over particles with a stride of `blockDim.x`
353  for (unsigned int ip_orig = bin_start + threadIdx.x; ip_orig < bin_stop; ip_orig += blockDim.x)
354 #endif
355  {
356  const unsigned int ip = permutation[ip_orig];
357 
358  // --- Get particle quantities
359  amrex::Real wq = q*wp[ip]*invvol;
360  if (do_ionization){
361  wq *= ion_lev[ip];
362  }
363 
364  amrex::ParticleReal xp, yp, zp;
365  GetPosition(ip, xp, yp, zp);
366 
367  // --- Compute shape factors
368  Compute_shape_factor< depos_order > const compute_shape_factor;
369 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
370  // x direction
371  // Get particle position in grid coordinates
372 #if defined(WARPX_DIM_RZ)
373  const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
374  amrex::Real costheta;
375  amrex::Real sintheta;
376  if (rp > 0.) {
377  costheta = xp/rp;
378  sintheta = yp/rp;
379  } else {
380  costheta = 1._rt;
381  sintheta = 0._rt;
382  }
383  const Complex xy0 = Complex{costheta, sintheta};
384  const amrex::Real x = (rp - xmin)*dxi;
385 #else
386  const amrex::Real x = (xp - xmin)*dxi;
387 #endif
388 
389  // Compute shape factor along x
390  // i: leftmost grid point that the particle touches
391  amrex::Real sx[depos_order + 1] = {0._rt};
392  int i = 0;
393  if (rho_type[0] == NODE) {
394  i = compute_shape_factor(sx, x);
395  } else if (rho_type[0] == CELL) {
396  i = compute_shape_factor(sx, x - 0.5_rt);
397  }
398 #endif //defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
399 #if defined(WARPX_DIM_3D)
400  // y direction
401  const amrex::Real y = (yp - ymin)*dyi;
402  amrex::Real sy[depos_order + 1] = {0._rt};
403  int j = 0;
404  if (rho_type[1] == NODE) {
405  j = compute_shape_factor(sy, y);
406  } else if (rho_type[1] == CELL) {
407  j = compute_shape_factor(sy, y - 0.5_rt);
408  }
409 #endif
410  // z direction
411  const amrex::Real z = (zp - zmin)*dzi;
412  amrex::Real sz[depos_order + 1] = {0._rt};
413  int k = 0;
414  if (rho_type[WARPX_ZINDEX] == NODE) {
415  k = compute_shape_factor(sz, z);
416  } else if (rho_type[WARPX_ZINDEX] == CELL) {
417  k = compute_shape_factor(sz, z - 0.5_rt);
418  }
419 
420  // Deposit charge into buf
421 #if defined(WARPX_DIM_1D_Z)
422  for (int iz=0; iz<=depos_order; iz++){
424  &buf(lo.x+k+iz, 0, 0, 0),
425  sz[iz]*wq);
426  }
427 #endif
428 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
429  for (int iz=0; iz<=depos_order; iz++){
430  for (int ix=0; ix<=depos_order; ix++){
432  &buf(lo.x+i+ix, lo.y+k+iz, 0, 0),
433  sx[ix]*sz[iz]*wq);
434 #if defined(WARPX_DIM_RZ)
435  Complex xy = xy0; // Throughout the following loop, xy takes the value e^{i m theta}
436  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
437  // The factor 2 on the weighting comes from the normalization of the modes
438  amrex::Gpu::Atomic::AddNoRet( &buf(lo.x+i+ix, lo.y+k+iz, 0, 2*imode-1), 2._rt*sx[ix]*sz[iz]*wq*xy.real());
439  amrex::Gpu::Atomic::AddNoRet( &buf(lo.x+i+ix, lo.y+k+iz, 0, 2*imode ), 2._rt*sx[ix]*sz[iz]*wq*xy.imag());
440  xy = xy*xy0;
441  }
442 #endif
443  }
444  }
445 #elif defined(WARPX_DIM_3D)
446  for (int iz=0; iz<=depos_order; iz++){
447  for (int iy=0; iy<=depos_order; iy++){
448  for (int ix=0; ix<=depos_order; ix++){
450  &buf(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
451  sx[ix]*sy[iy]*sz[iz]*wq);
452  }
453  }
454  }
455 #endif
456  }
457 
458 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
459  __syncthreads();
460 
461  addLocalToGlobal(tbx, rho_arr, buf);
462 #endif // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
463  }
464  );
465 
466 #ifndef WARPX_DIM_RZ
467  amrex::ignore_unused(n_rz_azimuthal_modes);
468 #endif
469 }
470 
471 #endif // WARPX_CHARGEDEPOSITION_H_
#define AMREX_GPU_DEVICE
#define AMREX_D_DECL(a, b, c)
void doChargeDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes)
Definition: ChargeDeposition.H:38
void doChargeDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, const amrex::IntVect &ix_type, const long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, const amrex::DenseBins< WarpXParticleContainer::ParticleTileType::ParticleTileDataType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size, const amrex::IntVect bin_size)
Definition: ChargeDeposition.H:213
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
NODE
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE IntVect type() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
AMREX_GPU_HOST_DEVICE Box & grow(int i) noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
gpuStream_t gpuStream() noexcept
constexpr int iz
constexpr int iy
constexpr int ix
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box convert(const Box &b, const IntVect &typ) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
void launch(T const &n, L &&f) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
tuple dx
lab frame
Definition: stencil.py:429
xmin
Definition: stencil.py:424
Definition: ShapeFactors.H:29
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept