WarpX
ChargeDeposition.H
Go to the documentation of this file.
1 /* Copyright 2019 Axel Huebl, Andrew Myers, David Grote, Maxence Thevenet
2  * Weiqun Zhang
3  *
4  * This file is part of WarpX.
5  *
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef WARPX_CHARGEDEPOSITION_H_
9 #define WARPX_CHARGEDEPOSITION_H_
10 
14 #include "Particles/ShapeFactors.H"
16 #ifdef WARPX_DIM_RZ
17 # include "Utils/WarpX_Complex.H"
18 #endif
19 
20 #include <AMReX.H>
21 
22 /* \brief Perform charge deposition on a tile
23  * \param GetPosition A functor for returning the particle position.
24  * \param wp Pointer to array of particle weights.
25  * \param ion_lev Pointer to array of particle ionization level. This is
26  required to have the charge of each macroparticle
27  since q is a scalar. For non-ionizable species,
28  ion_lev is a null pointer.
29  * \param rho_fab FArrayBox of charge density, either full array or tile.
30  * \param np_to_deposit Number of particles for which current is deposited.
31  * \param dinv 3D cell size inverse
32  * \param xyzmin The lower bounds of the domain
33  * \param lo Index lower bounds of domain.
34  * \param q species charge.
35  * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry.
36  */
37 template <int depos_order>
39  const amrex::ParticleReal * const wp,
40  const int* ion_lev,
41  amrex::FArrayBox& rho_fab,
42  long np_to_deposit,
43  const amrex::XDim3 & dinv,
44  const amrex::XDim3 & xyzmin,
45  amrex::Dim3 lo,
46  amrex::Real q,
47  [[maybe_unused]] int n_rz_azimuthal_modes)
48 {
49  using namespace amrex;
50 
51  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
52  // (do_ionization=1)
53  const bool do_ionization = ion_lev;
54 
55  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
56 
57  amrex::Array4<amrex::Real> const& rho_arr = rho_fab.array();
58  amrex::IntVect const rho_type = rho_fab.box().type();
59 
60  constexpr int NODE = amrex::IndexType::NODE;
61  constexpr int CELL = amrex::IndexType::CELL;
62 
63  // Loop over particles and deposit into rho_fab
65  np_to_deposit,
66  [=] AMREX_GPU_DEVICE (long ip) {
67  // --- Get particle quantities
68  amrex::Real wq = q*wp[ip]*invvol;
69  if (do_ionization){
70  wq *= ion_lev[ip];
71  }
72 
73  amrex::ParticleReal xp, yp, zp;
74  GetPosition(ip, xp, yp, zp);
75 
76  // --- Compute shape factors
77  Compute_shape_factor< depos_order > const compute_shape_factor;
78 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
79  // x direction
80  // Get particle position in grid coordinates
81 #if defined(WARPX_DIM_RZ)
82  const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
83  const amrex::Real costheta = (rp > 0._rt ? xp/rp : 1._rt);
84  const amrex::Real sintheta = (rp > 0._rt ? yp/rp : 0._rt);
85  const Complex xy0 = Complex{costheta, sintheta};
86  const amrex::Real x = (rp - xyzmin.x)*dinv.x;
87 #else
88  const amrex::Real x = (xp - xyzmin.x)*dinv.x;
89 #endif
90 
91  // Compute shape factor along x
92  // i: leftmost grid point that the particle touches
93  amrex::Real sx[depos_order + 1] = {0._rt};
94  int i = 0;
95  if (rho_type[0] == NODE) {
96  i = compute_shape_factor(sx, x);
97  } else if (rho_type[0] == CELL) {
98  i = compute_shape_factor(sx, x - 0.5_rt);
99  }
100 #endif //defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
101 #if defined(WARPX_DIM_3D)
102  // y direction
103  const amrex::Real y = (yp - xyzmin.y)*dinv.y;
104  amrex::Real sy[depos_order + 1] = {0._rt};
105  int j = 0;
106  if (rho_type[1] == NODE) {
107  j = compute_shape_factor(sy, y);
108  } else if (rho_type[1] == CELL) {
109  j = compute_shape_factor(sy, y - 0.5_rt);
110  }
111 #endif
112  // z direction
113  const amrex::Real z = (zp - xyzmin.z)*dinv.z;
114  amrex::Real sz[depos_order + 1] = {0._rt};
115  int k = 0;
116  if (rho_type[WARPX_ZINDEX] == NODE) {
117  k = compute_shape_factor(sz, z);
118  } else if (rho_type[WARPX_ZINDEX] == CELL) {
119  k = compute_shape_factor(sz, z - 0.5_rt);
120  }
121 
122  // Deposit charge into rho_arr
123 #if defined(WARPX_DIM_1D_Z)
124  for (int iz=0; iz<=depos_order; iz++){
126  &rho_arr(lo.x+k+iz, 0, 0, 0),
127  sz[iz]*wq);
128  }
129 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
130  for (int iz=0; iz<=depos_order; iz++){
131  for (int ix=0; ix<=depos_order; ix++){
133  &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 0),
134  sx[ix]*sz[iz]*wq);
135 #if defined(WARPX_DIM_RZ)
136  Complex xy = xy0; // Throughout the following loop, xy takes the value e^{i m theta}
137  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
138  // The factor 2 on the weighting comes from the normalization of the modes
139  amrex::Gpu::Atomic::AddNoRet( &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 2*imode-1), 2._rt*sx[ix]*sz[iz]*wq*xy.real());
140  amrex::Gpu::Atomic::AddNoRet( &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 2*imode ), 2._rt*sx[ix]*sz[iz]*wq*xy.imag());
141  xy = xy*xy0;
142  }
143 #endif
144  }
145  }
146 #elif defined(WARPX_DIM_3D)
147  for (int iz=0; iz<=depos_order; iz++){
148  for (int iy=0; iy<=depos_order; iy++){
149  for (int ix=0; ix<=depos_order; ix++){
151  &rho_arr(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
152  sx[ix]*sy[iy]*sz[iz]*wq);
153  }
154  }
155  }
156 #endif
157  }
158  );
159 }
160 
161 /* \brief Perform charge deposition on a tile using shared memory
162  * \param GetPosition A functor for returning the particle position.
163  * \param wp Pointer to array of particle weights.
164  * \param ion_lev Pointer to array of particle ionization level. This is
165  required to have the charge of each macroparticle
166  since q is a scalar. For non-ionizable species,
167  ion_lev is a null pointer.
168  * \param rho_fab FArrayBox of charge density, either full array or tile.
169  * \param ix_type
170  * \param np_to_deposit Number of particles for which charge is deposited.
171  * \param dinv 3D cell size inverse
172  * \param xyzmin The lower bounds of the domain
173  * \param lo Index lower bounds of domain.
174  * \param q species charge.
175  * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry.
176  * \param a_bins
177  * \param box
178  * \param geom
179  * \param a_tbox_max_size
180  * \param bin_size tile size to use for shared current deposition operations
181  */
182 template <int depos_order>
184  const amrex::ParticleReal * const wp,
185  const int* ion_lev,
186  amrex::FArrayBox& rho_fab,
187  const amrex::IntVect& ix_type,
188  const long np_to_deposit,
189  const amrex::XDim3 & dinv,
190  const amrex::XDim3 & xyzmin,
191  const amrex::Dim3 lo,
192  const amrex::Real q,
193  [[maybe_unused]]const int n_rz_azimuthal_modes,
195  const amrex::Box& box,
196  const amrex::Geometry& geom,
197  const amrex::IntVect& a_tbox_max_size,
198  const amrex::IntVect bin_size
199  )
200 {
201  using namespace amrex;
202 
203  const auto *permutation = a_bins.permutationPtr();
204 
205 #if !defined(AMREX_USE_GPU)
206  amrex::ignore_unused(ix_type, a_bins, box, geom, a_tbox_max_size, bin_size);
207 #endif
208 
209  // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
210  // (do_ionization=1)
211  const bool do_ionization = ion_lev;
212 
213  const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
214 
215  amrex::Array4<amrex::Real> const& rho_arr = rho_fab.array();
216  auto rho_box = rho_fab.box();
217  amrex::IntVect const rho_type = rho_box.type();
218 
219  constexpr int NODE = amrex::IndexType::NODE;
220  constexpr int CELL = amrex::IndexType::CELL;
221 
222  // Loop over particles and deposit into rho_fab
223 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
224  const auto plo = geom.ProbLoArray();
225  const auto domain = geom.Domain();
226 
227  const amrex::Box sample_tbox(IntVect(AMREX_D_DECL(0, 0, 0)), a_tbox_max_size - 1);
228  amrex::Box sample_tbox_x = convert(sample_tbox, ix_type);
229 
230  sample_tbox_x.grow(depos_order);
231 
232  const auto npts = sample_tbox_x.numPts();
233 
234  const int nblocks = a_bins.numBins();
235  const auto offsets_ptr = a_bins.offsetsPtr();
236  const int threads_per_block = 256;
237 
238  std::size_t shared_mem_bytes = npts*sizeof(amrex::Real);
239 
240  const std::size_t max_shared_mem_bytes = amrex::Gpu::Device::sharedMemPerBlock();
241 
242  WARPX_ALWAYS_ASSERT_WITH_MESSAGE(shared_mem_bytes <= max_shared_mem_bytes,
243  "Tile size too big for GPU shared memory charge deposition");
244 #endif
245 
246 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
247  amrex::ignore_unused(np_to_deposit);
248  // Loop with one block per tile (the shared memory is allocated on a per-block basis)
249  // The threads within each block loop over the particles of its tile
250  // (Each threads processes a different set of particles.)
252  nblocks, threads_per_block, shared_mem_bytes, amrex::Gpu::gpuStream(),
253  [=] AMREX_GPU_DEVICE () noexcept
254 #else // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
255  amrex::ParallelFor(np_to_deposit, [=] AMREX_GPU_DEVICE (long ip_orig) noexcept
256 #endif
257  {
258 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
259  const int bin_id = blockIdx.x;
260  const unsigned int bin_start = offsets_ptr[bin_id];
261  const unsigned int bin_stop = offsets_ptr[bin_id+1];
262 
263  if (bin_start == bin_stop) { return; }
264 
265  amrex::Box buffer_box;
266  {
267  ParticleReal xp, yp, zp;
268  GetPosition(permutation[bin_start], xp, yp, zp);
269 #if defined(WARPX_DIM_3D)
270  IntVect iv = IntVect(int(amrex::Math::floor((xp-plo[0])*dinv.x)),
271  int(amrex::Math::floor((yp-plo[1])*dinv.y)),
272  int(amrex::Math::floor((zp-plo[2])*dinv.z)));
273 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
274  IntVect iv = IntVect(int(amrex::Math::floor((xp-plo[0])*dinv.x)),
275  int(amrex::Math::floor((zp-plo[1])*dinv.z)));
276 #elif defined(WARPX_DIM_1D_Z)
277  IntVect iv = IntVect(int(amrex::Math::floor((zp-plo[0])*dinv.z)));
278 #endif
279  iv += domain.smallEnd();
280  getTileIndex(iv, box, true, bin_size, buffer_box);
281  }
282 
283  Box tbx = convert( buffer_box, ix_type);
284  tbx.grow(depos_order);
285 
287  amrex::Real* const shared = gsm.dataPtr();
288 
289  amrex::Array4<amrex::Real> buf(shared, amrex::begin(tbx), amrex::end(tbx), 1);
290 
291  // Zero-initialize the temporary array in shared memory
292  volatile amrex::Real* vs = shared;
293  for (int i = threadIdx.x; i < tbx.numPts(); i += blockDim.x) {
294  vs[i] = 0.0;
295  }
296  __syncthreads();
297 #else
298  amrex::Array4<amrex::Real> const &buf = rho_arr;
299 #endif // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
300 
301 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
302  // Loop over macroparticles: each threads loops over particles with a stride of `blockDim.x`
303  for (unsigned int ip_orig = bin_start + threadIdx.x; ip_orig < bin_stop; ip_orig += blockDim.x)
304 #endif
305  {
306  const unsigned int ip = permutation[ip_orig];
307 
308  // --- Get particle quantities
309  amrex::Real wq = q*wp[ip]*invvol;
310  if (do_ionization){
311  wq *= ion_lev[ip];
312  }
313 
314  amrex::ParticleReal xp, yp, zp;
315  GetPosition(ip, xp, yp, zp);
316 
317  // --- Compute shape factors
318  Compute_shape_factor< depos_order > const compute_shape_factor;
319 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
320  // x direction
321  // Get particle position in grid coordinates
322 #if defined(WARPX_DIM_RZ)
323  const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
324  amrex::Real costheta;
325  amrex::Real sintheta;
326  if (rp > 0.) {
327  costheta = xp/rp;
328  sintheta = yp/rp;
329  } else {
330  costheta = 1._rt;
331  sintheta = 0._rt;
332  }
333  const Complex xy0 = Complex{costheta, sintheta};
334  const amrex::Real x = (rp - xyzmin.x)*dinv.x;
335 #else
336  const amrex::Real x = (xp - xyzmin.x)*dinv.x;
337 #endif
338 
339  // Compute shape factor along x
340  // i: leftmost grid point that the particle touches
341  amrex::Real sx[depos_order + 1] = {0._rt};
342  int i = 0;
343  if (rho_type[0] == NODE) {
344  i = compute_shape_factor(sx, x);
345  } else if (rho_type[0] == CELL) {
346  i = compute_shape_factor(sx, x - 0.5_rt);
347  }
348 #endif //defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
349 #if defined(WARPX_DIM_3D)
350  // y direction
351  const amrex::Real y = (yp - xyzmin.y)*dinv.y;
352  amrex::Real sy[depos_order + 1] = {0._rt};
353  int j = 0;
354  if (rho_type[1] == NODE) {
355  j = compute_shape_factor(sy, y);
356  } else if (rho_type[1] == CELL) {
357  j = compute_shape_factor(sy, y - 0.5_rt);
358  }
359 #endif
360  // z direction
361  const amrex::Real z = (zp - xyzmin.z)*dinv.z;
362  amrex::Real sz[depos_order + 1] = {0._rt};
363  int k = 0;
364  if (rho_type[WARPX_ZINDEX] == NODE) {
365  k = compute_shape_factor(sz, z);
366  } else if (rho_type[WARPX_ZINDEX] == CELL) {
367  k = compute_shape_factor(sz, z - 0.5_rt);
368  }
369 
370  // Deposit charge into buf
371 #if defined(WARPX_DIM_1D_Z)
372  for (int iz=0; iz<=depos_order; iz++){
374  &buf(lo.x+k+iz, 0, 0, 0),
375  sz[iz]*wq);
376  }
377 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
378  for (int iz=0; iz<=depos_order; iz++){
379  for (int ix=0; ix<=depos_order; ix++){
381  &buf(lo.x+i+ix, lo.y+k+iz, 0, 0),
382  sx[ix]*sz[iz]*wq);
383 #if defined(WARPX_DIM_RZ)
384  Complex xy = xy0; // Throughout the following loop, xy takes the value e^{i m theta}
385  for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
386  // The factor 2 on the weighting comes from the normalization of the modes
387  amrex::Gpu::Atomic::AddNoRet( &buf(lo.x+i+ix, lo.y+k+iz, 0, 2*imode-1), 2._rt*sx[ix]*sz[iz]*wq*xy.real());
388  amrex::Gpu::Atomic::AddNoRet( &buf(lo.x+i+ix, lo.y+k+iz, 0, 2*imode ), 2._rt*sx[ix]*sz[iz]*wq*xy.imag());
389  xy = xy*xy0;
390  }
391 #endif
392  }
393  }
394 #elif defined(WARPX_DIM_3D)
395  for (int iz=0; iz<=depos_order; iz++){
396  for (int iy=0; iy<=depos_order; iy++){
397  for (int ix=0; ix<=depos_order; ix++){
399  &buf(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
400  sx[ix]*sy[iy]*sz[iz]*wq);
401  }
402  }
403  }
404 #endif
405  }
406 
407 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
408  __syncthreads();
409 
410  addLocalToGlobal(tbx, rho_arr, buf);
411 #endif // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
412  }
413  );
414 }
415 
416 #endif // WARPX_CHARGEDEPOSITION_H_
#define AMREX_GPU_DEVICE
#define AMREX_D_DECL(a, b, c)
void doChargeDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, long np_to_deposit, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, amrex::Dim3 lo, amrex::Real q, [[maybe_unused]] int n_rz_azimuthal_modes)
Definition: ChargeDeposition.H:38
void doChargeDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, const amrex::IntVect &ix_type, const long np_to_deposit, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real q, [[maybe_unused]]const int n_rz_azimuthal_modes, const amrex::DenseBins< WarpXParticleContainer::ParticleTileType::ParticleTileDataType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size, const amrex::IntVect bin_size)
Definition: ChargeDeposition.H:183
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
NODE
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE BoxND & grow(int i) noexcept
AMREX_GPU_HOST_DEVICE IntVectND< dim > type() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
gpuStream_t gpuStream() noexcept
constexpr int iz
constexpr int iy
constexpr int ix
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
void launch(T const &n, L &&f) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
IntVectND< AMREX_SPACEDIM > IntVect
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
Definition: ShapeFactors.H:29
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept