8 #ifndef CHARGEDEPOSITION_H_
9 #define CHARGEDEPOSITION_H_
39 template <
int depos_order>
41 const amrex::ParticleReal *
const wp,
45 const std::array<amrex::Real,3>&
dx,
46 const std::array<amrex::Real, 3> xyzmin,
49 int n_rz_azimuthal_modes,
51 long load_balance_costs_update_algo)
53 using namespace amrex;
55 #if !defined(AMREX_USE_GPU)
61 const bool do_ionization = ion_lev;
62 const amrex::Real dzi = 1.0_rt/
dx[2];
63 #if defined(WARPX_DIM_1D_Z)
64 const amrex::Real invvol = dzi;
66 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
67 const amrex::Real dxi = 1.0_rt/
dx[0];
68 const amrex::Real invvol = dxi*dzi;
69 #elif defined(WARPX_DIM_3D)
70 const amrex::Real dxi = 1.0_rt/
dx[0];
71 const amrex::Real dyi = 1.0_rt/
dx[1];
72 const amrex::Real invvol = dxi*dyi*dzi;
75 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
76 const amrex::Real
xmin = xyzmin[0];
78 #if defined(WARPX_DIM_3D)
79 const amrex::Real ymin = xyzmin[1];
81 const amrex::Real zmin = xyzmin[2];
90 #if defined(WARPX_USE_GPUCLOCK)
91 amrex::Real* cost_real =
nullptr;
100 #if defined(WARPX_USE_GPUCLOCK)
106 amrex::Real wq = q*wp[ip]*invvol;
111 amrex::ParticleReal xp, yp, zp;
112 GetPosition(ip, xp, yp, zp);
116 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
119 #if defined(WARPX_DIM_RZ)
120 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
121 amrex::Real costheta;
122 amrex::Real sintheta;
131 const amrex::Real x = (rp -
xmin)*dxi;
133 const amrex::Real x = (xp -
xmin)*dxi;
138 amrex::Real sx[depos_order + 1] = {0._rt};
140 if (rho_type[0] ==
NODE) {
141 i = compute_shape_factor(sx, x);
142 }
else if (rho_type[0] == CELL) {
143 i = compute_shape_factor(sx, x - 0.5_rt);
146 #if defined(WARPX_DIM_3D)
148 const amrex::Real y = (yp - ymin)*dyi;
149 amrex::Real sy[depos_order + 1] = {0._rt};
151 if (rho_type[1] ==
NODE) {
152 j = compute_shape_factor(sy, y);
153 }
else if (rho_type[1] == CELL) {
154 j = compute_shape_factor(sy, y - 0.5_rt);
158 const amrex::Real z = (zp - zmin)*dzi;
159 amrex::Real sz[depos_order + 1] = {0._rt};
161 if (rho_type[WARPX_ZINDEX] ==
NODE) {
162 k = compute_shape_factor(sz, z);
163 }
else if (rho_type[WARPX_ZINDEX] == CELL) {
164 k = compute_shape_factor(sz, z - 0.5_rt);
168 #if defined(WARPX_DIM_1D_Z)
169 for (
int iz=0; iz<=depos_order; iz++){
171 &rho_arr(lo.
x+k+iz, 0, 0, 0),
175 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
176 for (
int iz=0; iz<=depos_order; iz++){
177 for (
int ix=0; ix<=depos_order; ix++){
179 &rho_arr(lo.
x+
i+ix, lo.
y+k+iz, 0, 0),
181 #if defined(WARPX_DIM_RZ)
183 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
192 #elif defined(WARPX_DIM_3D)
193 for (
int iz=0; iz<=depos_order; iz++){
194 for (
int iy=0; iy<=depos_order; iy++){
195 for (
int ix=0; ix<=depos_order; ix++){
197 &rho_arr(lo.
x+
i+ix, lo.
y+j+iy, lo.
z+k+iz),
198 sx[ix]*sy[iy]*sz[iz]*wq);
205 #if defined(WARPX_USE_GPUCLOCK)
241 template <
int depos_order>
243 const amrex::ParticleReal *
const wp,
247 const long np_to_deposit,
248 const std::array<amrex::Real,3>&
dx,
249 const std::array<amrex::Real, 3> xyzmin,
252 const int n_rz_azimuthal_modes,
254 const long load_balance_costs_update_algo,
262 using namespace amrex;
266 #if !defined(AMREX_USE_GPU)
267 amrex::ignore_unused(ix_type, cost, load_balance_costs_update_algo, a_bins, box, geom, a_tbox_max_size, bin_size);
272 const bool do_ionization = ion_lev;
273 const amrex::Real dzi = 1.0_rt/
dx[2];
274 #if defined(WARPX_DIM_1D_Z)
275 const amrex::Real invvol = dzi;
277 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
278 const amrex::Real dxi = 1.0_rt/
dx[0];
279 const amrex::Real invvol = dxi*dzi;
280 #elif defined(WARPX_DIM_3D)
281 const amrex::Real dxi = 1.0_rt/
dx[0];
282 const amrex::Real dyi = 1.0_rt/
dx[1];
283 const amrex::Real invvol = dxi*dyi*dzi;
286 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
287 const amrex::Real
xmin = xyzmin[0];
289 #if defined(WARPX_DIM_3D)
290 const amrex::Real ymin = xyzmin[1];
292 const amrex::Real zmin = xyzmin[2];
295 auto rho_box = rho_fab.
box();
302 #if defined(WARPX_USE_GPUCLOCK)
303 amrex::Real* cost_real =
nullptr;
310 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
313 const auto domain = geom.
Domain();
318 sample_tbox_x.
grow(depos_order);
320 const auto npts = sample_tbox_x.
numPts();
322 const int nblocks = a_bins.
numBins();
324 const int threads_per_block = 256;
326 std::size_t shared_mem_bytes = npts*
sizeof(amrex::Real);
331 "Tile size too big for GPU shared memory charge deposition");
334 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
346 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
347 const int bin_id = blockIdx.x;
348 const unsigned int bin_start = offsets_ptr[bin_id];
349 const unsigned int bin_stop = offsets_ptr[bin_id+1];
351 if (bin_start == bin_stop) {
return; }
355 ParticleReal xp, yp, zp;
356 GetPosition(permutation[bin_start], xp, yp, zp);
357 #if defined(WARPX_DIM_3D)
358 IntVect iv =
IntVect(
int(amrex::Math::floor((xp-plo[0])*dxiarr[0])),
359 int(amrex::Math::floor((yp-plo[1])*dxiarr[1])),
360 int(amrex::Math::floor((zp-plo[2])*dxiarr[2])));
361 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
363 int(amrex::Math::floor((xp-plo[0])*dxiarr[0])),
364 int(amrex::Math::floor((zp-plo[1])*dxiarr[1])));
365 #elif defined(WARPX_DIM_1D_Z)
366 IntVect iv =
IntVect(
int(amrex::Math::floor((zp-plo[0])*dxiarr[0])));
368 iv += domain.smallEnd();
373 tbx.
grow(depos_order);
376 amrex::Real*
const shared = gsm.
dataPtr();
381 volatile amrex::Real* vs = shared;
382 for (
int i = threadIdx.x;
i < tbx.
numPts();
i += blockDim.x) {
390 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
392 for (
unsigned int ip_orig = bin_start + threadIdx.x; ip_orig < bin_stop; ip_orig += blockDim.x)
395 const unsigned int ip = permutation[ip_orig];
397 #if defined(WARPX_USE_GPUCLOCK)
403 amrex::Real wq = q*wp[ip]*invvol;
408 amrex::ParticleReal xp, yp, zp;
409 GetPosition(ip, xp, yp, zp);
413 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
416 #if defined(WARPX_DIM_RZ)
417 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
418 amrex::Real costheta;
419 amrex::Real sintheta;
428 const amrex::Real x = (rp -
xmin)*dxi;
430 const amrex::Real x = (xp -
xmin)*dxi;
435 amrex::Real sx[depos_order + 1] = {0._rt};
437 if (rho_type[0] ==
NODE) {
438 i = compute_shape_factor(sx, x);
439 }
else if (rho_type[0] == CELL) {
440 i = compute_shape_factor(sx, x - 0.5_rt);
443 #if defined(WARPX_DIM_3D)
445 const amrex::Real y = (yp - ymin)*dyi;
446 amrex::Real sy[depos_order + 1] = {0._rt};
448 if (rho_type[1] ==
NODE) {
449 j = compute_shape_factor(sy, y);
450 }
else if (rho_type[1] == CELL) {
451 j = compute_shape_factor(sy, y - 0.5_rt);
455 const amrex::Real z = (zp - zmin)*dzi;
456 amrex::Real sz[depos_order + 1] = {0._rt};
458 if (rho_type[WARPX_ZINDEX] ==
NODE) {
459 k = compute_shape_factor(sz, z);
460 }
else if (rho_type[WARPX_ZINDEX] == CELL) {
461 k = compute_shape_factor(sz, z - 0.5_rt);
465 #if defined(WARPX_DIM_1D_Z)
466 for (
int iz=0; iz<=depos_order; iz++){
468 &buf(lo.
x+k+iz, 0, 0, 0),
472 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
473 for (
int iz=0; iz<=depos_order; iz++){
474 for (
int ix=0; ix<=depos_order; ix++){
476 &buf(lo.
x+
i+ix, lo.
y+k+iz, 0, 0),
478 #if defined(WARPX_DIM_RZ)
480 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
489 #elif defined(WARPX_DIM_3D)
490 for (
int iz=0; iz<=depos_order; iz++){
491 for (
int iy=0; iy<=depos_order; iy++){
492 for (
int ix=0; ix<=depos_order; ix++){
494 &buf(lo.
x+
i+ix, lo.
y+j+iy, lo.
z+k+iz),
495 sx[ix]*sy[iy]*sz[iz]*wq);
502 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
505 addLocalToGlobal(tbx, rho_arr, buf);
509 #if defined(WARPX_USE_GPUCLOCK)
#define AMREX_D_DECL(a, b, c)
void doChargeDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, const amrex::IntVect &ix_type, const long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo, const amrex::DenseBins< WarpXParticleContainer::ParticleType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size, const amrex::IntVect bin_size)
Definition: ChargeDeposition.H:242
void doChargeDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, long np_to_depose, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo)
Definition: ChargeDeposition.H:40
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
Defines a timer object to be used on GPU; measures summed thread cycles.
Definition: KernelTimer.H:27
virtual void free(void *pt)=0
virtual void * alloc(std::size_t sz)=0
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE IntVect type() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
AMREX_GPU_HOST_DEVICE Box & grow(int i) noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box convert(const Box &b, const IntVect &typ) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
void launch(T const &n, L &&f) noexcept
Arena * The_Managed_Arena()
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
tuple dx
lab frame
Definition: stencil.py:429
xmin
Definition: stencil.py:424
Definition: ShapeFactors.H:29
@ GpuClock
Definition: WarpXAlgorithmSelection.H:125
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept