8 #ifndef CURRENTDEPOSITION_H_
9 #define CURRENTDEPOSITION_H_
47 template <
int depos_order>
50 const amrex::ParticleReal yp,
51 const amrex::ParticleReal zp,
52 const amrex::ParticleReal wq,
53 const amrex::ParticleReal vx,
54 const amrex::ParticleReal vy,
55 const amrex::ParticleReal vz,
62 const amrex::Real relative_time,
64 const amrex::Real dxi,
65 const amrex::Real dyi),
67 const amrex::Real
xmin,
68 const amrex::Real ymin),
69 const amrex::Real invvol,
71 const int n_rz_azimuthal_modes)
73 using namespace amrex::literals;
74 #if !defined(WARPX_DIM_RZ)
77 #if defined(WARPX_DIM_1D_Z)
80 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
84 constexpr
int zdir = WARPX_ZINDEX;
89 #if defined(WARPX_DIM_RZ)
92 const amrex::Real xpmid = xp + relative_time*vx;
93 const amrex::Real ypmid = yp + relative_time*vy;
94 const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
98 costheta = xpmid/rpmid;
99 sintheta = ypmid/rpmid;
105 const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
106 const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
108 const amrex::Real wqx = wq*invvol*vx;
109 const amrex::Real wqy = wq*invvol*vy;
111 const amrex::Real wqz = wq*invvol*vz;
115 #if (AMREX_SPACEDIM >= 2)
118 #if defined(WARPX_DIM_RZ)
120 const double xmid = (rpmid -
xmin)*dxi;
122 const double xmid = ((xp -
xmin) + relative_time*vx)*dxi;
129 double sx_node[depos_order + 1] = {0.};
130 double sx_cell[depos_order + 1] = {0.};
133 if (jx_type[0] ==
NODE || jy_type[0] ==
NODE || jz_type[0] ==
NODE) {
134 j_node = compute_shape_factor(sx_node, xmid);
136 if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
137 j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
140 amrex::Real sx_jx[depos_order + 1] = {0._rt};
141 amrex::Real sx_jy[depos_order + 1] = {0._rt};
142 amrex::Real sx_jz[depos_order + 1] = {0._rt};
143 for (
int ix=0; ix<=depos_order; ix++)
145 sx_jx[ix] = ((jx_type[0] ==
NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
146 sx_jy[ix] = ((jy_type[0] ==
NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
147 sx_jz[ix] = ((jz_type[0] ==
NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
150 int const j_jx = ((jx_type[0] ==
NODE) ? j_node : j_cell);
151 int const j_jy = ((jy_type[0] ==
NODE) ? j_node : j_cell);
152 int const j_jz = ((jz_type[0] ==
NODE) ? j_node : j_cell);
155 #if defined(WARPX_DIM_3D)
158 const double ymid = ((yp - ymin) + relative_time*vy)*dyi;
159 double sy_node[depos_order + 1] = {0.};
160 double sy_cell[depos_order + 1] = {0.};
163 if (jx_type[1] ==
NODE || jy_type[1] ==
NODE || jz_type[1] ==
NODE) {
164 k_node = compute_shape_factor(sy_node, ymid);
166 if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
167 k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
169 amrex::Real sy_jx[depos_order + 1] = {0._rt};
170 amrex::Real sy_jy[depos_order + 1] = {0._rt};
171 amrex::Real sy_jz[depos_order + 1] = {0._rt};
172 for (
int iy=0; iy<=depos_order; iy++)
174 sy_jx[iy] = ((jx_type[1] ==
NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
175 sy_jy[iy] = ((jy_type[1] ==
NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
176 sy_jz[iy] = ((jz_type[1] ==
NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
178 int const k_jx = ((jx_type[1] ==
NODE) ? k_node : k_cell);
179 int const k_jy = ((jy_type[1] ==
NODE) ? k_node : k_cell);
180 int const k_jz = ((jz_type[1] ==
NODE) ? k_node : k_cell);
185 const double zmid = ((zp - zmin) + relative_time*vz)*dzi;
186 double sz_node[depos_order + 1] = {0.};
187 double sz_cell[depos_order + 1] = {0.};
190 if (jx_type[zdir] ==
NODE || jy_type[zdir] ==
NODE || jz_type[zdir] ==
NODE) {
191 l_node = compute_shape_factor(sz_node, zmid);
193 if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
194 l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
196 amrex::Real sz_jx[depos_order + 1] = {0._rt};
197 amrex::Real sz_jy[depos_order + 1] = {0._rt};
198 amrex::Real sz_jz[depos_order + 1] = {0._rt};
199 for (
int iz=0; iz<=depos_order; iz++)
201 sz_jx[iz] = ((jx_type[zdir] ==
NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
202 sz_jy[iz] = ((jy_type[zdir] ==
NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
203 sz_jz[iz] = ((jz_type[zdir] ==
NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
205 int const l_jx = ((jx_type[zdir] ==
NODE) ? l_node : l_cell);
206 int const l_jy = ((jy_type[zdir] ==
NODE) ? l_node : l_cell);
207 int const l_jz = ((jz_type[zdir] ==
NODE) ? l_node : l_cell);
210 #if defined(WARPX_DIM_1D_Z)
211 for (
int iz=0; iz<=depos_order; iz++){
213 &jx_arr(lo.
x+l_jx+iz, 0, 0, 0),
216 &jy_arr(lo.
x+l_jy+iz, 0, 0, 0),
219 &jz_arr(lo.
x+l_jz+iz, 0, 0, 0),
223 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
224 for (
int iz=0; iz<=depos_order; iz++){
225 for (
int ix=0; ix<=depos_order; ix++){
227 &jx_arr(lo.
x+j_jx+ix, lo.
y+l_jx+iz, 0, 0),
228 sx_jx[ix]*sz_jx[iz]*wqx);
230 &jy_arr(lo.
x+j_jy+ix, lo.
y+l_jy+iz, 0, 0),
231 sx_jy[ix]*sz_jy[iz]*wqy);
233 &jz_arr(lo.
x+j_jz+ix, lo.
y+l_jz+iz, 0, 0),
234 sx_jz[ix]*sz_jz[iz]*wqz);
235 #if defined(WARPX_DIM_RZ)
237 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
250 #elif defined(WARPX_DIM_3D)
251 for (
int iz=0; iz<=depos_order; iz++){
252 for (
int iy=0; iy<=depos_order; iy++){
253 for (
int ix=0; ix<=depos_order; ix++){
255 &jx_arr(lo.
x+j_jx+ix, lo.
y+k_jx+iy, lo.
z+l_jx+iz),
256 sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
258 &jy_arr(lo.
x+j_jy+ix, lo.
y+k_jy+iy, lo.
z+l_jy+iz),
259 sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
261 &jz_arr(lo.
x+j_jz+ix, lo.
y+k_jz+iy, lo.
z+l_jz+iz),
262 sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
293 template <
int depos_order>
295 const amrex::ParticleReal *
const wp,
296 const amrex::ParticleReal *
const uxp,
297 const amrex::ParticleReal *
const uyp,
298 const amrex::ParticleReal *
const uzp,
304 amrex::Real relative_time,
305 const std::array<amrex::Real,3>&
dx,
306 const std::array<amrex::Real,3>& xyzmin,
309 int n_rz_azimuthal_modes,
311 long load_balance_costs_update_algo)
313 using namespace amrex::literals;
315 #if !defined(WARPX_DIM_RZ)
319 #if !defined(AMREX_USE_GPU)
325 const bool do_ionization = ion_lev;
326 const amrex::Real dzi = 1.0_rt/
dx[2];
327 #if defined(WARPX_DIM_1D_Z)
328 const amrex::Real invvol = dzi;
330 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
331 const amrex::Real dxi = 1.0_rt/
dx[0];
332 const amrex::Real invvol = dxi*dzi;
333 #elif defined(WARPX_DIM_3D)
334 const amrex::Real dxi = 1.0_rt/
dx[0];
335 const amrex::Real dyi = 1.0_rt/
dx[1];
336 const amrex::Real invvol = dxi*dyi*dzi;
339 #if (AMREX_SPACEDIM >= 2)
340 const amrex::Real
xmin = xyzmin[0];
342 #if defined(WARPX_DIM_3D)
343 const amrex::Real ymin = xyzmin[1];
345 const amrex::Real zmin = xyzmin[2];
357 #if defined(WARPX_USE_GPUCLOCK)
358 amrex::Real* cost_real =
nullptr;
367 #if defined(WARPX_USE_GPUCLOCK)
373 amrex::ParticleReal xp, yp, zp;
374 GetPosition(ip, xp, yp, zp);
377 const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
378 + uyp[ip]*uyp[ip]*clightsq
379 + uzp[ip]*uzp[ip]*clightsq);
380 const amrex::Real vx = uxp[ip]*gaminv;
381 const amrex::Real vy = uyp[ip]*gaminv;
382 const amrex::Real vz = uzp[ip]*gaminv;
384 amrex::Real wq = q*wp[ip];
389 doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
390 jx_type, jy_type, jz_type,
394 invvol, lo, n_rz_azimuthal_modes);
398 #if defined(WARPX_USE_GPUCLOCK)
430 template <
int depos_order>
432 const amrex::ParticleReal *
const wp,
433 const amrex::ParticleReal *
const uxp_n,
434 const amrex::ParticleReal *
const uyp_n,
435 const amrex::ParticleReal *
const uzp_n,
436 const amrex::ParticleReal *
const uxp,
437 const amrex::ParticleReal *
const uyp,
438 const amrex::ParticleReal *
const uzp,
439 const int *
const ion_lev,
443 const long np_to_deposit,
444 const std::array<amrex::Real,3>&
dx,
445 const std::array<amrex::Real,3>& xyzmin,
448 const int n_rz_azimuthal_modes,
450 const long load_balance_costs_update_algo)
452 using namespace amrex::literals;
453 #if !defined(WARPX_DIM_RZ)
457 #if !defined(AMREX_USE_GPU)
463 const bool do_ionization = ion_lev;
464 const amrex::Real dzi = 1.0_rt/
dx[2];
465 #if defined(WARPX_DIM_1D_Z)
466 const amrex::Real invvol = dzi;
468 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
469 const amrex::Real dxi = 1.0_rt/
dx[0];
470 const amrex::Real invvol = dxi*dzi;
471 #elif defined(WARPX_DIM_3D)
472 const amrex::Real dxi = 1.0_rt/
dx[0];
473 const amrex::Real dyi = 1.0_rt/
dx[1];
474 const amrex::Real invvol = dxi*dyi*dzi;
477 #if (AMREX_SPACEDIM >= 2)
478 const amrex::Real
xmin = xyzmin[0];
480 #if defined(WARPX_DIM_3D)
481 const amrex::Real ymin = xyzmin[1];
483 const amrex::Real zmin = xyzmin[2];
493 #if defined(WARPX_USE_GPUCLOCK)
494 amrex::Real* cost_real =
nullptr;
503 #if defined(WARPX_USE_GPUCLOCK)
509 amrex::ParticleReal xp, yp, zp;
510 GetPosition(ip, xp, yp, zp);
516 const amrex::ParticleReal uxp_np1 = 2._prt*uxp[ip] - uxp_n[ip];
517 const amrex::ParticleReal uyp_np1 = 2._prt*uyp[ip] - uyp_n[ip];
518 const amrex::ParticleReal uzp_np1 = 2._prt*uzp[ip] - uzp_n[ip];
519 const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
520 const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
521 const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
523 const amrex::Real vx = uxp[ip]*gaminv;
524 const amrex::Real vy = uyp[ip]*gaminv;
525 const amrex::Real vz = uzp[ip]*gaminv;
527 amrex::Real wq = q*wp[ip];
532 const amrex::Real relative_time = 0._rt;
533 doDepositionShapeNKernel<depos_order>(xp, yp, zp, wq, vx, vy, vz, jx_arr, jy_arr, jz_arr,
534 jx_type, jy_type, jz_type,
538 invvol, lo, n_rz_azimuthal_modes);
542 #if defined(WARPX_USE_GPUCLOCK)
576 template <
int depos_order>
578 const amrex::ParticleReal *
const wp,
579 const amrex::ParticleReal *
const uxp,
580 const amrex::ParticleReal *
const uyp,
581 const amrex::ParticleReal *
const uzp,
587 const amrex::Real relative_time,
588 const std::array<amrex::Real,3>&
dx,
589 const std::array<amrex::Real,3>& xyzmin,
592 int n_rz_azimuthal_modes,
594 long load_balance_costs_update_algo,
600 using namespace amrex::literals;
602 #if defined(AMREX_USE_HIP) || defined(AMREX_USE_CUDA)
603 using namespace amrex;
614 constexpr
int zdir = WARPX_ZINDEX;
619 #if defined(WARPX_USE_GPUCLOCK)
620 amrex::Real* cost_real =
nullptr;
628 const auto domain = geom.
Domain();
631 sample_tbox.
grow(depos_order);
639 const int nblocks = a_bins.
numBins();
643 const std::size_t shared_mem_bytes = npts*
sizeof(amrex::Real);
647 "Tile size too big for GPU shared memory current deposition");
654 #if defined(WARPX_USE_GPUCLOCK)
659 const int bin_id = blockIdx.x;
660 const unsigned int bin_start = offsets_ptr[bin_id];
661 const unsigned int bin_stop = offsets_ptr[bin_id+1];
663 if (bin_start == bin_stop) {
return; }
668 ParticleReal xp, yp, zp;
669 GetPosition(permutation[bin_start], xp, yp, zp);
670 #if defined(WARPX_DIM_3D)
671 IntVect iv =
IntVect(
int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
672 int( amrex::Math::floor((yp-plo[1]) * dxiarr[1]) ),
673 int( amrex::Math::floor((zp-plo[2]) * dxiarr[2]) ));
674 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
675 IntVect iv =
IntVect(
int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
676 int( amrex::Math::floor((zp-plo[1]) * dxiarr[1]) ));
677 #elif defined(WARPX_DIM_1D_Z)
678 IntVect iv =
IntVect(
int( amrex::Math::floor((zp-plo[0]) * dxiarr[0]) ));
680 iv += domain.smallEnd();
684 buffer_box.
grow(depos_order);
690 amrex::Real*
const shared = gsm.
dataPtr();
700 volatile amrex::Real* vs = shared;
701 for (
int i = threadIdx.x;
i < npts;
i += blockDim.x){
705 for (
unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
707 const unsigned int ip = permutation[ip_orig];
708 depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_buff, jx_type,
709 relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes,
710 ip, zdir,
NODE, CELL, 0);
714 addLocalToGlobal(tbox_x, jx_arr, jx_buff);
715 for (
int i = threadIdx.x;
i < npts;
i += blockDim.x){
720 for (
unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
722 const unsigned int ip = permutation[ip_orig];
723 depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jy_buff, jy_type,
724 relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes,
725 ip, zdir,
NODE, CELL, 1);
729 addLocalToGlobal(tbox_y, jy_arr, jy_buff);
730 for (
int i = threadIdx.x;
i < npts;
i += blockDim.x){
735 for (
unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
737 const unsigned int ip = permutation[ip_orig];
738 depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jz_buff, jz_type,
739 relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes,
740 ip, zdir,
NODE, CELL, 2);
744 addLocalToGlobal(tbox_z, jz_arr, jz_buff);
746 #if defined(WARPX_USE_GPUCLOCK)
757 ignore_unused(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes, cost, load_balance_costs_update_algo, a_bins, box, geom, a_tbox_max_size);
788 template <
int depos_order>
790 const amrex::ParticleReal *
const wp,
791 const amrex::ParticleReal *
const uxp,
792 const amrex::ParticleReal *
const uyp,
793 const amrex::ParticleReal *
const uzp,
800 amrex::Real relative_time,
801 const std::array<amrex::Real,3>&
dx,
802 std::array<amrex::Real, 3> xyzmin,
805 int n_rz_azimuthal_modes,
806 amrex::Real *
const cost,
807 long load_balance_costs_update_algo)
809 using namespace amrex;
810 using namespace amrex::literals;
812 #if !defined(WARPX_DIM_RZ)
816 #if !defined(AMREX_USE_GPU)
822 bool const do_ionization = ion_lev;
823 #if !defined(WARPX_DIM_1D_Z)
824 Real
const dxi = 1.0_rt /
dx[0];
826 #if !defined(WARPX_DIM_1D_Z)
827 Real
const xmin = xyzmin[0];
829 #if defined(WARPX_DIM_3D)
830 Real
const dyi = 1.0_rt /
dx[1];
831 Real
const ymin = xyzmin[1];
833 Real
const dzi = 1.0_rt /
dx[2];
834 Real
const zmin = xyzmin[2];
836 #if defined(WARPX_DIM_3D)
837 Real
const invdtdx = 1.0_rt / (
dt*
dx[1]*
dx[2]);
838 Real
const invdtdy = 1.0_rt / (
dt*
dx[0]*
dx[2]);
839 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]*
dx[1]);
840 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
841 Real
const invdtdx = 1.0_rt / (
dt*
dx[2]);
842 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]);
843 Real
const invvol = 1.0_rt / (
dx[0]*
dx[2]);
844 #elif defined(WARPX_DIM_1D_Z)
845 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]);
846 Real
const invvol = 1.0_rt / (
dx[2]);
849 #if defined(WARPX_DIM_RZ)
854 #if !defined(WARPX_DIM_1D_Z)
855 Real constexpr one_third = 1.0_rt / 3.0_rt;
856 Real constexpr one_sixth = 1.0_rt / 6.0_rt;
860 #if defined(WARPX_USE_GPUCLOCK)
861 amrex::Real* cost_real =
nullptr;
870 #if defined(WARPX_USE_GPUCLOCK)
877 Real
const gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
878 + uyp[ip]*uyp[ip]*clightsq
879 + uzp[ip]*uzp[ip]*clightsq);
887 ParticleReal xp, yp, zp;
888 GetPosition(ip, xp, yp, zp);
890 #if !defined(WARPX_DIM_1D_Z)
891 Real
const wqx = wq*invdtdx;
893 #if defined(WARPX_DIM_3D)
894 Real
const wqy = wq*invdtdy;
896 Real
const wqz = wq*invdtdz;
899 #if defined(WARPX_DIM_RZ)
900 Real
const xp_new = xp + (relative_time + 0.5_rt*
dt)*uxp[ip]*gaminv;
901 Real
const yp_new = yp + (relative_time + 0.5_rt*
dt)*uyp[ip]*gaminv;
902 Real
const xp_mid = xp_new - 0.5_rt*
dt*uxp[ip]*gaminv;
903 Real
const yp_mid = yp_new - 0.5_rt*
dt*uyp[ip]*gaminv;
904 Real
const xp_old = xp_new -
dt*uxp[ip]*gaminv;
905 Real
const yp_old = yp_new -
dt*uyp[ip]*gaminv;
906 Real
const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
907 Real
const rp_mid = std::sqrt(xp_mid*xp_mid + yp_mid*yp_mid);
908 Real
const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
909 Real costheta_new, sintheta_new;
910 if (rp_new > 0._rt) {
911 costheta_new = xp_new/rp_new;
912 sintheta_new = yp_new/rp_new;
914 costheta_new = 1._rt;
915 sintheta_new = 0._rt;
917 amrex::Real costheta_mid, sintheta_mid;
918 if (rp_mid > 0._rt) {
919 costheta_mid = xp_mid/rp_mid;
920 sintheta_mid = yp_mid/rp_mid;
922 costheta_mid = 1._rt;
923 sintheta_mid = 0._rt;
925 amrex::Real costheta_old, sintheta_old;
926 if (rp_old > 0._rt) {
927 costheta_old = xp_old/rp_old;
928 sintheta_old = yp_old/rp_old;
930 costheta_old = 1._rt;
931 sintheta_old = 0._rt;
937 double const x_new = (rp_new -
xmin)*dxi;
938 double const x_old = (rp_old -
xmin)*dxi;
940 #if !defined(WARPX_DIM_1D_Z)
942 double const x_new = (xp -
xmin + (relative_time + 0.5_rt*
dt)*uxp[ip]*gaminv)*dxi;
943 double const x_old = x_new -
dt*dxi*uxp[ip]*gaminv;
946 #if defined(WARPX_DIM_3D)
948 double const y_new = (yp - ymin + (relative_time + 0.5_rt*
dt)*uyp[ip]*gaminv)*dyi;
949 double const y_old = y_new -
dt*dyi*uyp[ip]*gaminv;
952 double const z_new = (zp - zmin + (relative_time + 0.5_rt*
dt)*uzp[ip]*gaminv)*dzi;
953 double const z_old = z_new -
dt*dzi*uzp[ip]*gaminv;
955 #if defined(WARPX_DIM_RZ)
956 Real
const vy = (-uxp[ip]*sintheta_mid + uyp[ip]*costheta_mid)*gaminv;
957 #elif defined(WARPX_DIM_XZ)
958 Real
const vy = uyp[ip]*gaminv;
959 #elif defined(WARPX_DIM_1D_Z)
960 Real
const vx = uxp[ip]*gaminv;
961 Real
const vy = uyp[ip]*gaminv;
969 #if !defined(WARPX_DIM_1D_Z)
970 double sx_new[depos_order + 3] = {0.};
971 double sx_old[depos_order + 3] = {0.};
973 #if defined(WARPX_DIM_3D)
975 double sy_new[depos_order + 3] = {0.};
976 double sy_old[depos_order + 3] = {0.};
979 double sz_new[depos_order + 3] = {0.};
980 double sz_old[depos_order + 3] = {0.};
988 #if !defined(WARPX_DIM_1D_Z)
989 const int i_new = compute_shape_factor(sx_new+1, x_new);
990 const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
992 #if defined(WARPX_DIM_3D)
993 const int j_new = compute_shape_factor(sy_new+1, y_new);
994 const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
996 const int k_new = compute_shape_factor(sz_new+1, z_new);
997 const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
1000 #if !defined(WARPX_DIM_1D_Z)
1001 int dil = 1, diu = 1;
1002 if (i_old < i_new) { dil = 0; }
1003 if (i_old > i_new) { diu = 0; }
1005 #if defined(WARPX_DIM_3D)
1006 int djl = 1, dju = 1;
1007 if (j_old < j_new) { djl = 0; }
1008 if (j_old > j_new) { dju = 0; }
1010 int dkl = 1, dku = 1;
1011 if (k_old < k_new) { dkl = 0; }
1012 if (k_old > k_new) { dku = 0; }
1014 #if defined(WARPX_DIM_3D)
1016 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1017 for (
int j=djl; j<=depos_order+2-dju; j++) {
1018 amrex::Real sdxi = 0._rt;
1019 for (
int i=dil;
i<=depos_order+1-diu;
i++) {
1020 sdxi += wqx*(sx_old[
i] - sx_new[
i])*(
1021 one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
1022 +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
1027 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1028 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1029 amrex::Real sdyj = 0._rt;
1030 for (
int j=djl; j<=depos_order+1-dju; j++) {
1031 sdyj += wqy*(sy_old[j] - sy_new[j])*(
1032 one_third*(sx_new[
i]*sz_new[k] + sx_old[
i]*sz_old[k])
1033 +one_sixth*(sx_new[
i]*sz_old[k] + sx_old[
i]*sz_new[k]));
1038 for (
int j=djl; j<=depos_order+2-dju; j++) {
1039 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1040 amrex::Real sdzk = 0._rt;
1041 for (
int k=dkl; k<=depos_order+1-dku; k++) {
1042 sdzk += wqz*(sz_old[k] - sz_new[k])*(
1043 one_third*(sx_new[
i]*sy_new[j] + sx_old[
i]*sy_old[j])
1044 +one_sixth*(sx_new[
i]*sy_old[j] + sx_old[
i]*sy_new[j]));
1050 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1052 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1053 amrex::Real sdxi = 0._rt;
1054 for (
int i=dil;
i<=depos_order+1-diu;
i++) {
1055 sdxi += wqx*(sx_old[
i] - sx_new[
i])*0.5_rt*(sz_new[k] + sz_old[k]);
1057 #if defined(WARPX_DIM_RZ)
1059 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1061 const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
1064 xy_mid = xy_mid*xy_mid0;
1069 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1070 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1071 Real
const sdyj = wq*vy*invvol*(
1072 one_third*(sx_new[
i]*sz_new[k] + sx_old[
i]*sz_old[k])
1073 +one_sixth*(sx_new[
i]*sz_old[k] + sx_old[
i]*sz_new[k]));
1075 #if defined(WARPX_DIM_RZ)
1080 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1083 const Complex djt_cmplx = -2._rt * I*(i_new-1 +
i +
xmin*dxi)*wq*invdtdx/(amrex::Real)imode
1084 *(
Complex(sx_new[
i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
1085 +
Complex(sx_old[
i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
1088 xy_new = xy_new*xy_new0;
1089 xy_mid = xy_mid*xy_mid0;
1090 xy_old = xy_old*xy_old0;
1095 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1097 for (
int k=dkl; k<=depos_order+1-dku; k++) {
1098 sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[
i] + sx_old[
i]);
1100 #if defined(WARPX_DIM_RZ)
1102 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1104 const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1107 xy_mid = xy_mid*xy_mid0;
1112 #elif defined(WARPX_DIM_1D_Z)
1114 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1115 amrex::Real
const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1118 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1119 amrex::Real
const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1122 amrex::Real sdzk = 0._rt;
1123 for (
int k=dkl; k<=depos_order+1-dku; k++) {
1124 sdzk += wqz*(sz_old[k] - sz_new[k]);
1130 #if defined(WARPX_USE_GPUCLOCK)
1133 *cost += *cost_real;
1163 template <
int depos_order>
1165 const amrex::ParticleReal *
const yp_n,
1166 const amrex::ParticleReal *
const zp_n,
1168 const amrex::ParticleReal *
const wp,
1169 [[maybe_unused]]
const amrex::ParticleReal *
const uxp_n,
1170 [[maybe_unused]]
const amrex::ParticleReal *
const uyp_n,
1171 [[maybe_unused]]
const amrex::ParticleReal *
const uzp_n,
1172 [[maybe_unused]]
const amrex::ParticleReal *
const uxp_nph,
1173 [[maybe_unused]]
const amrex::ParticleReal *
const uyp_nph,
1174 [[maybe_unused]]
const amrex::ParticleReal *
const uzp_nph,
1175 const int *
const ion_lev,
1179 const long np_to_deposit,
1180 const amrex::Real
dt,
1181 const std::array<amrex::Real,3>&
dx,
1182 const std::array<amrex::Real, 3> xyzmin,
1184 const amrex::Real q,
1185 const int n_rz_azimuthal_modes,
1186 amrex::Real *
const cost,
1187 const long load_balance_costs_update_algo)
1189 using namespace amrex;
1190 #if !defined(WARPX_DIM_RZ)
1194 #if !defined(AMREX_USE_GPU)
1200 bool const do_ionization = ion_lev;
1201 #if !defined(WARPX_DIM_1D_Z)
1202 Real
const dxi = 1.0_rt /
dx[0];
1204 #if !defined(WARPX_DIM_1D_Z)
1205 Real
const xmin = xyzmin[0];
1207 #if defined(WARPX_DIM_3D)
1208 Real
const dyi = 1.0_rt /
dx[1];
1209 Real
const ymin = xyzmin[1];
1211 Real
const dzi = 1.0_rt /
dx[2];
1212 Real
const zmin = xyzmin[2];
1214 #if defined(WARPX_DIM_3D)
1215 Real
const invdtdx = 1.0_rt / (
dt*
dx[1]*
dx[2]);
1216 Real
const invdtdy = 1.0_rt / (
dt*
dx[0]*
dx[2]);
1217 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]*
dx[1]);
1218 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1219 Real
const invdtdx = 1.0_rt / (
dt*
dx[2]);
1220 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]);
1221 Real
const invvol = 1.0_rt / (
dx[0]*
dx[2]);
1222 #elif defined(WARPX_DIM_1D_Z)
1223 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]);
1224 Real
const invvol = 1.0_rt / (
dx[2]);
1227 #if defined(WARPX_DIM_RZ)
1231 #if !defined(WARPX_DIM_1D_Z)
1232 Real constexpr one_third = 1.0_rt / 3.0_rt;
1233 Real constexpr one_sixth = 1.0_rt / 6.0_rt;
1237 #if defined(WARPX_USE_GPUCLOCK)
1238 amrex::Real* cost_real =
nullptr;
1247 #if defined(WARPX_USE_GPUCLOCK)
1253 #if !defined(WARPX_DIM_3D)
1258 const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
1259 const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
1260 const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
1261 const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
1262 const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
1263 const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
1272 ParticleReal xp_nph, yp_nph, zp_nph;
1273 GetPosition(ip, xp_nph, yp_nph, zp_nph);
1275 #if !defined(WARPX_DIM_1D_Z)
1276 ParticleReal
const xp_np1 = 2._prt*xp_nph - xp_n[ip];
1280 #if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
1281 ParticleReal
const yp_np1 = 2._prt*yp_nph - yp_n[ip];
1285 ParticleReal
const zp_np1 = 2._prt*zp_nph - zp_n[ip];
1287 #if !defined(WARPX_DIM_1D_Z)
1288 amrex::Real
const wqx = wq*invdtdx;
1290 #if defined(WARPX_DIM_3D)
1291 amrex::Real
const wqy = wq*invdtdy;
1293 amrex::Real
const wqz = wq*invdtdz;
1296 #if defined(WARPX_DIM_RZ)
1297 amrex::Real
const xp_new = xp_np1;
1298 amrex::Real
const yp_new = yp_np1;
1299 amrex::Real
const xp_mid = xp_nph;
1300 amrex::Real
const yp_mid = yp_nph;
1301 amrex::Real
const xp_old = xp_n[ip];
1302 amrex::Real
const yp_old = yp_n[ip];
1303 amrex::Real
const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
1304 amrex::Real
const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
1305 amrex::Real
const rp_mid = (rp_new + rp_old)/2._rt;
1306 amrex::Real costheta_new, sintheta_new;
1307 if (rp_new > 0._rt) {
1308 costheta_new = xp_new/rp_new;
1309 sintheta_new = yp_new/rp_new;
1311 costheta_new = 1._rt;
1312 sintheta_new = 0._rt;
1314 amrex::Real costheta_mid, sintheta_mid;
1315 if (rp_mid > 0._rt) {
1316 costheta_mid = xp_mid/rp_mid;
1317 sintheta_mid = yp_mid/rp_mid;
1319 costheta_mid = 1._rt;
1320 sintheta_mid = 0._rt;
1322 amrex::Real costheta_old, sintheta_old;
1323 if (rp_old > 0._rt) {
1324 costheta_old = xp_old/rp_old;
1325 sintheta_old = yp_old/rp_old;
1327 costheta_old = 1._rt;
1328 sintheta_old = 0._rt;
1334 double const x_new = (rp_new -
xmin)*dxi;
1335 double const x_old = (rp_old -
xmin)*dxi;
1337 #if !defined(WARPX_DIM_1D_Z)
1339 double const x_new = (xp_np1 -
xmin)*dxi;
1340 double const x_old = (xp_n[ip] -
xmin)*dxi;
1343 #if defined(WARPX_DIM_3D)
1345 double const y_new = (yp_np1 - ymin)*dyi;
1346 double const y_old = (yp_n[ip] - ymin)*dyi;
1349 double const z_new = (zp_np1 - zmin)*dzi;
1350 double const z_old = (zp_n[ip] - zmin)*dzi;
1352 #if defined(WARPX_DIM_RZ)
1353 amrex::Real
const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
1354 #elif defined(WARPX_DIM_XZ)
1355 amrex::Real
const vy = uyp_nph[ip]*gaminv;
1356 #elif defined(WARPX_DIM_1D_Z)
1357 amrex::Real
const vx = uxp_nph[ip]*gaminv;
1358 amrex::Real
const vy = uyp_nph[ip]*gaminv;
1366 #if !defined(WARPX_DIM_1D_Z)
1367 double sx_new[depos_order + 3] = {0.};
1368 double sx_old[depos_order + 3] = {0.};
1370 #if defined(WARPX_DIM_3D)
1372 double sy_new[depos_order + 3] = {0.};
1373 double sy_old[depos_order + 3] = {0.};
1376 double sz_new[depos_order + 3] = {0.};
1377 double sz_old[depos_order + 3] = {0.};
1385 #if !defined(WARPX_DIM_1D_Z)
1386 const int i_new = compute_shape_factor(sx_new+1, x_new);
1387 const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
1389 #if defined(WARPX_DIM_3D)
1390 const int j_new = compute_shape_factor(sy_new+1, y_new);
1391 const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
1393 const int k_new = compute_shape_factor(sz_new+1, z_new);
1394 const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
1397 #if !defined(WARPX_DIM_1D_Z)
1398 int dil = 1, diu = 1;
1399 if (i_old < i_new) { dil = 0; }
1400 if (i_old > i_new) { diu = 0; }
1402 #if defined(WARPX_DIM_3D)
1403 int djl = 1, dju = 1;
1404 if (j_old < j_new) { djl = 0; }
1405 if (j_old > j_new) { dju = 0; }
1407 int dkl = 1, dku = 1;
1408 if (k_old < k_new) { dkl = 0; }
1409 if (k_old > k_new) { dku = 0; }
1411 #if defined(WARPX_DIM_3D)
1413 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1414 for (
int j=djl; j<=depos_order+2-dju; j++) {
1415 amrex::Real sdxi = 0._rt;
1416 for (
int i=dil;
i<=depos_order+1-diu;
i++) {
1417 sdxi += wqx*(sx_old[
i] - sx_new[
i])*(
1418 one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
1419 +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
1424 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1425 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1426 amrex::Real sdyj = 0._rt;
1427 for (
int j=djl; j<=depos_order+1-dju; j++) {
1428 sdyj += wqy*(sy_old[j] - sy_new[j])*(
1429 one_third*(sx_new[
i]*sz_new[k] + sx_old[
i]*sz_old[k])
1430 +one_sixth*(sx_new[
i]*sz_old[k] + sx_old[
i]*sz_new[k]));
1435 for (
int j=djl; j<=depos_order+2-dju; j++) {
1436 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1437 amrex::Real sdzk = 0._rt;
1438 for (
int k=dkl; k<=depos_order+1-dku; k++) {
1439 sdzk += wqz*(sz_old[k] - sz_new[k])*(
1440 one_third*(sx_new[
i]*sy_new[j] + sx_old[
i]*sy_old[j])
1441 +one_sixth*(sx_new[
i]*sy_old[j] + sx_old[
i]*sy_new[j]));
1447 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1449 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1450 amrex::Real sdxi = 0._rt;
1451 for (
int i=dil;
i<=depos_order+1-diu;
i++) {
1452 sdxi += wqx*(sx_old[
i] - sx_new[
i])*0.5_rt*(sz_new[k] + sz_old[k]);
1454 #if defined(WARPX_DIM_RZ)
1456 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1458 const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
1461 xy_mid = xy_mid*xy_mid0;
1466 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1467 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1468 Real
const sdyj = wq*vy*invvol*(
1469 one_third*(sx_new[
i]*sz_new[k] + sx_old[
i]*sz_old[k])
1470 +one_sixth*(sx_new[
i]*sz_old[k] + sx_old[
i]*sz_new[k]));
1472 #if defined(WARPX_DIM_RZ)
1477 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1480 const Complex djt_cmplx = -2._rt * I*(i_new-1 +
i +
xmin*dxi)*wq*invdtdx/(amrex::Real)imode
1481 *(
Complex(sx_new[
i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
1482 +
Complex(sx_old[
i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
1485 xy_new = xy_new*xy_new0;
1486 xy_mid = xy_mid*xy_mid0;
1487 xy_old = xy_old*xy_old0;
1492 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
1494 for (
int k=dkl; k<=depos_order+1-dku; k++) {
1495 sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[
i] + sx_old[
i]);
1497 #if defined(WARPX_DIM_RZ)
1499 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
1501 const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
1504 xy_mid = xy_mid*xy_mid0;
1509 #elif defined(WARPX_DIM_1D_Z)
1511 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1512 amrex::Real
const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1515 for (
int k=dkl; k<=depos_order+2-dku; k++) {
1516 amrex::Real
const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
1519 amrex::Real sdzk = 0._rt;
1520 for (
int k=dkl; k<=depos_order+1-dku; k++) {
1521 sdzk += wqz*(sz_old[k] - sz_new[k]);
1527 #if defined(WARPX_USE_GPUCLOCK)
1530 *cost += *cost_real;
1565 template <
int depos_order>
1567 const amrex::ParticleReal*
const wp,
1568 const amrex::ParticleReal*
const uxp,
1569 const amrex::ParticleReal*
const uyp,
1570 const amrex::ParticleReal*
const uzp,
1571 const int*
const ion_lev,
1577 amrex::Real relative_time,
1578 const std::array<amrex::Real,3>&
dx,
1579 const std::array<amrex::Real,3>& xyzmin,
1582 int n_rz_azimuthal_modes,
1584 long load_balance_costs_update_algo)
1586 using namespace amrex::literals;
1588 #if defined(WARPX_DIM_RZ)
1590 wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
1591 np_to_deposit,
dt, relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes);
1595 #if defined(WARPX_DIM_1D_Z)
1597 wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
1598 np_to_deposit,
dt, relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes);
1602 #if !defined(WARPX_USE_GPUCLOCK)
1606 #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
1610 const bool do_ionization = ion_lev;
1613 const amrex::Real dxi = 1._rt /
dx[0];
1614 const amrex::Real dzi = 1._rt /
dx[2];
1615 #if defined(WARPX_DIM_3D)
1616 const amrex::Real dyi = 1._rt /
dx[1];
1620 const amrex::Real invdt = 1._rt /
dt;
1623 #if defined(WARPX_DIM_XZ)
1624 const amrex::Real invvol = dxi * dzi;
1625 #elif defined(WARPX_DIM_3D)
1626 const amrex::Real invvol = dxi * dyi * dzi;
1630 const amrex::Real
xmin = xyzmin[0];
1631 const amrex::Real zmin = xyzmin[2];
1632 #if defined(WARPX_DIM_3D)
1633 const amrex::Real ymin = xyzmin[1];
1637 #if defined(WARPX_DIM_3D)
1640 #elif defined(WARPX_DIM_XZ)
1656 #if defined(WARPX_USE_GPUCLOCK)
1657 amrex::Real* cost_real =
nullptr;
1665 #if defined(WARPX_USE_GPUCLOCK)
1672 const amrex::Real invgam = 1._rt / std::sqrt(1._rt + uxp[ip] * uxp[ip] * invcsq
1673 + uyp[ip] * uyp[ip] * invcsq
1674 + uzp[ip] * uzp[ip] * invcsq);
1676 amrex::Real wq = q * wp[ip];
1677 if (do_ionization) { wq *= ion_lev[ip]; }
1680 amrex::ParticleReal xp, yp, zp;
1681 GetPosition(ip, xp, yp, zp);
1684 const amrex::Real vx = uxp[ip] * invgam;
1685 const amrex::Real vy = uyp[ip] * invgam;
1686 const amrex::Real vz = uzp[ip] * invgam;
1689 xp += relative_time * vx;
1690 yp += relative_time * vy;
1691 zp += relative_time * vz;
1694 #if defined(WARPX_DIM_XZ)
1695 const amrex::Real wqy = wq * vy * invvol;
1700 double const x_new = (xp -
xmin + 0.5_rt*
dt*vx) * dxi;
1701 double const x_old = (xp -
xmin - 0.5_rt*
dt*vx) * dxi;
1702 #if defined(WARPX_DIM_3D)
1704 double const y_new = (yp - ymin + 0.5_rt*
dt*vy) * dyi;
1705 double const y_old = (yp - ymin - 0.5_rt*
dt*vy) * dyi;
1708 double const z_new = (zp - zmin + 0.5_rt*
dt*vz) * dzi;
1709 double const z_old = (zp - zmin - 0.5_rt*
dt*vz) * dzi;
1713 double sx_new[depos_order+1] = {0.};
1714 double sx_old[depos_order+1] = {0.};
1715 #if defined(WARPX_DIM_3D)
1717 double sy_new[depos_order+1] = {0.};
1718 double sy_old[depos_order+1] = {0.};
1721 double sz_new[depos_order+1] = {0.};
1722 double sz_old[depos_order+1] = {0.};
1729 const int i_new = compute_shape_factor(sx_new, x_new);
1730 #if defined(WARPX_DIM_3D)
1733 const int j_new = compute_shape_factor(sy_new, y_new);
1737 const int k_new = compute_shape_factor(sz_new, z_new);
1743 const int i_old = compute_shape_factor(sx_old, x_old);
1744 #if defined(WARPX_DIM_3D)
1747 const int j_old = compute_shape_factor(sy_old, y_old);
1751 const int k_old = compute_shape_factor(sz_old, z_old);
1754 #if defined(WARPX_DIM_XZ)
1756 for (
int k=0; k<=depos_order; k++) {
1757 for (
int i=0;
i<=depos_order;
i++) {
1761 auto const sxn_szn =
static_cast<amrex::Real
>(sx_new[
i] * sz_new[k]);
1762 auto const sxo_szn =
static_cast<amrex::Real
>(sx_old[
i] * sz_new[k]);
1763 auto const sxn_szo =
static_cast<amrex::Real
>(sx_new[
i] * sz_old[k]);
1764 auto const sxo_szo =
static_cast<amrex::Real
>(sx_old[
i] * sz_old[k]);
1766 if (i_new == i_old && k_new == k_old) {
1769 wq * invvol * invdt * (sxn_szn - sxo_szo));
1772 wq * invvol * invdt * (sxn_szo - sxo_szn));
1776 wqy * 0.25_rt * (sxn_szn + sxn_szo + sxo_szn + sxo_szo));
1780 wq * invvol * invdt * sxn_szn);
1783 - wq * invvol * invdt * sxo_szo);
1786 wq * invvol * invdt * sxn_szo);
1789 - wq * invvol * invdt * sxo_szn);
1793 wqy * 0.25_rt * sxn_szn);
1796 wqy * 0.25_rt * sxn_szo);
1799 wqy * 0.25_rt * sxo_szn);
1802 wqy * 0.25_rt * sxo_szo);
1808 #elif defined(WARPX_DIM_3D)
1810 for (
int k=0; k<=depos_order; k++) {
1811 for (
int j=0; j<=depos_order; j++) {
1813 auto const syn_szn =
static_cast<amrex::Real
>(sy_new[j] * sz_new[k]);
1814 auto const syo_szn =
static_cast<amrex::Real
>(sy_old[j] * sz_new[k]);
1815 auto const syn_szo =
static_cast<amrex::Real
>(sy_new[j] * sz_old[k]);
1816 auto const syo_szo =
static_cast<amrex::Real
>(sy_old[j] * sz_old[k]);
1818 for (
int i=0;
i<=depos_order;
i++) {
1820 auto const sxn_syn_szn =
static_cast<amrex::Real
>(sx_new[
i]) * syn_szn;
1821 auto const sxo_syn_szn =
static_cast<amrex::Real
>(sx_old[
i]) * syn_szn;
1822 auto const sxn_syo_szn =
static_cast<amrex::Real
>(sx_new[
i]) * syo_szn;
1823 auto const sxo_syo_szn =
static_cast<amrex::Real
>(sx_old[
i]) * syo_szn;
1824 auto const sxn_syn_szo =
static_cast<amrex::Real
>(sx_new[
i]) * syn_szo;
1825 auto const sxo_syn_szo =
static_cast<amrex::Real
>(sx_old[
i]) * syn_szo;
1826 auto const sxn_syo_szo =
static_cast<amrex::Real
>(sx_new[
i]) * syo_szo;
1827 auto const sxo_syo_szo =
static_cast<amrex::Real
>(sx_old[
i]) * syo_szo;
1829 if (i_new == i_old && j_new == j_old && k_new == k_old) {
1832 wq * invvol * invdt * (sxn_syn_szn - sxo_syo_szo));
1835 wq * invvol * invdt * (sxn_syn_szo - sxo_syo_szn));
1838 wq * invvol * invdt * (sxn_syo_szn - sxo_syn_szo));
1841 wq * invvol * invdt * (sxo_syn_szn - sxn_syo_szo));
1845 wq * invvol * invdt * sxn_syn_szn);
1848 - wq * invvol * invdt * sxo_syo_szo);
1851 wq * invvol * invdt * sxn_syn_szo);
1854 - wq * invvol * invdt * sxo_syo_szn);
1857 wq * invvol * invdt * sxn_syo_szn);
1860 - wq * invvol * invdt * sxo_syn_szo);
1863 wq * invvol * invdt * sxo_syn_szn);
1866 - wq * invvol * invdt * sxn_syo_szo);
1874 #if defined(WARPX_DIM_3D)
1877 const amrex::Real t_a = temp_arr(
i,j,k,0);
1878 const amrex::Real t_b = temp_arr(
i,j,k,1);
1879 const amrex::Real t_c = temp_arr(
i,j,k,2);
1880 const amrex::Real t_d = temp_arr(
i,j,k,3);
1881 Dx_arr(
i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b + t_c - 2._rt*t_d);
1882 Dy_arr(
i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b - 2._rt*t_c + t_d);
1883 Dz_arr(
i,j,k) += (1._rt/6._rt)*(2_rt*t_a - 2._rt*t_b + t_c + t_d);
1885 #elif defined(WARPX_DIM_XZ)
1888 const amrex::Real t_a = temp_arr(
i,j,0,0);
1889 const amrex::Real t_b = temp_arr(
i,j,0,1);
1890 Dx_arr(
i,j,0) += (0.5_rt)*(t_a + t_b);
1891 Dz_arr(
i,j,0) += (0.5_rt)*(t_a - t_b);
1898 # if defined(WARPX_USE_GPUCLOCK)
1901 *cost += *cost_real;
#define AMREX_ALWAYS_ASSERT(EX)
#define AMREX_GPU_HOST_DEVICE
#define AMREX_D_DECL(a, b, c)
void doDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo)
Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:294
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doDepositionShapeNKernel(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, const amrex::ParticleReal wq, const amrex::ParticleReal vx, const amrex::ParticleReal vy, const amrex::ParticleReal vz, amrex::Array4< amrex::Real > const &jx_arr, amrex::Array4< amrex::Real > const &jy_arr, amrex::Array4< amrex::Real > const &jz_arr, amrex::IntVect const &jx_type, amrex::IntVect const &jy_type, amrex::IntVect const &jz_type, const amrex::Real relative_time, AMREX_D_DECL(const amrex::Real dzi, const amrex::Real dxi, const amrex::Real dyi), AMREX_D_DECL(const amrex::Real zmin, const amrex::Real xmin, const amrex::Real ymin), const amrex::Real invvol, const amrex::Dim3 lo, const int n_rz_azimuthal_modes)
Kernel for the direct current deposition for thread thread_num.
Definition: CurrentDeposition.H:49
void doDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, long np_to_deposit, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo, const amrex::DenseBins< WarpXParticleContainer::ParticleType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size)
Current Deposition for thread thread_num using shared memory.
Definition: CurrentDeposition.H:577
void doEsirkepovDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, std::array< amrex::Real, 3 > xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *const cost, long load_balance_costs_update_algo)
Esirkepov Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:789
void doChargeConservingDepositionShapeNImplicit(const amrex::ParticleReal *const xp_n, const amrex::ParticleReal *const yp_n, const amrex::ParticleReal *const zp_n, const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, [[maybe_unused]]const amrex::ParticleReal *const uxp_n, [[maybe_unused]]const amrex::ParticleReal *const uyp_n, [[maybe_unused]]const amrex::ParticleReal *const uzp_n, [[maybe_unused]]const amrex::ParticleReal *const uxp_nph, [[maybe_unused]]const amrex::ParticleReal *const uyp_nph, [[maybe_unused]]const amrex::ParticleReal *const uzp_nph, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_deposit, const amrex::Real dt, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *const cost, const long load_balance_costs_update_algo)
Esirkepov Current Deposition for thread thread_num for implicit scheme The difference from doEsirkepo...
Definition: CurrentDeposition.H:1164
void doVayDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &Dx_fab, amrex::FArrayBox &Dy_fab, amrex::FArrayBox &Dz_fab, long np_to_deposit, amrex::Real dt, amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes, amrex::Real *cost, long load_balance_costs_update_algo)
Vay current deposition (Vay et al, 2013) for thread thread_num: deposit D in real space and store the...
Definition: CurrentDeposition.H:1566
void doDepositionShapeNImplicit(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp_n, const amrex::ParticleReal *const uyp_n, const amrex::ParticleReal *const uzp_n, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Direct current deposition for thread thread_num for the implicit scheme The only difference from doDe...
Definition: CurrentDeposition.H:431
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition: TextMsg.H:15
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
amrex::GpuComplex< amrex::Real > Complex
Definition: WarpX_Complex.H:22
static amrex::IntVect shared_tilesize
tileSize to use for shared current deposition operations
Definition: WarpX.H:242
static int shared_mem_current_tpb
number of threads to use per block in shared deposition
Definition: WarpX.H:239
Defines a timer object to be used on GPU; measures summed thread cycles.
Definition: KernelTimer.H:27
virtual void free(void *pt)=0
virtual void * alloc(std::size_t sz)=0
AMREX_FORCE_INLINE Array4< Real const > array() const noexcept
const Box & box() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
AMREX_GPU_HOST_DEVICE Box & grow(int i) noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
const Box & Domain() const noexcept
static std::size_t sharedMemPerBlock() noexcept
static constexpr auto c
vacuum speed of light [m/s]
Definition: constant.H:44
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box convert(const Box &b, const IntVect &typ) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
void launch(T const &n, L &&f) noexcept
Arena * The_Managed_Arena()
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
float dt
Definition: stencil.py:442
tuple dx
lab frame
Definition: stencil.py:429
xmin
Definition: stencil.py:424
Definition: ShapeFactors.H:29
Definition: ShapeFactors.H:84
@ GpuClock
Definition: WarpXAlgorithmSelection.H:136
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T real() const noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept