WarpX
Loading...
Searching...
No Matches
MassMatricesDeposition.H
Go to the documentation of this file.
1/* Copyright 2019 Axel Huebl, David Grote, Maxence Thevenet
2 * Remi Lehe, Weiqun Zhang, Michael Rowan
3 *
4 * This file is part of WarpX.
5 *
6 * License: BSD-3-Clause-LBNL
7 */
8#ifndef WARPX_MASS_MATRICES_DEPOSITION_H_
9#define WARPX_MASS_MATRICES_DEPOSITION_H_
10
17#include "Utils/TextMsg.H"
19#include "Utils/WarpXConst.H"
20#ifdef WARPX_DIM_RZ
21# include "Utils/WarpX_Complex.H"
22#endif
23
24#include <AMReX.H>
25#include <AMReX_Arena.H>
26#include <AMReX_Array4.H>
27#include <AMReX_Dim3.H>
28#include <AMReX_REAL.H>
29
37#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
42#endif
52 const amrex::ParticleReal ms,
53 const amrex::ParticleReal dt,
54 const amrex::ParticleReal rhop,
55#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
56 const amrex::ParticleReal costh,
57 const amrex::ParticleReal sinth,
58#endif
59 const amrex::ParticleReal upx,
60 const amrex::ParticleReal upy,
61 const amrex::ParticleReal upz,
62 const amrex::ParticleReal Bpx,
63 const amrex::ParticleReal Bpy,
64 const amrex::ParticleReal Bpz,
74{
75 using namespace amrex::literals;
76
77 constexpr auto inv_c2 = PhysConst::inv_c2_v<amrex::ParticleReal>;
78
79 // Convert Cartesian B on particle to normalized cyclotron units with dt/2.0
80 const amrex::ParticleReal gamma_bar = std::sqrt(1._prt + (upx*upx + upy*upy + upz*upz)*inv_c2);
81 const amrex::ParticleReal alpha = qs/ms*0.5_prt*dt/gamma_bar;
82 const amrex::ParticleReal bpx = alpha*Bpx;
83 const amrex::ParticleReal bpy = alpha*Bpy;
84 const amrex::ParticleReal bpz = alpha*Bpz;
85
86 const amrex::ParticleReal bpsq = bpx*bpx + bpy*bpy + bpz*bpz;
87 const amrex::ParticleReal arogp = alpha*rhop/(1.0_prt + bpsq);
88
89 // Compute Mass Matrix kernels (non-relativistic for now)
90 fpxx = arogp*(bpx*bpx + 1.0_rt);
91 fpxy = arogp*(bpx*bpy + bpz);
92 fpxz = arogp*(bpx*bpz - bpy);
93
94 fpyx = arogp*(bpy*bpx - bpz);
95 fpyy = arogp*(bpy*bpy + 1.0_rt);
96 fpyz = arogp*(bpy*bpz + bpx);
97
98 fpzx = arogp*(bpz*bpx + bpy);
99 fpzy = arogp*(bpz*bpy - bpx);
100 fpzz = arogp*(bpz*bpz + 1.0_rt);
101
102#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
103
104 // [fprr fprt fprz] = [ costh sinth 0][fpxx fpxy fpxz][costh -sinth 0]
105 // [fptr fptt fptz] = [-sinth costh 0][fpyx fpyy fpyz][sinth costh 0]
106 // [fpzr fpzt fpzz] = [ 0 0 1][fpzx fpzy fpzz][0 0 1]
107 const amrex::ParticleReal c2 = costh*costh;
108 const amrex::ParticleReal s2 = sinth*sinth;
109 const amrex::ParticleReal cs = costh*sinth;
110 const amrex::ParticleReal fprr = c2*fpxx + cs*(fpxy + fpyx) + s2*fpyy;
111 const amrex::ParticleReal fprt = cs*(fpyy - fpxx) + c2*fpxy - s2*fpyx;
112 const amrex::ParticleReal fprz = costh*fpxz + sinth*fpyz;
113 const amrex::ParticleReal fptr = cs*(fpyy - fpxx) - s2*fpxy + c2*fpyx;
114 const amrex::ParticleReal fptt = s2*fpxx + c2*fpyy - cs*(fpxy + fpyx);
115 const amrex::ParticleReal fptz = -sinth*fpxz + costh*fpyz;
116 const amrex::ParticleReal fpzr = costh*fpzx + sinth*fpzy;
117 const amrex::ParticleReal fpzt = -sinth*fpzx + costh*fpzy;
118
119 // Returned values are named for Cartesian, but they are indeed mapped
120 fpxx = fprr;
121 fpxy = fprt;
122 fpxz = fprz;
123 fpyx = fptr;
124 fpyy = fptt;
125 fpyz = fptz;
126 fpzx = fpzr;
127 fpzy = fpzt;
128#endif
129
130}
131
152template <int depos_order, bool full_mass_matrices, bool deposit_J>
155 [[maybe_unused]] const amrex::ParticleReal yp,
156 [[maybe_unused]] const amrex::ParticleReal zp,
157 const amrex::Real wq_invvol,
158 const amrex::ParticleReal vx,
159 [[maybe_unused]] const amrex::ParticleReal vy,
161 const amrex::ParticleReal fpxx,
162 [[maybe_unused]] const amrex::ParticleReal fpxy,
163 [[maybe_unused]] const amrex::ParticleReal fpxz,
164 [[maybe_unused]] const amrex::ParticleReal fpyx,
165 const amrex::ParticleReal fpyy,
166 [[maybe_unused]] const amrex::ParticleReal fpyz,
167 [[maybe_unused]] const amrex::ParticleReal fpzx,
168 [[maybe_unused]] const amrex::ParticleReal fpzy,
169 const amrex::ParticleReal fpzz,
170 amrex::Array4<amrex::Real> const& jx_arr,
171 [[maybe_unused]] amrex::Array4<amrex::Real> const& jy_arr,
172 amrex::Array4<amrex::Real> const& jz_arr,
173 amrex::Array4<amrex::Real> const& Sxx_arr,
174 [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxy_arr,
175 [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxz_arr,
176 [[maybe_unused]] amrex::Array4<amrex::Real> const& Syx_arr,
177 [[maybe_unused]] amrex::Array4<amrex::Real> const& Syy_arr,
178 [[maybe_unused]] amrex::Array4<amrex::Real> const& Syz_arr,
179 [[maybe_unused]] amrex::Array4<amrex::Real> const& Szx_arr,
180 [[maybe_unused]] amrex::Array4<amrex::Real> const& Szy_arr,
181 amrex::Array4<amrex::Real> const& Szz_arr,
182 const amrex::IntVect& jx_type,
183 const amrex::IntVect& jy_type,
184 const amrex::IntVect& jz_type,
185 const amrex::XDim3& dinv,
186 const amrex::XDim3& xyzmin,
187 const amrex::Dim3 lo)
188{
189 using namespace amrex::literals;
190
191 constexpr int NODE = amrex::IndexType::NODE;
192 constexpr int CELL = amrex::IndexType::CELL;
193
194 // MassMatrices index shift parameter
196
197#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
198 // In RZ and RCYLINDER, wqx is actually wqr, and wqy is wqtheta
199 // Convert to cylindrical at the mid point
200 const amrex::Real rpmid = std::sqrt(xp*xp + yp*yp);
201 const amrex::Real costheta = (rpmid > 0._rt ? xp/rpmid : 1._rt);
202 const amrex::Real sintheta = (rpmid > 0._rt ? yp/rpmid : 0._rt);
203 const amrex::Real wqx = wq_invvol*(+vx*costheta + vy*sintheta);
204 const amrex::Real wqy = wq_invvol*(-vx*sintheta + vy*costheta);
205 const amrex::Real wqz = wq_invvol*vz;
206#elif defined(WARPX_DIM_RSPHERE)
207 // Convert to cylindrical at the mid point
208 const amrex::Real rpxymid = std::sqrt(xp*xp + yp*yp);
209 const amrex::Real rpmid = std::sqrt(xp*xp + yp*yp + zp*zp);
210 const amrex::Real costheta = (rpxymid > 0._rt ? xp/rpxymid : 1._rt);
211 const amrex::Real sintheta = (rpxymid > 0._rt ? yp/rpxymid : 0._rt);
212 const amrex::Real cosphi = (rpmid > 0._rt ? rpxymid/rpmid : 1._rt);
213 const amrex::Real sinphi = (rpmid > 0._rt ? zp/rpmid : 0._rt);
214 // convert from Cartesian to spherical
215 const amrex::Real wqx = wq_invvol*(+vx*costheta*cosphi + vy*sintheta*cosphi + vz*sinphi);
216 const amrex::Real wqy = wq_invvol*(-vx*sintheta + vy*costheta);
217 const amrex::Real wqz = wq_invvol*(-vx*costheta*sinphi - vy*sintheta*sinphi + vz*cosphi);
218#else
219 const amrex::Real wqx = wq_invvol*vx;
220 const amrex::Real wqy = wq_invvol*vy;
221 const amrex::Real wqz = wq_invvol*vz;
222#endif
223
224 // --- Compute shape factors
225 Compute_shape_factor< depos_order > const compute_shape_factor;
226#if !defined(WARPX_DIM_1D_Z)
227 // x direction
228 // Get particle position after 1/2 push back in position
229 // Keep these double to avoid bug in single precision
230#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
231 const double xmid = (rpmid - xyzmin.x)*dinv.x;
232#else
233 const double xmid = (xp - xyzmin.x)*dinv.x;
234#endif
235
236 // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
237 // sx_j[xyz] shape factor along x for the centering of each current
238 // There are only two possible centerings, node or cell centered, so at most only two shape factor
239 // arrays will be needed.
240 // Keep these double to avoid bug in single precision
241 double sx_node[depos_order + 1] = {0.};
242 double sx_cell[depos_order + 1] = {0.};
243 int j_node = 0;
244 int j_cell = 0;
245 if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
246 j_node = compute_shape_factor(sx_node, xmid);
247 }
248 if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
249 j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
250 }
251
252 // Set the index shift parameter
253 if (j_node == j_cell) { shift[0] = 1; }
254
255 amrex::Real sx_jx[depos_order + 1] = {0._rt};
256 amrex::Real sx_jy[depos_order + 1] = {0._rt};
257 amrex::Real sx_jz[depos_order + 1] = {0._rt};
258 for (int ix = 0; ix <= depos_order; ix++)
259 {
260 sx_jx[ix] = ((jx_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
261 sx_jy[ix] = ((jy_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
262 sx_jz[ix] = ((jz_type[0] == NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
263 }
264
265 int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
266 int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
267 int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
268#endif
269
270#if defined(WARPX_DIM_3D)
271 // y direction
272 // Keep these double to avoid bug in single precision
273 const double ymid = (yp - xyzmin.y)*dinv.y;
274 double sy_node[depos_order + 1] = {0.};
275 double sy_cell[depos_order + 1] = {0.};
276 int k_node = 0;
277 int k_cell = 0;
278 if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
279 k_node = compute_shape_factor(sy_node, ymid);
280 }
281 if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
282 k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
283 }
284
285 // Set the index shift parameter
286 if (k_node == k_cell) { shift[1] = 1; }
287
288 amrex::Real sy_jx[depos_order + 1] = {0._rt};
289 amrex::Real sy_jy[depos_order + 1] = {0._rt};
290 amrex::Real sy_jz[depos_order + 1] = {0._rt};
291 for (int iy = 0; iy <= depos_order; iy++)
292 {
293 sy_jx[iy] = ((jx_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
294 sy_jy[iy] = ((jy_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
295 sy_jz[iy] = ((jz_type[1] == NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
296 }
297 int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
298 int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
299 int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
300#endif
301
302#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)
303 // z direction
304 // Keep these double to avoid bug in single precision
305 constexpr int zdir = WARPX_ZINDEX;
306 const double zmid = (zp - xyzmin.z)*dinv.z;
307 double sz_node[depos_order + 1] = {0.};
308 double sz_cell[depos_order + 1] = {0.};
309 int l_node = 0;
310 int l_cell = 0;
311 if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
312 l_node = compute_shape_factor(sz_node, zmid);
313 }
314 if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
315 l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
316 }
317 amrex::Real sz_jx[depos_order + 1] = {0._rt};
318 amrex::Real sz_jy[depos_order + 1] = {0._rt};
319 amrex::Real sz_jz[depos_order + 1] = {0._rt};
320 for (int iz = 0; iz <= depos_order; iz++)
321 {
322 sz_jx[iz] = ((jx_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
323 sz_jy[iz] = ((jy_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
324 sz_jz[iz] = ((jz_type[zdir] == NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
325 }
326 int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
327 int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
328 int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
329
330 // Set the index shift parameter
331 if (l_node==l_cell) { shift[zdir] = 1; }
332
333#endif
334
335 // Compute index offset needed when x and y comps have different location on grid
336 amrex::IntVect offset_xy, offset_xz, offset_yz;
337 for (int dir = 0; dir < AMREX_SPACEDIM; dir++) {
338 offset_xy[dir] = (jx_type[dir] + jy_type[dir]) % 2;
339 offset_xz[dir] = (jx_type[dir] + jz_type[dir]) % 2;
340 offset_yz[dir] = (jy_type[dir] + jz_type[dir]) % 2;
341 }
342
343 // Deposit J and mass matrices
344#if defined(WARPX_DIM_1D_Z)
345 for (int iz = 0; iz <= depos_order; iz++){
346 if constexpr (deposit_J) {
347 amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+l_jx+iz, 0, 0, 0), sz_jx[iz]*wqx);
348 amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+l_jy+iz, 0, 0, 0), sz_jy[iz]*wqy);
349 amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+l_jz+iz, 0, 0, 0), sz_jz[iz]*wqz);
350 }
351
352 // Deposit mass matrices
353 if constexpr (full_mass_matrices) {
354 for (int aa = 0; aa <= depos_order; aa++){
355
356 const int col_base = depos_order + aa - iz;
357 int Nc = 0;
358
359 // Reduced deposit for diagonal mass matrices
360 if (aa <= iz) {
361 Nc = col_base;
362 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+l_jx+iz, 0, 0, Nc), sz_jx[iz]*sz_jx[aa]*fpxx);
363 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+l_jy+iz, 0, 0, Nc), sz_jy[iz]*sz_jy[aa]*fpyy);
364 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+l_jz+iz, 0, 0, Nc), sz_jz[iz]*sz_jz[aa]*fpzz);
365 }
366
367 // Deposit off-diagonal mass matrices for X-current
368 Nc = col_base + shift[0]*offset_xy[0];
369 amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(lo.x+l_jx+iz, 0, 0, Nc), sz_jx[iz]*sz_jy[aa]*fpxy);
370 Nc = col_base + shift[0]*offset_xz[0];
371 amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(lo.x+l_jx+iz, 0, 0, Nc), sz_jx[iz]*sz_jz[aa]*fpxz);
372
373 // Deposit off-diagonal mass matrices for Y-current
374 Nc = col_base + shift[0]*offset_xy[0];
375 amrex::Gpu::Atomic::AddNoRet(&Syx_arr(lo.x+l_jy+iz, 0, 0, Nc), sz_jy[iz]*sz_jx[aa]*fpyx);
376 Nc = col_base + shift[0]*offset_yz[0];
377 amrex::Gpu::Atomic::AddNoRet(&Syz_arr(lo.x+l_jy+iz, 0, 0, Nc), sz_jy[iz]*sz_jz[aa]*fpyz);
378
379 // Deposit off-diagonal mass matrices for Z-current
380 Nc = col_base + 1 - shift[0]*offset_xz[0];
381 amrex::Gpu::Atomic::AddNoRet(&Szx_arr(lo.x+l_jz+iz, 0, 0, Nc), sz_jz[iz]*sz_jx[aa]*fpzx);
382 Nc = col_base + 1 - shift[0]*offset_yz[0];
383 amrex::Gpu::Atomic::AddNoRet(&Szy_arr(lo.x+l_jz+iz, 0, 0, Nc), sz_jz[iz]*sz_jy[aa]*fpzy);
384 }
385 }
386 else { // Deposit mass matrices for diagonal PC only
387 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+l_jx+iz, 0, 0, 0), sz_jx[iz]*fpxx);
388 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+l_jy+iz, 0, 0, 0), sz_jy[iz]*fpyy);
389 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+l_jz+iz, 0, 0, 0), sz_jz[iz]*fpzz);
390 }
391 }
392#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
393 for (int ix = 0; ix <= depos_order; ix++){
394 if constexpr (deposit_J) {
395 amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+j_jx+ix, 0, 0, 0), sx_jx[ix]*wqx);
396 amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+j_jy+ix, 0, 0, 0), sx_jy[ix]*wqy);
397 amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+j_jz+ix, 0, 0, 0), sx_jz[ix]*wqz);
398 }
399 // Deposit mass matrices
400 if constexpr (full_mass_matrices) {
401 for (int aa = 0; aa <= depos_order; aa++) {
402
403 int col_base = depos_order + aa - ix;
404 int Nc;
405
406 // Reduced deposit for diagonal mass matrices
407 if (aa <= ix) {
408 Nc = col_base;
409 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+j_jx+ix, 0, 0, Nc), sx_jx[ix]*sx_jx[aa]*fpxx);
410 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+j_jy+ix, 0, 0, Nc), sx_jy[ix]*sx_jy[aa]*fpyy);
411 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+j_jz+ix, 0, 0, Nc), sx_jz[ix]*sx_jz[aa]*fpzz);
412 }
413
414 // Deposit off-diagonal mass matrices for X-current
415 Nc = col_base + 1 - shift[0]*offset_xy[0];
416 amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(lo.x+j_jx+ix, 0, 0, Nc), sx_jx[ix]*sx_jy[aa]*fpxy);
417 Nc = col_base + 1 - shift[0]*offset_xz[0];
418 amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(lo.x+j_jx+ix, 0, 0, Nc), sx_jx[ix]*sx_jz[aa]*fpxz);
419
420 // Deposit off-diagonal mass matrices for Y-current
421 Nc = col_base + shift[0]*offset_xy[0];
422 amrex::Gpu::Atomic::AddNoRet(&Syx_arr(lo.x+j_jy+ix, 0, 0, Nc), sx_jy[ix]*sx_jx[aa]*fpyx);
423 Nc = col_base + shift[0]*offset_yz[0];
424 amrex::Gpu::Atomic::AddNoRet(&Syz_arr(lo.x+j_jy+ix, 0, 0, Nc), sx_jy[ix]*sx_jz[aa]*fpyz);
425
426 // Deposit off-diagonal mass matrices for Z-current
427 Nc = col_base + shift[0]*offset_xz[0];
428 amrex::Gpu::Atomic::AddNoRet(&Szx_arr(lo.x+j_jz+ix, 0, 0, Nc), sx_jz[ix]*sx_jx[aa]*fpzx);
429 Nc = col_base + shift[0]*offset_yz[0];
430 amrex::Gpu::Atomic::AddNoRet(&Szy_arr(lo.x+j_jz+ix, 0, 0, Nc), sx_jz[ix]*sx_jy[aa]*fpzy);
431
432 }
433 }
434 else { // Deposit mass matrices for diagonal PC only
435 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x + j_jx + ix, 0, 0, 0), sx_jx[ix]*fpxx);
436 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x + j_jy + ix, 0, 0, 0), sx_jy[ix]*fpyy);
437 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x + j_jz + ix, 0, 0, 0), sx_jz[ix]*fpzz);
438 }
439 }
440#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
441 const int base_offset = 1 + 2*depos_order;
442
443 for (int iz = 0; iz <= depos_order; iz++){
444
445 for (int ix = 0; ix <= depos_order; ix++){
446
447 const amrex::Real weight_Jx = sx_jx[ix]*sz_jx[iz];
448 const amrex::Real weight_Jy = sx_jy[ix]*sz_jy[iz];
449 const amrex::Real weight_Jz = sx_jz[ix]*sz_jz[iz];
450
451 if constexpr (deposit_J) {
452 amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0), weight_Jx*wqx);
453 amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0), weight_Jy*wqy);
454 amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0), weight_Jz*wqz);
455 }
456
457 // Deposit mass matrices
458 if constexpr (full_mass_matrices) {
459
460 const int Ncomp0 = 1 + 2*depos_order;
461
462 for (int bb = 0; bb <= depos_order; bb++){
463
464 const int row_base = depos_order + bb - iz;
465
466 for (int aa = 0; aa <= depos_order; aa++){
467 const int col_base = depos_order + aa - ix;
468 const amrex::Real weight_Ex = sx_jx[aa]*sz_jx[bb];
469 const amrex::Real weight_Ey = sx_jy[aa]*sz_jy[bb];
470 const amrex::Real weight_Ez = sx_jz[aa]*sz_jz[bb];
471
472 int offset;
473 int Nc;
474
475 // Reduced deposit for diagonal mass matrices
476 if (col_base <= Ncomp0 - row_base) {
477 offset = base_offset;
478 Nc = col_base + row_base*offset;
480 &Sxx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, Nc),
481 weight_Jx*weight_Ex*fpxx);
483 &Syy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, Nc),
484 weight_Jy*weight_Ey*fpyy);
486 &Szz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, Nc),
487 weight_Jz*weight_Ez*fpzz);
488 }
489
490 // Deposit off-diagonal mass matrices for X-current
491 offset = base_offset + offset_xy[0];
492 Nc = col_base + 1 - shift[0]*offset_xy[0]
493 + (row_base + shift[1]*offset_xy[1])*offset;
495 &Sxy_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, Nc),
496 weight_Jx*weight_Ey*fpxy);
497 offset = base_offset + offset_xz[0];
498 Nc = col_base + 1 - shift[0]*offset_xz[0]
499 + (row_base + shift[1]*offset_xz[1])*offset;
501 &Sxz_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, Nc),
502 weight_Jx*weight_Ez*fpxz);
503
504 // Deposit off-diagonal mass matrices for Y-current
505 offset = base_offset + offset_xy[0];
506 Nc = col_base + shift[0]*offset_xy[0]
507 + (row_base + shift[1]*offset_xy[1])*offset;
509 &Syx_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, Nc),
510 weight_Jy*weight_Ex*fpyx);
511 offset = base_offset + offset_yz[0];
512 Nc = col_base + shift[0]*offset_yz[0]
513 + (row_base + shift[1]*offset_yz[1])*offset;
515 &Syz_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, Nc),
516 weight_Jy*weight_Ez*fpyz);
517
518 // Deposit off-diagonal mass matrices for Z-current
519 offset = base_offset + offset_xz[0];
520 Nc = col_base + shift[0]*offset_xz[0]
521 + (row_base + 1 - shift[1]*offset_xz[1])*offset;
523 &Szx_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, Nc),
524 weight_Jz*weight_Ex*fpzx);
525 offset = base_offset + offset_yz[0];
526 Nc = col_base + shift[0]*offset_yz[0]
527 + (row_base + 1 - shift[1]*offset_yz[1])*offset;
529 &Szy_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, Nc),
530 weight_Jz*weight_Ey*fpzy);
531 }
532
533 }
534
535 }
536 else { // Deposit mass matrices for diagonal PC only
537 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0), weight_Jy*fpyy);
538 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0), weight_Jx*fpxx);
539 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0), weight_Jz*fpzz);
540 }
541 }
542 }
543#elif defined(WARPX_DIM_3D)
544 for (int iz = 0; iz <= depos_order; iz++){
545 for (int iy = 0; iy <= depos_order; iy++){
546 for (int ix = 0; ix <= depos_order; ix++){
547 const amrex::Real weight_Jx = sx_jx[ix]*sy_jx[iy]*sz_jx[iz];
548 const amrex::Real weight_Jy = sx_jy[ix]*sy_jy[iy]*sz_jy[iz];
549 const amrex::Real weight_Jz = sx_jz[ix]*sy_jz[iy]*sz_jz[iz];
550
551 if constexpr (deposit_J) {
552 amrex::Gpu::Atomic::AddNoRet(&jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz), weight_Jx*wqx);
553 amrex::Gpu::Atomic::AddNoRet(&jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz), weight_Jy*wqy);
554 amrex::Gpu::Atomic::AddNoRet(&jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz), weight_Jz*wqz);
555 }
556
557 // Deposit mass matrices
558 if constexpr (full_mass_matrices) {
559 // Should not be here. Full mass matrices not yet implemented in 3D
560 }
561 else { // Deposit mass matrices for diagonal PC only
562 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz, 0), weight_Jx*fpxx);
563 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz, 0), weight_Jy*fpyy);
564 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz, 0), weight_Jz*fpzz);
565 }
566 }
567 }
568 }
569#endif
570}
571
598template <int depos_order, bool full_mass_matrices>
600 [[maybe_unused]] const int* nsuborbits,
601 const amrex::ParticleReal* wp,
602 const amrex::ParticleReal* uxp_n,
603 const amrex::ParticleReal* uyp_n,
604 const amrex::ParticleReal* uzp_n,
605 const amrex::ParticleReal* uxp_nph,
606 const amrex::ParticleReal* uyp_nph,
607 const amrex::ParticleReal* uzp_nph,
608 amrex::Array4<amrex::Real> const& Sxx_arr,
609 amrex::Array4<amrex::Real> const& Sxy_arr,
610 amrex::Array4<amrex::Real> const& Sxz_arr,
611 amrex::Array4<amrex::Real> const& Syx_arr,
612 amrex::Array4<amrex::Real> const& Syy_arr,
613 amrex::Array4<amrex::Real> const& Syz_arr,
614 amrex::Array4<amrex::Real> const& Szx_arr,
615 amrex::Array4<amrex::Real> const& Szy_arr,
616 amrex::Array4<amrex::Real> const& Szz_arr,
617 const amrex::IntVect& jx_type,
618 const amrex::IntVect& jy_type,
619 const amrex::IntVect& jz_type,
620 GetExternalEBField const & getExternalEB,
621 const amrex::ParticleReal Bx_ext,
622 const amrex::ParticleReal By_ext,
623 const amrex::ParticleReal Bz_ext,
627 const amrex::IndexType Bx_type,
628 const amrex::IndexType By_type,
629 const amrex::IndexType Bz_type,
630 const long np_to_deposit,
631 const amrex::Real dt,
632 const amrex::XDim3& dinv,
633 const amrex::XDim3& xyzmin,
634 const amrex::Dim3 lo,
635 const amrex::Real qs,
636 const amrex::Real ms)
637{
638 using namespace amrex::literals;
639
640 const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
641
642 enum exteb_flags : int { no_exteb, has_exteb };
643 const int exteb_runtime_flag = getExternalEB.isNoOp() ? no_exteb : has_exteb;
644
645 // Loop over particles and deposit mass matrices
648 {exteb_runtime_flag},
649 np_to_deposit,
650 [=] AMREX_GPU_DEVICE (long const ip, auto exteb_control) {
651
652 // Skip mass matrix deposition for particles with suborbits.
653 if (nsuborbits && nsuborbits[ip] > 1) { return; }
654
655 amrex::ParticleReal xp_nph, yp_nph, zp_nph;
656 GetPosition(ip, xp_nph, yp_nph, zp_nph);
657
658 // Initialize B on particle to uniform external B
659 amrex::ParticleReal Bxp = Bx_ext;
660 amrex::ParticleReal Byp = By_ext;
661 amrex::ParticleReal Bzp = Bz_ext;
662
663 // Increment with externally applied B-field with time and spatial variation
664 [[maybe_unused]] const auto& getExternalEB_tmp = getExternalEB;
665 if constexpr (exteb_control == has_exteb) {
666 amrex::ParticleReal Exp = 0._prt;
667 amrex::ParticleReal Eyp = 0._prt;
668 amrex::ParticleReal Ezp = 0._prt;
669 getExternalEB(ip, Exp, Eyp, Ezp, Bxp, Byp, Bzp);
670 }
671
672 // Gather magnetic field from the grid
673 doDirectGatherVectorField</*depos_order_perp=*/depos_order,/*depos_order_para=*/depos_order>(
674 xp_nph, yp_nph, zp_nph,
675 Bxp, Byp, Bzp,
676 Bx_arr, By_arr, Bz_arr,
677 Bx_type, By_type, Bz_type,
678 dinv, xyzmin, lo, /*n_rz_azimuthal_modes=*/0 );
679
680 // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
681 const amrex::ParticleReal gaminv = GetImplicitGammaInverse(uxp_n[ip], uyp_n[ip], uzp_n[ip],
682 uxp_nph[ip], uyp_nph[ip], uzp_nph[ip]);
683
684 // Compute current density kernels to deposit
685 const amrex::Real wq_invvol = qs*wp[ip]*invvol;
686 const amrex::Real rhop = wq_invvol*gaminv;
687 const amrex::Real vx = uxp_nph[ip]*gaminv;
688 const amrex::Real vy = uyp_nph[ip]*gaminv;
689 const amrex::Real vz = uzp_nph[ip]*gaminv;
690
691#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
692 amrex::Real const rp_mid = std::sqrt(xp_nph*xp_nph + yp_nph*yp_nph);
693 amrex::Real const costh = (rp_mid > 0._rt ? xp_nph/rp_mid : 1._rt);
694 amrex::Real const sinth = (rp_mid > 0._rt ? yp_nph/rp_mid : 0._rt);
695#endif
696
697 // Set the Mass Matrices kernels
698 amrex::ParticleReal fpxx, fpxy, fpxz;
699 amrex::ParticleReal fpyx, fpyy, fpyz;
700 amrex::ParticleReal fpzx, fpzy, fpzz;
701 setMassMatricesKernels(qs, ms, dt, rhop,
702#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
703 costh, sinth,
704#endif
705 uxp_nph[ip], uyp_nph[ip], uzp_nph[ip],
706 Bxp, Byp, Bzp,
707 fpxx, fpxy, fpxz,
708 fpyx, fpyy, fpyz,
709 fpzx, fpzy, fpzz);
710
711 // Pass dummy arrays for Jx, Jy, Jz (which will not be used)
712 amrex::Array4<amrex::Real> const dummy_Jx{};
713 amrex::Array4<amrex::Real> const dummy_Jy{};
714 amrex::Array4<amrex::Real> const dummy_Jz{};
715
716 doDirectJandSigmaDepositionKernel<depos_order,full_mass_matrices,/*deposit_J=*/false>(
717 xp_nph, yp_nph, zp_nph,
718 wq_invvol, vx, vy, vz,
719 fpxx, fpxy, fpxz,
720 fpyx, fpyy, fpyz,
721 fpzx, fpzy, fpzz,
722 dummy_Jx, dummy_Jy, dummy_Jz,
723 Sxx_arr, Sxy_arr, Sxz_arr,
724 Syx_arr, Syy_arr, Syz_arr,
725 Szx_arr, Szy_arr, Szz_arr,
726 jx_type, jy_type, jz_type,
727 dinv, xyzmin, lo );
728
729 }
730 );
731}
732
758template <int depos_order, bool full_mass_matrices, bool deposit_J>
761 [[maybe_unused]] const amrex::ParticleReal yp_old,
762 [[maybe_unused]] const amrex::ParticleReal zp_old,
763 [[maybe_unused]] const amrex::ParticleReal xp_new,
764 [[maybe_unused]] const amrex::ParticleReal yp_new,
765 [[maybe_unused]] const amrex::ParticleReal zp_new,
766 const amrex::ParticleReal wq_invvol,
767 [[maybe_unused]] const amrex::ParticleReal uxp_mid,
768 [[maybe_unused]] const amrex::ParticleReal uyp_mid,
769 [[maybe_unused]] const amrex::ParticleReal uzp_mid,
770 [[maybe_unused]] const amrex::ParticleReal gaminv,
771 const amrex::ParticleReal fpxx,
772 [[maybe_unused]] const amrex::ParticleReal fpxy,
773 [[maybe_unused]] const amrex::ParticleReal fpxz,
774 [[maybe_unused]] const amrex::ParticleReal fpyx,
775 const amrex::ParticleReal fpyy,
776 [[maybe_unused]] const amrex::ParticleReal fpyz,
777 [[maybe_unused]] const amrex::ParticleReal fpzx,
778 [[maybe_unused]] const amrex::ParticleReal fpzy,
779 const amrex::ParticleReal fpzz,
780 amrex::Array4<amrex::Real> const& Jx_arr,
781 [[maybe_unused]] amrex::Array4<amrex::Real> const& Jy_arr,
782 amrex::Array4<amrex::Real> const& Jz_arr,
783 [[maybe_unused]] int max_crossings,
784 amrex::Array4<amrex::Real> const& Sxx_arr,
785 [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxy_arr,
786 [[maybe_unused]] amrex::Array4<amrex::Real> const& Sxz_arr,
787 [[maybe_unused]] amrex::Array4<amrex::Real> const& Syx_arr,
788 amrex::Array4<amrex::Real> const& Syy_arr,
789 [[maybe_unused]] amrex::Array4<amrex::Real> const& Syz_arr,
790 [[maybe_unused]] amrex::Array4<amrex::Real> const& Szx_arr,
791 [[maybe_unused]] amrex::Array4<amrex::Real> const& Szy_arr,
792 amrex::Array4<amrex::Real> const& Szz_arr,
793 const amrex::Real dt,
794 const amrex::XDim3& dinv,
795 const amrex::XDim3& xyzmin,
796 const amrex::GpuArray<amrex::GpuArray<double,2>, AMREX_SPACEDIM> & domain_double,
797 const amrex::GpuArray<amrex::GpuArray<bool,2>, AMREX_SPACEDIM> & do_cropping,
798 const amrex::Dim3 lo)
799{
800
801 using namespace amrex::literals;
802
803#if (AMREX_SPACEDIM > 1)
804 amrex::Real constexpr one_third = 1.0_rt / 3.0_rt;
805 amrex::Real constexpr one_sixth = 1.0_rt / 6.0_rt;
806#endif
807
808 // computes current and old position in grid units
809#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
810 amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
811 amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
812
813 // Keep these double to avoid bug in single precision
814 double x_new = (rp_new - xyzmin.x)*dinv.x;
815 double const x_old = (rp_old - xyzmin.x)*dinv.x;
816 amrex::Real const vx = (rp_new - rp_old)/dt;
817 amrex::Real const xp_mid = (xp_new + xp_old)*0.5_rt;
818 amrex::Real const yp_mid = (yp_new + yp_old)*0.5_rt;
819 amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
820 amrex::Real const costheta_mid = (rp_mid > 0._rt ? xp_mid/rp_mid : 1._rt);
821 amrex::Real const sintheta_mid = (rp_mid > 0._rt ? yp_mid/rp_mid : 0._rt);
822 amrex::Real const vy = (-uxp_mid*sintheta_mid + uyp_mid*costheta_mid)*gaminv;
823#if defined(WARPX_DIM_RCYLINDER)
824 amrex::Real const vz = uzp_mid*gaminv;
825#endif
826#elif defined(WARPX_DIM_RSPHERE)
827 amrex::Real const xp_mid = (xp_new + xp_old)*0.5_rt;
828 amrex::Real const yp_mid = (yp_new + yp_old)*0.5_rt;
829 amrex::Real const zp_mid = (zp_new + zp_old)*0.5_rt;
830 amrex::Real const rpxy_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
831 amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new + zp_new*zp_new);
832 amrex::Real const rpxy_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
833 amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old + zp_old*zp_old);
834 amrex::Real const rpxy_mid = (rpxy_new + rpxy_old)*0.5_rt;
835 amrex::Real const rp_mid = (rp_new + rp_old)*0.5_rt;
836 amrex::Real const costheta_mid = (rpxy_mid > 0._rt ? xp_mid/rpxy_mid : 1._rt);
837 amrex::Real const sintheta_mid = (rpxy_mid > 0._rt ? yp_mid/rpxy_mid : 0._rt);
838 amrex::Real const cosphi_mid = (rp_mid > 0._rt ? rpxy_mid/rp_mid : 1._rt);
839 amrex::Real const sinphi_mid = (rp_mid > 0._rt ? zp_mid/rp_mid : 0._rt);
840
841 // Keep these double to avoid bug in single precision
842 double x_new = (rp_new - xyzmin.x)*dinv.x;
843 double const x_old = (rp_old - xyzmin.x)*dinv.x;
844 amrex::Real const vx = (rp_new - rp_old)/dt;
845 amrex::Real const vy = (-uxp_mid*sintheta_mid + uyp_mid*costheta_mid)*gaminv;
846 amrex::Real const vz = (-uxp_mid*costheta_mid*cosphi_mid - uyp_mid*sintheta_mid*cosphi_mid + uzp_mid*sinphi_mid)*gaminv;
847#elif defined(WARPX_DIM_XZ)
848 // Keep these double to avoid bug in single precision
849 double x_new = (xp_new - xyzmin.x)*dinv.x;
850 double const x_old = (xp_old - xyzmin.x)*dinv.x;
851 amrex::Real const vx = (xp_new - xp_old)/dt;
852 amrex::Real const vy = uyp_mid*gaminv;
853#elif defined(WARPX_DIM_1D_Z)
854 amrex::Real const vx = uxp_mid*gaminv;
855 amrex::Real const vy = uyp_mid*gaminv;
856#elif defined(WARPX_DIM_3D)
857 // Keep these double to avoid bug in single precision
858 double x_new = (xp_new - xyzmin.x)*dinv.x;
859 double const x_old = (xp_old - xyzmin.x)*dinv.x;
860 double y_new = (yp_new - xyzmin.y)*dinv.y;
861 double const y_old = (yp_old - xyzmin.y)*dinv.y;
862 amrex::Real const vx = (xp_new - xp_old)/dt;
863 amrex::Real const vy = (yp_new - yp_old)/dt;
864#endif
865
866#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)
867 // Keep these double to avoid bug in single precision
868 double z_new = (zp_new - xyzmin.z)*dinv.z;
869 double const z_old = (zp_old - xyzmin.z)*dinv.z;
870 amrex::Real const vz = (zp_new - zp_old)/dt;
871#endif
872
873 // Define velocity kernels to deposit
874 amrex::Real const wqx = wq_invvol*vx;
875 amrex::Real const wqy = wq_invvol*vy;
876 amrex::Real const wqz = wq_invvol*vz;
877
878 // Compute total change in particle position (always do before cropping)
879#if !defined(WARPX_DIM_1D_Z)
880 const double dxp = x_new - x_old;
881#endif
882#if defined(WARPX_DIM_3D)
883 const double dyp = y_new - y_old;
884#endif
885#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)
886 const double dzp = z_new - z_old;
887#endif
888
889 // Crop particle orbits at absorbing domain boundaries
890#if defined(WARPX_DIM_3D)
891 ParticleUtils::crop_at_boundary(x_old, y_old, z_old, x_new, y_new, z_new,
892 domain_double[0][0], domain_double[0][1], do_cropping[0]);
893 ParticleUtils::crop_at_boundary(y_old, z_old, x_old, y_new, z_new, x_new,
894 domain_double[1][0], domain_double[1][1], do_cropping[1]);
895 ParticleUtils::crop_at_boundary(z_old, x_old, y_old, z_new, x_new, y_new,
896 domain_double[2][0], domain_double[2][1], do_cropping[2]);
897#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
898 ParticleUtils::crop_at_boundary(x_old, z_old, x_new, z_new,
899 domain_double[0][0], domain_double[0][1], do_cropping[0]);
900 ParticleUtils::crop_at_boundary(z_old, x_old, z_new, x_new,
901 domain_double[1][0], domain_double[1][1], do_cropping[1]);
902#elif defined(WARPX_DIM_1D_Z)
903 ParticleUtils::crop_at_boundary(z_new, domain_double[0][0], domain_double[0][1], do_cropping[0]);
904#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
905 ParticleUtils::crop_at_boundary(x_new, domain_double[0][0], domain_double[0][1], do_cropping[0]);
906#endif
907
908 // 1) Determine the number of segments.
909 // 2) Loop over segments and deposit current.
910
911 // cell crossings are defined at cell edges if depos_order is odd
912 // cell crossings are defined at cell centers if depos_order is even
913
914 int num_segments = 1;
915 double shift = 0.0;
916 if ( (depos_order % 2) == 0 ) { shift = 0.5; }
917
918#if defined(WARPX_DIM_3D)
919
920 // compute cell crossings in X-direction
921 const auto i_old = static_cast<int>(x_old-shift);
922 const auto i_new = static_cast<int>(x_new-shift);
923 const int cell_crossings_x = std::abs(i_new-i_old);
924 num_segments += cell_crossings_x;
925
926 // compute cell crossings in Y-direction
927 const auto j_old = static_cast<int>(y_old-shift);
928 const auto j_new = static_cast<int>(y_new-shift);
929 const int cell_crossings_y = std::abs(j_new-j_old);
930 num_segments += cell_crossings_y;
931
932 // compute cell crossings in Z-direction
933 const auto k_old = static_cast<int>(z_old-shift);
934 const auto k_new = static_cast<int>(z_new-shift);
935 const int cell_crossings_z = std::abs(k_new-k_old);
936 num_segments += cell_crossings_z;
937
938 // Compute initial particle cell locations in each direction
939 // used to find the position at cell crossings.
940 // Keep these double to avoid bug in single precision
941 const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
942 const auto dirY_sign = static_cast<double>(dyp < 0. ? -1. : 1.);
943 const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
944 double Xcell = 0., Ycell = 0., Zcell = 0.;
945 if (num_segments > 1) {
946 Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
947 Ycell = static_cast<double>(j_old) + shift + 0.5*(1.-dirY_sign);
948 Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
949 }
950
951 // loop over the number of segments and deposit
952 const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
953 const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
954 double dxp_seg, dyp_seg, dzp_seg;
955 double x0_new, y0_new, z0_new;
956 double x0_old = x_old;
957 double y0_old = y_old;
958 double z0_old = z_old;
959
960 for (int ns = 0; ns < num_segments; ns++) {
961
962 if (ns == num_segments-1) { // final segment
963
964 x0_new = x_new;
965 y0_new = y_new;
966 z0_new = z_new;
967 dxp_seg = x0_new - x0_old;
968 dyp_seg = y0_new - y0_old;
969 dzp_seg = z0_new - z0_old;
970
971 }
972 else {
973
974 x0_new = Xcell + dirX_sign;
975 y0_new = Ycell + dirY_sign;
976 z0_new = Zcell + dirZ_sign;
977 dxp_seg = x0_new - x0_old;
978 dyp_seg = y0_new - y0_old;
979 dzp_seg = z0_new - z0_old;
980
981 if ( (dyp == 0. || std::abs(dxp_seg) < std::abs(dxp/dyp*dyp_seg))
982 && (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) ) {
983 Xcell = x0_new;
984 dyp_seg = dyp/dxp*dxp_seg;
985 dzp_seg = dzp/dxp*dxp_seg;
986 y0_new = y0_old + dyp_seg;
987 z0_new = z0_old + dzp_seg;
988 }
989 else if (dzp == 0. || std::abs(dyp_seg) < std::abs(dyp/dzp*dzp_seg)) {
990 Ycell = y0_new;
991 dxp_seg = dxp/dyp*dyp_seg;
992 dzp_seg = dzp/dyp*dyp_seg;
993 x0_new = x0_old + dxp_seg;
994 z0_new = z0_old + dzp_seg;
995 }
996 else {
997 Zcell = z0_new;
998 dxp_seg = dxp/dzp*dzp_seg;
999 dyp_seg = dyp/dzp*dzp_seg;
1000 x0_new = x0_old + dxp_seg;
1001 y0_new = y0_old + dyp_seg;
1002 }
1003
1004 }
1005
1006 // Compute the segment factors (each equal to dt_seg/dt for nonzero dxp, dyp, or dzp)
1007 const auto seg_factor_x = static_cast<amrex::Real>(dxp == 0. ? 1._rt : dxp_seg/dxp);
1008 const auto seg_factor_y = static_cast<amrex::Real>(dyp == 0. ? 1._rt : dyp_seg/dyp);
1009 const auto seg_factor_z = static_cast<amrex::Real>(dzp == 0. ? 1._rt : dzp_seg/dzp);
1010
1011 // Compute cell-based weights using the average segment position
1012 // Keep these double to avoid bug in single precision
1013 double sx_cell[depos_order] = {0.};
1014 double sy_cell[depos_order] = {0.};
1015 double sz_cell[depos_order] = {0.};
1016 double const x0_bar = (x0_new + x0_old)/2.0;
1017 double const y0_bar = (y0_new + y0_old)/2.0;
1018 double const z0_bar = (z0_new + z0_old)/2.0;
1019 const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1020 const int j0_cell = compute_shape_factor_cell( sy_cell, y0_bar-0.5 );
1021 const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1022
1023 if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1024 const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1025 double sx_old_cell[depos_order] = {0.};
1026 double sx_new_cell[depos_order] = {0.};
1027 double sy_old_cell[depos_order] = {0.};
1028 double sy_new_cell[depos_order] = {0.};
1029 double sz_old_cell[depos_order] = {0.};
1030 double sz_new_cell[depos_order] = {0.};
1031 const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1032 const int j0_cell_2 = compute_shape_factors_cell( sy_old_cell, sy_new_cell, y0_old-0.5, y0_new-0.5 );
1033 const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1034 amrex::ignore_unused(i0_cell_2, j0_cell_2, k0_cell_2);
1035 for (int m = 0; m < depos_order; m++) {
1036 sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1037 sy_cell[m] = (4.0*sy_cell[m] + sy_old_cell[m] + sy_new_cell[m])/6.0;
1038 sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1039 }
1040 }
1041
1042 // Compute node-based weights using the old and new segment positions
1043 // Keep these double to avoid bug in single precision
1044 double sx_old_node[depos_order+1] = {0.};
1045 double sx_new_node[depos_order+1] = {0.};
1046 double sy_old_node[depos_order+1] = {0.};
1047 double sy_new_node[depos_order+1] = {0.};
1048 double sz_old_node[depos_order+1] = {0.};
1049 double sz_new_node[depos_order+1] = {0.};
1050 const int i0_node = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1051 const int j0_node = compute_shape_factors_node( sy_old_node, sy_new_node, y0_old, y0_new );
1052 const int k0_node = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1053
1054 // deposit Jx and Sxx for this segment
1055 amrex::Real weight;
1056 for (int i = 0; i <= depos_order - 1; i++) {
1057 for (int j = 0; j <= depos_order; j++) {
1058 for (int k = 0; k <= depos_order; k++) {
1059 weight = sx_cell[i]*( sy_old_node[j]*sz_old_node[k]*one_third
1060 + sy_old_node[j]*sz_new_node[k]*one_sixth
1061 + sy_new_node[j]*sz_old_node[k]*one_sixth
1062 + sy_new_node[j]*sz_new_node[k]*one_third )*seg_factor_x;
1063 if constexpr (deposit_J) {
1064 amrex::Gpu::Atomic::AddNoRet( &Jx_arr(lo.x+i0_cell+i, lo.y+j0_node+j, lo.z+k0_node+k), wqx*weight);
1065 }
1066 amrex::Gpu::Atomic::AddNoRet( &Sxx_arr(lo.x+i0_cell+i, lo.y+j0_node+j, lo.z+k0_node+k, 0), fpxx*weight*weight);
1067 }
1068 }
1069 }
1070
1071 // deposit Jy and Syy or this segment
1072 for (int i = 0; i <= depos_order; i++) {
1073 for (int j = 0; j <= depos_order - 1; j++) {
1074 for (int k = 0; k <= depos_order; k++) {
1075 weight = sy_cell[j]*( sx_old_node[i]*sz_old_node[k]*one_third
1076 + sx_old_node[i]*sz_new_node[k]*one_sixth
1077 + sx_new_node[i]*sz_old_node[k]*one_sixth
1078 + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;
1079 if constexpr (deposit_J) {
1080 amrex::Gpu::Atomic::AddNoRet( &Jy_arr(lo.x+i0_node+i, lo.y+j0_cell+j, lo.z+k0_node+k), wqy*weight);
1081 }
1082 amrex::Gpu::Atomic::AddNoRet( &Syy_arr(lo.x+i0_node+i, lo.y+j0_cell+j, lo.z+k0_node+k, 0), fpyy*weight*weight);
1083 }
1084 }
1085 }
1086
1087 // deposit Jz and Sz for this segment
1088 for (int i = 0; i <= depos_order; i++) {
1089 for (int j = 0; j <= depos_order; j++) {
1090 for (int k = 0; k <= depos_order - 1; k++) {
1091 weight = sz_cell[k]*( sx_old_node[i]*sy_old_node[j]*one_third
1092 + sx_old_node[i]*sy_new_node[j]*one_sixth
1093 + sx_new_node[i]*sy_old_node[j]*one_sixth
1094 + sx_new_node[i]*sy_new_node[j]*one_third )*seg_factor_z;
1095 if constexpr (deposit_J) {
1096 amrex::Gpu::Atomic::AddNoRet( &Jz_arr(lo.x+i0_node+i, lo.y+j0_node+j, lo.z+k0_cell+k), wqz*weight);
1097 }
1098 amrex::Gpu::Atomic::AddNoRet( &Szz_arr(lo.x+i0_node+i, lo.y+j0_node+j, lo.z+k0_cell+k, 0), fpzz*weight*weight);
1099 }
1100 }
1101 }
1102
1103 // update old segment values
1104 if (ns < num_segments-1) {
1105 x0_old = x0_new;
1106 y0_old = y0_new;
1107 z0_old = z0_new;
1108 }
1109
1110 } // end loop over segments
1111
1112#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
1113
1114 // compute cell crossings in X-direction
1115 const auto i_old = static_cast<int>(x_old-shift);
1116 const auto i_new = static_cast<int>(x_new-shift);
1117 const int cell_crossings_x = std::abs(i_new-i_old);
1118 num_segments += cell_crossings_x;
1119
1120 // compute cell crossings in Z-direction
1121 const auto k_old = static_cast<int>(z_old-shift);
1122 const auto k_new = static_cast<int>(z_new-shift);
1123 const int cell_crossings_z = std::abs(k_new-k_old);
1124 num_segments += cell_crossings_z;
1125
1126 // Compute initial particle cell locations in each direction
1127 // used to find the position at cell crossings.
1128 // Keep these double to avoid bug in single precision
1129 const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
1130 const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1131 double Xcell = 0., Zcell = 0.;
1132 if (num_segments > 1) {
1133 Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
1134 Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1135 }
1136
1137 // loop over the number of segments and deposit
1138 const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1139 const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1140 double dxp_seg, dzp_seg;
1141 double x0_new, z0_new;
1142 double x0_old = x_old;
1143 double z0_old = z_old;
1144
1145 constexpr int num_segments_max = 1 + 4*AMREX_SPACEDIM;
1146 AMREX_ALWAYS_ASSERT_WITH_MESSAGE( num_segments <= num_segments_max,
1147 "Error: num_segments must be less than or equal to 1 + 4*AMREX_SPACEDIM.");
1148
1149 // Save the start index and interpolation weights for each segment
1150 int i0_cell[num_segments_max];
1151 int i0_node[num_segments_max];
1152 int k0_cell[num_segments_max];
1153 int k0_node[num_segments_max];
1154 amrex::Real weight_cellX_nodeZ[num_segments_max][depos_order][depos_order+1];
1155 amrex::Real weight_nodeX_cellZ[num_segments_max][depos_order+1][depos_order];
1156 amrex::Real weight_nodeX_nodeZ[num_segments_max][depos_order+1][depos_order+1];
1157
1158 const auto i_mid = static_cast<int>(0.5*(x_new+x_old)-shift);
1159 const auto k_mid = static_cast<int>(0.5*(z_new+z_old)-shift);
1160 int SegNumX[num_segments_max];
1161 int SegNumZ[num_segments_max];
1162
1163 for (int ns = 0; ns < num_segments; ns++) {
1164
1165 if (ns == num_segments-1) { // final segment
1166
1167 x0_new = x_new;
1168 z0_new = z_new;
1169 dxp_seg = x0_new - x0_old;
1170 dzp_seg = z0_new - z0_old;
1171
1172 }
1173 else {
1174
1175 x0_new = Xcell + dirX_sign;
1176 z0_new = Zcell + dirZ_sign;
1177 dxp_seg = x0_new - x0_old;
1178 dzp_seg = z0_new - z0_old;
1179
1180 if (dzp == 0. || std::abs(dxp_seg) < std::abs(dxp/dzp*dzp_seg)) {
1181 Xcell = x0_new;
1182 dzp_seg = dzp/dxp*dxp_seg;
1183 z0_new = z0_old + dzp_seg;
1184 }
1185 else {
1186 Zcell = z0_new;
1187 dxp_seg = dxp/dzp*dzp_seg;
1188 x0_new = x0_old + dxp_seg;
1189 }
1190
1191 }
1192
1193 // Compute the segment factors (each equal to dt_seg/dt for nonzero dxp, or dzp)
1194 const auto seg_factor_x = static_cast<amrex::Real>(dxp == 0. ? 1._rt : dxp_seg/dxp);
1195 const auto seg_factor_z = static_cast<amrex::Real>(dzp == 0. ? 1._rt : dzp_seg/dzp);
1196
1197 // Compute cell-based weights using the average segment position
1198 // Keep these double to avoid bug in single precision
1199 double sx_cell[depos_order] = {0.};
1200 double sz_cell[depos_order] = {0.};
1201 double const x0_bar = (x0_new + x0_old)/2.0;
1202 double const z0_bar = (z0_new + z0_old)/2.0;
1203 i0_cell[ns] = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1204 k0_cell[ns] = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1205
1206 // Set the segment number for the mass matrix component calc
1207 if constexpr (full_mass_matrices) {
1208 const auto i0_mid = static_cast<int>(x0_bar-shift);
1209 const auto k0_mid = static_cast<int>(z0_bar-shift);
1210 SegNumX[ns] = 1 + i0_mid - i_mid;
1211 SegNumZ[ns] = 1 + k0_mid - k_mid;
1212 }
1213
1214 if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1215 const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1216 double sx_old_cell[depos_order] = {0.};
1217 double sx_new_cell[depos_order] = {0.};
1218 double sz_old_cell[depos_order] = {0.};
1219 double sz_new_cell[depos_order] = {0.};
1220 const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1221 const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1222 amrex::ignore_unused(i0_cell_2, k0_cell_2);
1223 for (int m = 0; m < depos_order; m++) {
1224 sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1225 sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1226 }
1227 }
1228
1229 // Compute node-based weights using the old and new segment positions
1230 // Keep these double to avoid bug in single precision
1231 double sx_old_node[depos_order+1] = {0.};
1232 double sx_new_node[depos_order+1] = {0.};
1233 double sz_old_node[depos_order+1] = {0.};
1234 double sz_new_node[depos_order+1] = {0.};
1235 i0_node[ns] = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1236 k0_node[ns] = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1237
1238 // deposit Jx and Sx for this segment
1239 amrex::Real weight;
1240 for (int i = 0; i <= depos_order - 1; i++) {
1241 for (int k = 0; k <= depos_order; k++) {
1242 const int i_J = lo.x + i0_cell[ns] + i;
1243 const int k_J = lo.y + k0_node[ns] + k;
1244 weight = sx_cell[i]*(sz_old_node[k] + sz_new_node[k])/2.0_rt*seg_factor_x;
1245 if constexpr (deposit_J) {
1246 amrex::Gpu::Atomic::AddNoRet(&Jx_arr(i_J, k_J, 0, 0), wqx*weight);
1247 }
1248 if constexpr (full_mass_matrices) { weight_cellX_nodeZ[ns][i][k] = weight; }
1249 else {
1250 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, k_J, 0, 0), fpxx*weight*weight);
1251 }
1252 }
1253 }
1254
1255 // deposit out-of-plane Jy and Sy for this segment
1256 const auto seg_factor_y = std::min(seg_factor_x,seg_factor_z);
1257 for (int i = 0; i <= depos_order; i++) {
1258 for (int k = 0; k <= depos_order; k++) {
1259 const int i_J = lo.x + i0_node[ns] + i;
1260 const int k_J = lo.y + k0_node[ns] + k;
1261 weight = ( sx_old_node[i]*sz_old_node[k]*one_third
1262 + sx_old_node[i]*sz_new_node[k]*one_sixth
1263 + sx_new_node[i]*sz_old_node[k]*one_sixth
1264 + sx_new_node[i]*sz_new_node[k]*one_third )*seg_factor_y;
1265 if constexpr (deposit_J) {
1266 amrex::Gpu::Atomic::AddNoRet(&Jy_arr(i_J, k_J, 0, 0), wqy*weight);
1267 }
1268 if constexpr (full_mass_matrices) { weight_nodeX_nodeZ[ns][i][k] = weight; }
1269 else {
1270 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(i_J, k_J, 0, 0), fpyy*weight*weight);
1271 }
1272 }
1273 }
1274
1275 // deposit Jz and Szz for this segment
1276 for (int i = 0; i <= depos_order; i++) {
1277 for (int k = 0; k <= depos_order - 1; k++) {
1278 const int i_J = lo.x + i0_node[ns] + i;
1279 const int k_J = lo.y + k0_cell[ns] + k;
1280 weight = sz_cell[k]*(sx_old_node[i] + sx_new_node[i])/2.0_rt*seg_factor_z;
1281 if constexpr (deposit_J) {
1282 amrex::Gpu::Atomic::AddNoRet(&Jz_arr(i_J, k_J, 0, 0), wqz*weight);
1283 }
1284 if constexpr (full_mass_matrices) { weight_nodeX_cellZ[ns][i][k] = weight; }
1285 else {
1286 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(i_J, k_J, 0, 0), fpzz*weight*weight);
1287 }
1288 }
1289 }
1290
1291 // update old segment values
1292 if (ns < num_segments - 1) {
1293 x0_old = x0_new;
1294 z0_old = z0_new;
1295 }
1296
1297 } // end loop over segments
1298
1299 if constexpr (full_mass_matrices) {
1300
1301 const int Ncomp_base = 2*depos_order + 2*max_crossings;
1302 const int Ncomp_xx0 = Ncomp_base - 1;
1303 const int Ncomp_xy0 = Ncomp_base;
1304 const int Ncomp_xz0 = Ncomp_base;
1305 const int Ncomp_yx0 = Ncomp_base;
1306 const int Ncomp_yy0 = 1 + Ncomp_base;
1307 const int Ncomp_yz0 = 1 + Ncomp_base;
1308 const int Ncomp_zx0 = Ncomp_base;
1309 const int Ncomp_zy0 = 1 + Ncomp_base;
1310 const int Ncomp_zz0 = 1 + Ncomp_base;
1311
1312 const int width_xx1 = depos_order + max_crossings; // (Ncomp_xx[1] - 1)/2
1313 const int width_yy1 = width_xx1; // (Ncomp_yy[1] - 1)/2
1314 const int width_zz1 = width_xx1 - 1; // (Ncomp_zz[1] - 1)/2
1315
1316 // Loop over segments and deposit full mass matrices
1317 for (int ns = 0; ns < num_segments; ns++) {
1318
1319 // Deposit Sxx, Sxz, and Sxy for this segment
1320 for (int i = 0; i <= depos_order - 1; i++) {
1321 for (int k = 0; k <= depos_order; k++) {
1322 const int i_J = lo.x + i0_cell[ns] + i;
1323 const int k_J = lo.y + k0_node[ns] + k;
1324 const amrex::Real weight_J = weight_cellX_nodeZ[ns][i][k];
1325 for (int ms = 0; ms < num_segments; ms++) {
1326 const int SegShiftX = max_crossings + SegNumX[ms] - SegNumX[ns];
1327 const int SegShiftZ = max_crossings + SegNumZ[ms] - SegNumZ[ns];
1328 // Deposit Sxx
1329 for (int kE = 0; kE <= depos_order; kE++) {
1330 const int row_xx = depos_order - k + kE + SegShiftZ;
1331 const int above_diag = (row_xx > width_xx1) ? 1 : 0;
1332 for (int iE = 0; iE <= depos_order - 1; iE++) {
1333 const int col_xx = depos_order - 1 - i + iE + SegShiftX;
1334 if (col_xx > Ncomp_xx0 - row_xx - above_diag) { break; } // Reduced deposit for diagonal mass matrices
1335 const int comp_xx = col_xx + Ncomp_xx0*row_xx;
1336 const amrex::Real weight_E = weight_cellX_nodeZ[ms][iE][kE];
1337 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, k_J, 0, comp_xx), fpxx*weight_J*weight_E);
1338 }
1339 }
1340 // Deposit Sxz
1341 for (int iE = 0; iE <= depos_order; iE++) {
1342 for (int kE = 0; kE <= depos_order - 1; kE++) {
1343 const amrex::Real weight_E = weight_nodeX_cellZ[ms][iE][kE];
1344 const int comp_xz = depos_order - 1 - i + iE + SegShiftX
1345 + Ncomp_xz0*(depos_order - k + kE + SegShiftZ);
1346 amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(i_J, k_J, 0, comp_xz), fpxz*weight_J*weight_E);
1347 }
1348 }
1349 // Deposit Sxy
1350 for (int iE = 0; iE <= depos_order; iE++) {
1351 for (int kE = 0; kE <= depos_order; kE++) {
1352 const amrex::Real weight_E = weight_nodeX_nodeZ[ms][iE][kE];
1353 const int comp_xy = depos_order - 1 - i + iE + SegShiftX
1354 + Ncomp_xy0*(depos_order - k + kE + SegShiftZ);
1355 amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(i_J, k_J, 0, comp_xy), fpxy*weight_J*weight_E);
1356 }
1357 }
1358 }
1359 }
1360 }
1361
1362 // Deposit Szx, Szz, and Szy for this segment
1363 for (int i = 0; i <= depos_order; i++) {
1364 for (int k = 0; k <= depos_order - 1; k++) {
1365 const int i_J = lo.x + i0_node[ns] + i;
1366 const int k_J = lo.y + k0_cell[ns] + k;
1367 const amrex::Real weight_J = weight_nodeX_cellZ[ns][i][k];
1368 for (int ms = 0; ms < num_segments; ms++) {
1369 const int SegShiftX = max_crossings + SegNumX[ms] - SegNumX[ns];
1370 const int SegShiftZ = max_crossings + SegNumZ[ms] - SegNumZ[ns];
1371 // Deposit Szx
1372 for (int iE = 0; iE <= depos_order - 1; iE++) {
1373 for (int kE = 0; kE <= depos_order; kE++) {
1374 const amrex::Real weight_E = weight_cellX_nodeZ[ms][iE][kE];
1375 const int comp_zx = depos_order - i + iE + SegShiftX
1376 + Ncomp_zx0*(depos_order-1 - k + kE + SegShiftZ);
1377 amrex::Gpu::Atomic::AddNoRet( &Szx_arr(i_J, k_J, 0, comp_zx), fpzx*weight_J*weight_E);
1378 }
1379 }
1380 // Deposit Szz
1381 for (int kE = 0; kE <= depos_order - 1; kE++) {
1382 const int row_zz = depos_order - 1 - k + kE + SegShiftZ;
1383 const int above_diag = (row_zz > width_zz1) ? 1 : 0;
1384 for (int iE = 0; iE <= depos_order; iE++) {
1385 const int col_zz = depos_order - i + iE + SegShiftX;
1386 if (col_zz > Ncomp_zz0 - 2 - row_zz - above_diag) { break; } // Reduced deposit for diagonal mass matrices
1387 const int comp_zz = col_zz + Ncomp_zz0*row_zz;
1388 const amrex::Real weight_E = weight_nodeX_cellZ[ms][iE][kE];
1389 amrex::Gpu::Atomic::AddNoRet( &Szz_arr(i_J, k_J, 0, comp_zz), fpzz*weight_J*weight_E);
1390 }
1391 }
1392 // Deposit Szy
1393 for (int iE = 0; iE <= depos_order; iE++) {
1394 for (int kE = 0; kE <= depos_order; kE++) {
1395 const amrex::Real weight_E = weight_nodeX_nodeZ[ms][iE][kE];
1396 const int comp_zy = depos_order - i + iE + SegShiftX
1397 + Ncomp_zy0*(depos_order-1 - k + kE + SegShiftZ);
1398 amrex::Gpu::Atomic::AddNoRet( &Szy_arr(i_J, k_J, 0, comp_zy), fpzy*weight_J*weight_E);
1399 }
1400 }
1401 }
1402 }
1403 }
1404
1405 // Deposit Syx, Syz, and Syy for this segment
1406 for (int i = 0; i <= depos_order; i++) {
1407 for (int k = 0; k <= depos_order; k++) {
1408 const int i_J = lo.x + i0_node[ns] + i;
1409 const int k_J = lo.y + k0_node[ns] + k;
1410 const amrex::Real weight_J = weight_nodeX_nodeZ[ns][i][k];
1411 for (int ms = 0; ms < num_segments; ms++) {
1412 const int SegShiftX = max_crossings + SegNumX[ms] - SegNumX[ns];
1413 const int SegShiftZ = max_crossings + SegNumZ[ms] - SegNumZ[ns];
1414 // Deposit Syx
1415 for (int iE = 0; iE <= depos_order - 1; iE++) {
1416 for (int kE = 0; kE <= depos_order; kE++) {
1417 const amrex::Real weight_E = weight_cellX_nodeZ[ms][iE][kE];
1418 const int comp_yx = depos_order - i + iE + SegShiftX
1419 + Ncomp_yx0*(depos_order - k + kE + SegShiftZ);
1420 amrex::Gpu::Atomic::AddNoRet( &Syx_arr(i_J, k_J, 0, comp_yx), fpyx*weight_J*weight_E);
1421 }
1422 }
1423 // Deposit Syz
1424 for (int iE = 0; iE <= depos_order; iE++) {
1425 for (int kE = 0; kE <= depos_order - 1; kE++) {
1426 const amrex::Real weight_E = weight_nodeX_cellZ[ms][iE][kE];
1427 const int comp_yz = depos_order - i + iE + SegShiftX
1428 + Ncomp_yz0*(depos_order - k + kE + SegShiftZ);
1429 amrex::Gpu::Atomic::AddNoRet( &Syz_arr(i_J, k_J, 0, comp_yz), fpyz*weight_J*weight_E);
1430 }
1431 }
1432 // Deposit Syy
1433 for (int kE = 0; kE <= depos_order; kE++) {
1434 const int row_yy = depos_order - k + kE + SegShiftZ;
1435 const int above_diag = (row_yy > width_yy1) ? 1 : 0;
1436 for (int iE = 0; iE <= depos_order; iE++) {
1437 const int col_yy = depos_order - i + iE + SegShiftX;
1438 if (col_yy > Ncomp_yy0 - 1 - row_yy - above_diag) { break; } // Reduced deposit for diagonal mass matrices
1439 const int comp_yy = col_yy + Ncomp_yy0*row_yy;
1440 const amrex::Real weight_E = weight_nodeX_nodeZ[ms][iE][kE];
1441 amrex::Gpu::Atomic::AddNoRet( &Syy_arr(i_J, k_J, 0, comp_yy), fpyy*weight_J*weight_E);
1442 }
1443 }
1444
1445 }
1446 }
1447 }
1448
1449 }
1450
1451 }
1452
1453#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
1454
1455 // compute cell crossings in X-direction
1456 const auto i_old = static_cast<int>(x_old-shift);
1457 const auto i_new = static_cast<int>(x_new-shift);
1458 const int cell_crossings_x = std::abs(i_new-i_old);
1459 num_segments += cell_crossings_x;
1460
1461 // Compute the initial cell location used to find the cell crossings.
1462 // Keep these double to avoid bug in single precision
1463 const auto dirX_sign = static_cast<double>(dxp < 0. ? -1. : 1.);
1464 double Xcell = static_cast<double>(i_old) + shift + 0.5*(1.-dirX_sign);
1465
1466 // loop over the number of segments and deposit
1467 const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1468 const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1469 double dxp_seg;
1470 double x0_new;
1471 double x0_old = x_old;
1472
1473 constexpr int num_segments_max = 1 + 4*AMREX_SPACEDIM;
1474 AMREX_ALWAYS_ASSERT_WITH_MESSAGE( num_segments <= num_segments_max,
1475 "Error: num_segments must be less than or equal to 1 + 4*AMREX_SPACEDIM.");
1476
1477 // Save the start index and interpolation weights for each segment
1478 int i0_cell[num_segments_max];
1479 int i0_node[num_segments_max];
1480 amrex::Real weight_cell[num_segments_max][depos_order];
1481 amrex::Real weight_node[num_segments_max][depos_order+1];
1482
1483 const auto i_mid = static_cast<int>(0.5*(x_new+x_old)-shift);
1484 int SegNum[num_segments_max];
1485
1486 for (int ns = 0; ns < num_segments; ns++) {
1487
1488 if (ns == num_segments-1) { // final segment
1489 x0_new = x_new;
1490 dxp_seg = x0_new - x0_old;
1491 }
1492 else {
1493 Xcell = Xcell + dirX_sign;
1494 x0_new = Xcell;
1495 dxp_seg = x0_new - x0_old;
1496 }
1497
1498 // Compute the segment factor (equal to dt_seg/dt for nonzero dxp)
1499 const auto seg_factor = static_cast<amrex::Real>(dxp == 0. ? 1._rt : dxp_seg/dxp);
1500
1501 // Compute cell-based weights using the average segment position
1502 // Keep these double to avoid bug in single precision
1503 double sx_cell[depos_order] = {0.};
1504 double const x0_bar = (x0_new + x0_old)/2.0;
1505 i0_cell[ns] = compute_shape_factor_cell( sx_cell, x0_bar-0.5 );
1506
1507 // Set the segment number for the mass matrix component calc
1508 if constexpr (full_mass_matrices) {
1509 const auto i0_mid = static_cast<int>(x0_bar-shift);
1510 SegNum[ns] = 1 + i0_mid - i_mid;
1511 }
1512
1513 if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1514 const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1515 double sx_old_cell[depos_order] = {0.};
1516 double sx_new_cell[depos_order] = {0.};
1517 const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 );
1518 amrex::ignore_unused(i0_cell_2);
1519 for (int m=0; m<depos_order; m++) {
1520 sx_cell[m] = (4.0*sx_cell[m] + sx_old_cell[m] + sx_new_cell[m])/6.0;
1521 }
1522 }
1523
1524 // Compute node-based weights using the old and new segment positions
1525 // Keep these double to avoid bug in single precision
1526 double sx_old_node[depos_order+1] = {0.};
1527 double sx_new_node[depos_order+1] = {0.};
1528 i0_node[ns] = compute_shape_factors_node( sx_old_node, sx_new_node, x0_old, x0_new );
1529
1530 // deposit out-of-plane Jy, Jz, Syy, and Szz for this segment
1531 for (int i = 0; i <= depos_order; i++) {
1532 const amrex::Real weight = 0.5_rt*(sx_old_node[i] + sx_new_node[i])*seg_factor;
1533 const int i_J = lo.x + i0_node[ns] + i;
1534 if constexpr (deposit_J) {
1535 amrex::Gpu::Atomic::AddNoRet( &Jy_arr(i_J, 0, 0), wqy*weight);
1536 amrex::Gpu::Atomic::AddNoRet( &Jz_arr(i_J, 0, 0), wqz*weight);
1537 }
1538 //
1539 if constexpr (full_mass_matrices) { weight_node[ns][i] = weight; }
1540 else {
1541 amrex::Gpu::Atomic::AddNoRet( &Syy_arr(i_J, 0, 0, 0), fpyy*weight);
1542 amrex::Gpu::Atomic::AddNoRet( &Szz_arr(i_J, 0, 0, 0), fpzz*weight);
1543 }
1544 }
1545
1546 // deposit Jx and Sxx for this segment
1547 for (int i = 0; i <= depos_order - 1; i++) {
1548 const amrex::Real weight = sx_cell[i]*seg_factor;
1549 const int i_J = lo.x + i0_cell[ns] + i;
1550 if constexpr (deposit_J) {
1551 amrex::Gpu::Atomic::AddNoRet(&Jx_arr(i_J, 0, 0), wqx*weight);
1552 }
1553 if constexpr (full_mass_matrices) { weight_cell[ns][i] = weight; }
1554 else {
1555 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, 0, 0, 0), fpxx*weight);
1556 }
1557 }
1558
1559 // update old segment values
1560 if (ns < num_segments-1) {
1561 x0_old = x0_new;
1562 }
1563
1564 }
1565
1566 if constexpr (full_mass_matrices) {
1567
1568 const int width_xx = depos_order - 1 + max_crossings;
1569 const int width_yy = depos_order + max_crossings;
1570
1571 // Loop over segments and deposit full mass matrices
1572 for (int ns = 0; ns < num_segments; ns++) {
1573
1574 // Deposit Sxx, Sxy, and Sxz for this segment
1575 for (int i = 0; i <= depos_order - 1; i++) {
1576
1577 const int i_J = lo.x + i0_cell[ns] + i;
1578 const amrex::Real weight_J = weight_cell[ns][i];
1579 for (int ms = 0; ms < num_segments; ms++) {
1580 const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];
1581 for (int iE = 0; iE <= depos_order - 1; iE++) {
1582 const int comp_xx = depos_order - 1 - i + iE + SegShift;
1583 if (comp_xx > width_xx) { break; } // Reduced deposit for diagonal mass matrices
1584 const amrex::Real weight_E = weight_cell[ms][iE];
1585 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(i_J, 0, 0, comp_xx), fpxx*weight_J*weight_E);
1586 }
1587 for (int iE = 0; iE <= depos_order; iE++) {
1588 const amrex::Real weight_E = weight_node[ms][iE];
1589 const int comp_xy = depos_order - 1 - i + iE + SegShift;
1590 amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(i_J, 0, 0, comp_xy), fpxz*weight_J*weight_E);
1591 amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(i_J, 0, 0, comp_xy), fpxy*weight_J*weight_E);
1592 }
1593 }
1594
1595 }
1596
1597 // Deposit Syx, Syy, Syz, Szx, Szy, and Szz for this segment
1598 for (int i = 0; i <= depos_order; i++) {
1599
1600 const int i_J = lo.x + i0_node[ns] + i;
1601 const amrex::Real weight_J = weight_node[ns][i];
1602 for (int ms = 0; ms < num_segments; ms++) {
1603 const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];
1604 for (int iE = 0; iE <= depos_order; iE++) {
1605 const amrex::Real weight_E = weight_node[ms][iE];
1606 const int comp_yy = depos_order - i + iE + SegShift;
1607 if (comp_yy <= width_yy) { // Reduced deposit for diagonal mass matrices
1608 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(i_J, 0, 0, comp_yy), fpyy*weight_J*weight_E);
1609 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(i_J, 0, 0, comp_yy), fpzz*weight_J*weight_E);
1610 }
1611 amrex::Gpu::Atomic::AddNoRet(&Syz_arr(i_J, 0, 0, comp_yy), fpyz*weight_J*weight_E);
1612 amrex::Gpu::Atomic::AddNoRet(&Szy_arr(i_J, 0, 0, comp_yy), fpzy*weight_J*weight_E);
1613 }
1614 for (int iE = 0; iE <= depos_order - 1; iE++) {
1615 const amrex::Real weight_E = weight_cell[ms][iE];
1616 const int comp_yx = depos_order - i + iE + SegShift;
1617 amrex::Gpu::Atomic::AddNoRet(&Syx_arr(i_J, 0, 0, comp_yx), fpyx*weight_J*weight_E);
1618 amrex::Gpu::Atomic::AddNoRet(&Szx_arr(i_J, 0, 0, comp_yx), fpzx*weight_J*weight_E);
1619 }
1620 }
1621
1622 }
1623
1624 }
1625
1626 }
1627
1628#elif defined(WARPX_DIM_1D_Z)
1629
1630 // compute cell crossings in Z-direction
1631 const auto k_old = static_cast<int>(z_old-shift);
1632 const auto k_new = static_cast<int>(z_new-shift);
1633 const int cell_crossings_z = std::abs(k_new-k_old);
1634 num_segments += cell_crossings_z;
1635
1636 // Compute initial particle cell location used to find cell crossings.
1637 // Keep these double to avoid bug in single precision
1638 const auto dirZ_sign = static_cast<double>(dzp < 0. ? -1. : 1.);
1639 double Zcell = static_cast<double>(k_old) + shift + 0.5*(1.-dirZ_sign);
1640
1641 // loop over the number of segments and deposit
1642 const Compute_shape_factor< depos_order-1 > compute_shape_factor_cell;
1643 const Compute_shape_factor_pair< depos_order > compute_shape_factors_node;
1644 double dzp_seg;
1645 double z0_new;
1646 double z0_old = z_old;
1647
1648 constexpr int num_segments_max = 1 + 4*AMREX_SPACEDIM;
1649 AMREX_ALWAYS_ASSERT_WITH_MESSAGE( num_segments <= num_segments_max,
1650 "Error: num_segments must be less than or equal to 1 + 4*AMREX_SPACEDIM.");
1651
1652 // Save the start index and interpolation weights for each segment
1653 int k0_cell[num_segments_max];
1654 int k0_node[num_segments_max];
1655 amrex::Real weight_cell[num_segments_max][depos_order];
1656 amrex::Real weight_node[num_segments_max][depos_order+1];
1657
1658 const auto k_mid = static_cast<int>(0.5*(z_new+z_old)-shift);
1659 int SegNum[num_segments_max];
1660
1661 for (int ns = 0; ns < num_segments; ns++) {
1662
1663 if (ns == num_segments-1) { // final segment
1664 z0_new = z_new;
1665 dzp_seg = z0_new - z0_old;
1666 }
1667 else {
1668 Zcell = Zcell + dirZ_sign;
1669 z0_new = Zcell;
1670 dzp_seg = z0_new - z0_old;
1671 }
1672
1673 // Compute the segment factor (equal to dt_seg/dt for nonzero dzp)
1674 const auto seg_factor = static_cast<amrex::Real>(dzp == 0. ? 1._rt : dzp_seg/dzp);
1675
1676 // Compute cell-based weights using the average segment position
1677 // Keep these double to avoid bug in single precision
1678 double sz_cell[depos_order] = {0.};
1679 double const z0_bar = (z0_new + z0_old)/2.0;
1680 k0_cell[ns] = compute_shape_factor_cell( sz_cell, z0_bar-0.5 );
1681
1682 // Set the segment number for the mass matrix component calc
1683 if constexpr (full_mass_matrices) {
1684 const auto k0_mid = static_cast<int>(z0_bar-shift);
1685 SegNum[ns] = 1 + k0_mid - k_mid;
1686 }
1687
1688 if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights
1689 const Compute_shape_factor_pair<depos_order-1> compute_shape_factors_cell;
1690 double sz_old_cell[depos_order] = {0.};
1691 double sz_new_cell[depos_order] = {0.};
1692 const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 );
1693 amrex::ignore_unused(k0_cell_2);
1694 for (int m = 0; m < depos_order; m++) {
1695 sz_cell[m] = (4.0*sz_cell[m] + sz_old_cell[m] + sz_new_cell[m])/6.0;
1696 }
1697 }
1698
1699 // Compute node-based weights using the old and new segment positions
1700 // Keep these double to avoid bug in single precision
1701 double sz_old_node[depos_order+1] = {0.};
1702 double sz_new_node[depos_order+1] = {0.};
1703 k0_node[ns] = compute_shape_factors_node( sz_old_node, sz_new_node, z0_old, z0_new );
1704
1705 // deposit out-of-plane Jx, Jy, Sx, and Sy for this segment
1706 for (int k = 0; k <= depos_order; k++) {
1707 const amrex::Real weight = 0.5_rt*(sz_old_node[k] + sz_new_node[k])*seg_factor;
1708 const int k_J = lo.x + k0_node[ns] + k;
1709 if constexpr (deposit_J) {
1710 amrex::Gpu::Atomic::AddNoRet(&Jx_arr(k_J, 0, 0), wqx*weight);
1711 amrex::Gpu::Atomic::AddNoRet(&Jy_arr(k_J, 0, 0), wqy*weight);
1712 }
1713 if constexpr (full_mass_matrices) { weight_node[ns][k] = weight; }
1714 else {
1715 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(k_J, 0, 0, 0), fpxx*weight*weight);
1716 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(k_J, 0, 0, 0), fpyy*weight*weight);
1717 }
1718 }
1719
1720 // deposit Jz and Szz for this segment
1721 for (int k = 0; k <= depos_order - 1; k++) {
1722 const amrex::Real weight = sz_cell[k]*seg_factor;
1723 const int k_J = lo.x + k0_cell[ns] + k;
1724 if constexpr (deposit_J) {
1725 amrex::Gpu::Atomic::AddNoRet(&Jz_arr(k_J, 0, 0), wqz*weight);
1726 }
1727 if constexpr (full_mass_matrices) { weight_cell[ns][k] = weight; }
1728 else {
1729 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(k_J, 0, 0, 0), fpzz*weight*weight);
1730 }
1731 }
1732
1733 // update old segment values
1734 if (ns < num_segments-1) {
1735 z0_old = z0_new;
1736 }
1737
1738 }
1739
1740 if constexpr (full_mass_matrices) {
1741
1742 const int width_zz = depos_order - 1 + max_crossings;
1743 const int width_yy = depos_order + max_crossings;
1744
1745 // Loop over segments and deposit full mass matrices
1746 for (int ns = 0; ns < num_segments; ns++) {
1747
1748 // Deposit Sxx, Sxy, Sxz, Syx, Syy, and Syz for this segment
1749 for (int k = 0; k <= depos_order; k++) {
1750
1751 const int k_J = lo.x + k0_node[ns] + k;
1752 const amrex::Real weight_J = weight_node[ns][k];
1753 for (int ms = 0; ms < num_segments; ms++) {
1754 const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];
1755 for (int kE = 0; kE <= depos_order; kE++) {
1756 const amrex::Real weight_E = weight_node[ms][kE];
1757 const int comp_yy = depos_order - k + kE + SegShift;
1758 if (comp_yy <= width_yy) { // Reduced deposit for diagonal mass matrices
1759 amrex::Gpu::Atomic::AddNoRet(&Sxx_arr(k_J, 0, 0, comp_yy), fpxx*weight_J*weight_E);
1760 amrex::Gpu::Atomic::AddNoRet(&Syy_arr(k_J, 0, 0, comp_yy), fpyy*weight_J*weight_E);
1761 }
1762 amrex::Gpu::Atomic::AddNoRet(&Sxy_arr(k_J, 0, 0, comp_yy), fpxy*weight_J*weight_E);
1763 amrex::Gpu::Atomic::AddNoRet(&Syx_arr(k_J, 0, 0, comp_yy), fpyx*weight_J*weight_E);
1764 }
1765 for (int kE = 0; kE <= depos_order - 1; kE++) {
1766 const amrex::Real weight_E = weight_cell[ms][kE];
1767 const int comp_yz = depos_order - k + kE + SegShift;
1768 amrex::Gpu::Atomic::AddNoRet(&Sxz_arr(k_J, 0, 0, comp_yz), fpxz*weight_J*weight_E);
1769 amrex::Gpu::Atomic::AddNoRet(&Syz_arr(k_J, 0, 0, comp_yz), fpyz*weight_J*weight_E);
1770 }
1771 }
1772
1773 }
1774
1775 // Deposit Szx, Szy, and Szz for this segment
1776 for (int k = 0; k <= depos_order - 1; k++) {
1777
1778 const int k_J = lo.x + k0_cell[ns] + k;
1779 const amrex::Real weight_J = weight_cell[ns][k];
1780 for (int ms = 0; ms < num_segments; ms++) {
1781 const int SegShift = max_crossings + SegNum[ms] - SegNum[ns];
1782 for (int kE = 0; kE <= depos_order - 1; kE++) {
1783 const int comp_zz = depos_order - 1 - k + kE + SegShift;
1784 if (comp_zz > width_zz) { break; } // Reduced deposit for diagonal mass matrices
1785 const amrex::Real weight_E = weight_cell[ms][kE];
1786 amrex::Gpu::Atomic::AddNoRet(&Szz_arr(k_J, 0, 0, comp_zz), fpzz*weight_J*weight_E);
1787 }
1788 for (int kE = 0; kE <= depos_order; kE++) {
1789 const amrex::Real weight_E = weight_node[ms][kE];
1790 const int comp_zy = depos_order-1 - k + kE + SegShift;
1791 amrex::Gpu::Atomic::AddNoRet(&Szx_arr(k_J, 0, 0, comp_zy), fpzx*weight_J*weight_E);
1792 amrex::Gpu::Atomic::AddNoRet(&Szy_arr(k_J, 0, 0, comp_zy), fpzy*weight_J*weight_E);
1793 }
1794 }
1795
1796 }
1797
1798 }
1799
1800 }
1801
1802#endif
1803}
1804
1835template <int depos_order, bool full_mass_matrices>
1836void doVillasenorSigmaDeposition ([[maybe_unused]] const amrex::ParticleReal* xp_n_data,
1837 [[maybe_unused]] const amrex::ParticleReal* yp_n_data,
1838 [[maybe_unused]] const amrex::ParticleReal* zp_n_data,
1839 const GetParticlePosition<PIdx>& GetPosition,
1840 [[maybe_unused]] const int* nsuborbits,
1841 const amrex::ParticleReal* wp,
1842 const amrex::ParticleReal* uxp_n,
1843 const amrex::ParticleReal* uyp_n,
1844 const amrex::ParticleReal* uzp_n,
1845 const amrex::ParticleReal* uxp_nph,
1846 const amrex::ParticleReal* uyp_nph,
1847 const amrex::ParticleReal* uzp_nph,
1848 const int max_crossings,
1849 amrex::Array4<amrex::Real> const& Sxx_arr,
1850 amrex::Array4<amrex::Real> const& Sxy_arr,
1851 amrex::Array4<amrex::Real> const& Sxz_arr,
1852 amrex::Array4<amrex::Real> const& Syx_arr,
1853 amrex::Array4<amrex::Real> const& Syy_arr,
1854 amrex::Array4<amrex::Real> const& Syz_arr,
1855 amrex::Array4<amrex::Real> const& Szx_arr,
1856 amrex::Array4<amrex::Real> const& Szy_arr,
1857 amrex::Array4<amrex::Real> const& Szz_arr,
1858 GetExternalEBField const & getExternalEB,
1859 const amrex::ParticleReal Bx_ext,
1860 const amrex::ParticleReal By_ext,
1861 const amrex::ParticleReal Bz_ext,
1865 const amrex::IndexType Bx_type,
1866 const amrex::IndexType By_type,
1867 const amrex::IndexType Bz_type,
1868 const long np_to_deposit,
1869 const amrex::Real dt,
1870 const amrex::XDim3& dinv,
1871 const amrex::XDim3& xyzmin,
1872 const amrex::GpuArray<amrex::GpuArray<double,2>, AMREX_SPACEDIM> & domain_double,
1873 const amrex::GpuArray<amrex::GpuArray<bool,2>, AMREX_SPACEDIM> & do_cropping,
1874 const amrex::Dim3 lo,
1875 const amrex::Real qs,
1876 const amrex::Real ms)
1877{
1878 using namespace amrex::literals;
1879
1880 const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
1881
1882 enum exteb_flags : int { no_exteb, has_exteb };
1883 const int exteb_runtime_flag = getExternalEB.isNoOp() ? no_exteb : has_exteb;
1884
1885 // Loop over particles and deposit mass matrices
1888 {exteb_runtime_flag},
1889 np_to_deposit,
1890 [=] AMREX_GPU_DEVICE (long const ip, auto exteb_control) {
1891
1892 // Skip mass matrix deposition for particles with suborbits.
1893 if (nsuborbits && nsuborbits[ip] > 1) { return; }
1894
1895 amrex::ParticleReal xp_nph, yp_nph, zp_nph;
1896 GetPosition(ip, xp_nph, yp_nph, zp_nph);
1897
1898 amrex::ParticleReal const xp_n = (xp_n_data ? xp_n_data[ip] : 0._prt);
1899 amrex::ParticleReal const yp_n = (yp_n_data ? yp_n_data[ip] : 0._prt);
1900 amrex::ParticleReal const zp_n = (zp_n_data ? zp_n_data[ip] : 0._prt);
1901
1902 // Compute position at time n + 1
1903 amrex::ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n;
1904 amrex::ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n;
1905 amrex::ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n;
1906
1907#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
1908 amrex::ParticleReal const rp_mid = 0.5_prt*(std::sqrt(xp_np1*xp_np1 + yp_np1*yp_np1)
1909 + std::sqrt(xp_n*xp_n + yp_n*yp_n));
1910 amrex::ParticleReal const costh = (rp_mid > 0._prt ? xp_nph/rp_mid : 1._prt);
1911 amrex::ParticleReal const sinth = (rp_mid > 0._prt ? yp_nph/rp_mid : 0._prt);
1912#elif defined(WARPX_DIM_RSPHERE)
1913 amrex::ParticleReal const rp_mid = 0.5_prt*(std::sqrt(xp_np1*xp_np1 + yp_np1*yp_np1 + zp_np1*zp_np1)
1914 + std::sqrt(xp_n*xp_n + yp_n*yp_n + zp_n*zp_n));
1915 amrex::ParticleReal const rpxy_mid = std::sqrt(xp_nph*xp_nph + yp_nph*yp_nph);
1916 amrex::ParticleReal const costh = (rpxy_mid > 0._prt ? xp_nph/rpxy_mid : 1._prt);
1917 amrex::ParticleReal const sinth = (rpxy_mid > 0._prt ? yp_nph/rpxy_mid : 0._prt);
1918 amrex::ParticleReal const cosph = (rp_mid > 0._prt ? rpxy_mid/rp_mid : 1._prt);
1919 amrex::ParticleReal const sinph = (rp_mid > 0._prt ? zp_nph/rp_mid : 0._prt);
1920#endif
1921
1922 // Initialize B on particle to uniform external B
1923 amrex::ParticleReal Bxp = Bx_ext;
1924 amrex::ParticleReal Byp = By_ext;
1925 amrex::ParticleReal Bzp = Bz_ext;
1926
1927 // Increment with externally applied B-field with time and spatial variation
1928 [[maybe_unused]] const auto& getExternalEB_tmp = getExternalEB;
1929 if constexpr (exteb_control == has_exteb) {
1930 amrex::ParticleReal Exp = 0._prt;
1931 amrex::ParticleReal Eyp = 0._prt;
1932 amrex::ParticleReal Ezp = 0._prt;
1933 getExternalEB(ip, Exp, Eyp, Ezp, Bxp, Byp, Bzp);
1934 }
1935
1936 // Gather magnetic field from the grid
1937 const int depos_order_perp = 1;
1938 const int depos_order_para = 1;
1939 amrex::ParticleReal B1p = 0._prt;
1940 amrex::ParticleReal B2p = 0._prt;
1941 amrex::ParticleReal B3p = 0._prt;
1943#if defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
1944 rp_mid, 0._prt, 0._prt,
1945#elif defined(WARPX_DIM_RZ)
1946 rp_mid, 0._prt, zp_nph,
1947#else
1948 xp_nph, yp_nph, zp_nph,
1949#endif
1950 B1p, B2p, B3p,
1951 Bx_arr, By_arr, Bz_arr,
1952 Bx_type, By_type, Bz_type,
1953 dinv, xyzmin, lo, /*n_rz_azimuthal_modes=*/0 );
1954
1955 // Because we pass rp_mid and 0. instead of xp_nph and yp_nph above for
1956 // axisymmetric geometries, the returned fields on the particle are in mapped space.
1957 // Need to convert them to Cartesian before passing to setMassMatricesKerenels().
1958#if defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RZ)
1959 // Convert B1p = Brp and B2p = Bthp to Bxp and Byp
1960 Bxp += costh*B1p - sinth*B2p;
1961 Byp += costh*B2p + sinth*B1p;
1962 Bzp += B3p;
1963#elif defined(WARPX_DIM_RSPHERE)
1964 // Convert B1p = Brp, B2p = Bthp, and B3p = Bphp to Bxp, Byp, and Bzp
1965 Bxp += costh*cosph*B1p - sinth*B2p - costh*sinph*B3p;
1966 Byp += sinth*cosph*B1p + costh*B2p - sinth*sinph*B3p;
1967 Bzp += sinph*B1p + cosph*B3p;
1968#else
1969 Bxp += B1p;
1970 Byp += B2p;
1971 Bzp += B3p;
1972#endif
1973
1974 // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
1975 const amrex::ParticleReal gaminv = GetImplicitGammaInverse(uxp_n[ip], uyp_n[ip], uzp_n[ip],
1976 uxp_nph[ip], uyp_nph[ip], uzp_nph[ip]);
1977
1978 // Compute current density kernels to deposit
1979 const amrex::Real wq_invvol = qs*wp[ip]*invvol;
1980 const amrex::Real rhop = wq_invvol*gaminv;
1981
1982 // Set the Mass Matrices kernels
1983 amrex::ParticleReal fpxx, fpxy, fpxz;
1984 amrex::ParticleReal fpyx, fpyy, fpyz;
1985 amrex::ParticleReal fpzx, fpzy, fpzz;
1986 setMassMatricesKernels(qs, ms, dt, rhop,
1987#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_RCYLINDER)
1988 costh, sinth,
1989#endif
1990 uxp_nph[ip], uyp_nph[ip], uzp_nph[ip],
1991 Bxp, Byp, Bzp,
1992 fpxx, fpxy, fpxz,
1993 fpyx, fpyy, fpyz,
1994 fpzx, fpzy, fpzz);
1995
1996 // Pass dummy arrays for Jx, Jy, Jz (which will not be used)
1997 amrex::Array4<amrex::Real> const dummy_Jx{};
1998 amrex::Array4<amrex::Real> const dummy_Jy{};
1999 amrex::Array4<amrex::Real> const dummy_Jz{};
2000
2001 //NOLINTNEXTLINE(readability-suspicious-call-argument)
2002 doVillasenorJandSigmaDepositionKernel<depos_order,full_mass_matrices,/*deposit_J=*/false>(
2003 xp_n, yp_n, zp_n,
2004 xp_np1, yp_np1, zp_np1,
2005 wq_invvol,
2006 uxp_nph[ip], uyp_nph[ip], uzp_nph[ip],
2007 gaminv,
2008 fpxx, fpxy, fpxz,
2009 fpyx, fpyy, fpyz,
2010 fpzx, fpzy, fpzz,
2011 dummy_Jx, dummy_Jy, dummy_Jz,
2012 max_crossings,
2013 Sxx_arr, Sxy_arr, Sxz_arr,
2014 Syx_arr, Syy_arr, Syz_arr,
2015 Szx_arr, Szy_arr, Szz_arr,
2016 dt, dinv, xyzmin, domain_double, do_cropping, lo );
2017
2018 });
2019}
2020
2021#endif // WARPX_MASSMATRICESDEPOSITION_H_
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
#define AMREX_INLINE
#define AMREX_GPU_DEVICE
#define AMREX_GPU_HOST_DEVICE
Array4< int const > offset
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void doDirectGatherVectorField(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, amrex::ParticleReal &Fxp, amrex::ParticleReal &Fyp, amrex::ParticleReal &Fzp, amrex::Array4< amrex::Real const > const &Fx_arr, amrex::Array4< amrex::Real const > const &Fy_arr, amrex::Array4< amrex::Real const > const &Fz_arr, const amrex::IndexType Fx_type, const amrex::IndexType Fy_type, const amrex::IndexType Fz_type, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 &lo, const int n_rz_azimuthal_modes)
Gather vector field F for a single particle.
Definition FieldGather.H:36
AMREX_GPU_HOST_DEVICE AMREX_INLINE void setMassMatricesKernels(const amrex::ParticleReal qs, const amrex::ParticleReal ms, const amrex::ParticleReal dt, const amrex::ParticleReal rhop, const amrex::ParticleReal costh, const amrex::ParticleReal sinth, const amrex::ParticleReal upx, const amrex::ParticleReal upy, const amrex::ParticleReal upz, const amrex::ParticleReal Bpx, const amrex::ParticleReal Bpy, const amrex::ParticleReal Bpz, amrex::ParticleReal &fpxx, amrex::ParticleReal &fpxy, amrex::ParticleReal &fpxz, amrex::ParticleReal &fpyx, amrex::ParticleReal &fpyy, amrex::ParticleReal &fpyz, amrex::ParticleReal &fpzx, amrex::ParticleReal &fpzy, amrex::ParticleReal &fpzz)
Set the mass matrices kernels for thread thread_num.
Definition MassMatricesDeposition.H:51
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doVillasenorJandSigmaDepositionKernel(const amrex::ParticleReal xp_old, const amrex::ParticleReal yp_old, const amrex::ParticleReal zp_old, const amrex::ParticleReal xp_new, const amrex::ParticleReal yp_new, const amrex::ParticleReal zp_new, const amrex::ParticleReal wq_invvol, const amrex::ParticleReal uxp_mid, const amrex::ParticleReal uyp_mid, const amrex::ParticleReal uzp_mid, const amrex::ParticleReal gaminv, const amrex::ParticleReal fpxx, const amrex::ParticleReal fpxy, const amrex::ParticleReal fpxz, const amrex::ParticleReal fpyx, const amrex::ParticleReal fpyy, const amrex::ParticleReal fpyz, const amrex::ParticleReal fpzx, const amrex::ParticleReal fpzy, const amrex::ParticleReal fpzz, amrex::Array4< amrex::Real > const &Jx_arr, amrex::Array4< amrex::Real > const &Jy_arr, amrex::Array4< amrex::Real > const &Jz_arr, int max_crossings, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::GpuArray< amrex::GpuArray< double, 2 >, 3 > &domain_double, const amrex::GpuArray< amrex::GpuArray< bool, 2 >, 3 > &do_cropping, const amrex::Dim3 lo)
Kernel for the Villasenor deposition of J and S (mass matrices) for thread thread_num.
Definition MassMatricesDeposition.H:760
void doDirectSigmaDeposition(const GetParticlePosition< PIdx > &GetPosition, const int *nsuborbits, const amrex::ParticleReal *wp, const amrex::ParticleReal *uxp_n, const amrex::ParticleReal *uyp_n, const amrex::ParticleReal *uzp_n, const amrex::ParticleReal *uxp_nph, const amrex::ParticleReal *uyp_nph, const amrex::ParticleReal *uzp_nph, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, const amrex::IntVect &jx_type, const amrex::IntVect &jy_type, const amrex::IntVect &jz_type, GetExternalEBField const &getExternalEB, const amrex::ParticleReal Bx_ext, const amrex::ParticleReal By_ext, const amrex::ParticleReal Bz_ext, const amrex::Array4< amrex::Real const > &Bx_arr, const amrex::Array4< amrex::Real const > &By_arr, const amrex::Array4< amrex::Real const > &Bz_arr, const amrex::IndexType Bx_type, const amrex::IndexType By_type, const amrex::IndexType Bz_type, const long np_to_deposit, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real qs, const amrex::Real ms)
direct deposition of mass matrices for thread thread_num
Definition MassMatricesDeposition.H:599
void doVillasenorSigmaDeposition(const amrex::ParticleReal *xp_n_data, const amrex::ParticleReal *yp_n_data, const amrex::ParticleReal *zp_n_data, const GetParticlePosition< PIdx > &GetPosition, const int *nsuborbits, const amrex::ParticleReal *wp, const amrex::ParticleReal *uxp_n, const amrex::ParticleReal *uyp_n, const amrex::ParticleReal *uzp_n, const amrex::ParticleReal *uxp_nph, const amrex::ParticleReal *uyp_nph, const amrex::ParticleReal *uzp_nph, const int max_crossings, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, GetExternalEBField const &getExternalEB, const amrex::ParticleReal Bx_ext, const amrex::ParticleReal By_ext, const amrex::ParticleReal Bz_ext, const amrex::Array4< amrex::Real const > &Bx_arr, const amrex::Array4< amrex::Real const > &By_arr, const amrex::Array4< amrex::Real const > &Bz_arr, const amrex::IndexType Bx_type, const amrex::IndexType By_type, const amrex::IndexType Bz_type, const long np_to_deposit, const amrex::Real dt, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::GpuArray< amrex::GpuArray< double, 2 >, 3 > &domain_double, const amrex::GpuArray< amrex::GpuArray< bool, 2 >, 3 > &do_cropping, const amrex::Dim3 lo, const amrex::Real qs, const amrex::Real ms)
Villasenor and Buneman deposition of mass matrices for thread thread_num.
Definition MassMatricesDeposition.H:1836
AMREX_GPU_HOST_DEVICE AMREX_INLINE void doDirectJandSigmaDepositionKernel(const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, const amrex::Real wq_invvol, const amrex::ParticleReal vx, const amrex::ParticleReal vy, const amrex::ParticleReal vz, const amrex::ParticleReal fpxx, const amrex::ParticleReal fpxy, const amrex::ParticleReal fpxz, const amrex::ParticleReal fpyx, const amrex::ParticleReal fpyy, const amrex::ParticleReal fpyz, const amrex::ParticleReal fpzx, const amrex::ParticleReal fpzy, const amrex::ParticleReal fpzz, amrex::Array4< amrex::Real > const &jx_arr, amrex::Array4< amrex::Real > const &jy_arr, amrex::Array4< amrex::Real > const &jz_arr, amrex::Array4< amrex::Real > const &Sxx_arr, amrex::Array4< amrex::Real > const &Sxy_arr, amrex::Array4< amrex::Real > const &Sxz_arr, amrex::Array4< amrex::Real > const &Syx_arr, amrex::Array4< amrex::Real > const &Syy_arr, amrex::Array4< amrex::Real > const &Syz_arr, amrex::Array4< amrex::Real > const &Szx_arr, amrex::Array4< amrex::Real > const &Szy_arr, amrex::Array4< amrex::Real > const &Szz_arr, const amrex::IntVect &jx_type, const amrex::IntVect &jy_type, const amrex::IntVect &jz_type, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo)
Kernel for the direct deposition of J and S (mass matrices) for thread thread_num.
Definition MassMatricesDeposition.H:154
@ vz
Definition RigidInjectedParticleContainer.H:27
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE amrex::ParticleReal GetImplicitGammaInverse(const amrex::ParticleReal uxp_n, const amrex::ParticleReal uyp_n, const amrex::ParticleReal uzp_n, const amrex::ParticleReal uxp_nph, const amrex::ParticleReal uyp_nph, const amrex::ParticleReal uzp_nph) noexcept
Compute the inverse Lorentz factor for the position update in the implicit methods,...
Definition UpdatePosition.H:77
__host__ static __device__ constexpr IntVectND< dim > TheZeroVector() noexcept
amrex_real Real
amrex_particle_real ParticleReal
ArrayND< T, 4, true > Array4
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void crop_at_boundary(double &x1, double xmin, double xmax, amrex::GpuArray< bool, 2 > const &do_cropping)
Definition ParticleUtils.H:256
constexpr auto inv_c2_v
inverse of the square of the vacuum speed of light [s^2/m^2] (variable template)
Definition constant.H:153
__host__ __device__ AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
__host__ __device__ void ignore_unused(const Ts &...)
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
__host__ __device__ BoxND< dim > shift(const BoxND< dim > &b, int dir, int nzones) noexcept
IndexTypeND< 3 > IndexType
IntVectND< 3 > IntVect
Definition ShapeFactors.H:168
Definition ShapeFactors.H:29
Functor class that assigns external field values (E and B) to particles.
Definition GetExternalFields.H:23
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool isNoOp() const
Definition GetExternalFields.H:60
Functor that can be used to extract the positions of the macroparticles inside a ParallelFor kernel.
Definition GetAndSetPosition.H:75