WarpX
BinaryCollision.H
Go to the documentation of this file.
1 /* Copyright 2020-2021 Yinjian Zhao, David Grote, Neil Zaim
2  *
3  * This file is part of WarpX.
4  *
5  * License: BSD-3-Clause-LBNL
6  */
7 #ifndef WARPX_PARTICLES_COLLISION_BINARYCOLLISION_H_
8 #define WARPX_PARTICLES_COLLISION_BINARYCOLLISION_H_
9 
22 #include "Utils/ParticleUtils.H"
23 #include "Utils/TextMsg.H"
25 #include "WarpX.H"
26 
29 
30 #include <AMReX.H>
31 #include <AMReX_Algorithm.H>
32 #include <AMReX_BLassert.H>
33 #include <AMReX_Config.H>
34 #include <AMReX_DenseBins.H>
35 #include <AMReX_Extension.H>
36 #include <AMReX_Geometry.H>
37 #include <AMReX_GpuAtomic.H>
38 #include <AMReX_GpuContainers.H>
39 #include <AMReX_GpuControl.H>
40 #include <AMReX_GpuDevice.H>
41 #include <AMReX_GpuLaunch.H>
42 #include <AMReX_GpuQualifiers.H>
43 #include <AMReX_LayoutData.H>
44 #include <AMReX_MFIter.H>
45 #include <AMReX_PODVector.H>
46 #include <AMReX_ParmParse.H>
47 #include <AMReX_Particles.H>
48 #include <AMReX_ParticleTile.H>
49 #include <AMReX_Random.H>
50 #include <AMReX_REAL.H>
51 #include <AMReX_Scan.H>
52 #include <AMReX_Utility.H>
53 #include <AMReX_Vector.H>
54 
55 #include <AMReX_BaseFwd.H>
56 
57 #include <cmath>
58 #include <string>
59 
69 template <typename CollisionFunctor,
70  typename CopyTransformFunctor = NoParticleCreationFunc>
71 class BinaryCollision final
72  : public CollisionBase
73 {
74  // Define shortcuts for frequently-used type names
80 
81 public:
89  BinaryCollision (std::string collision_name, MultiParticleContainer const * const mypc)
90  : CollisionBase(collision_name)
91  {
92  if(m_species_names.size() != 2) {
93  WARPX_ABORT_WITH_MESSAGE("Binary collision " + collision_name + " must have exactly two species.");
94  }
95 
96  const CollisionType collision_type = BinaryCollisionUtils::get_collision_type(collision_name, mypc);
97 
99 
100  m_binary_collision_functor = CollisionFunctor(collision_name, mypc, m_isSameSpecies);
101 
102  const amrex::ParmParse pp_collision_name(collision_name);
103  pp_collision_name.queryarr("product_species", m_product_species);
104 
105  // if DSMC the colliding species are also product species
106  // Therefore, we insert the colliding species at the beginning of `m_product_species`
107  if (collision_type == CollisionType::DSMC) {
108  m_product_species.insert( m_product_species.begin(), m_species_names.begin(), m_species_names.end() );
109  }
111 
112  if ((std::is_same_v<CopyTransformFunctor, NoParticleCreationFunc>) && (m_have_product_species)) {
113  WARPX_ABORT_WITH_MESSAGE( "Binary collision " + collision_name +
114  " does not produce species. Thus, `product_species` should not be specified in the input script." );
115  }
116  m_copy_transform_functor = CopyTransformFunctor(collision_name, mypc);
117  }
118 
119  ~BinaryCollision () override = default;
120 
121  BinaryCollision ( BinaryCollision const &) = default;
123 
126 
134  void doCollisions (amrex::Real cur_time, amrex::Real dt, MultiParticleContainer* mypc) override
135  {
136  amrex::ignore_unused(cur_time);
137 
139  auto& species2 = mypc->GetParticleContainerFromName(m_species_names[1]);
140 
141  // In case of particle creation, create the necessary vectors
142  const int n_product_species = m_product_species.size();
143  amrex::Vector<WarpXParticleContainer*> product_species_vector;
144  amrex::Vector<SmartCopyFactory> copy_factory_species1;
145  amrex::Vector<SmartCopyFactory> copy_factory_species2;
146  amrex::Vector<SmartCopy> copy_species1;
147  amrex::Vector<SmartCopy> copy_species2;
148  for (int i = 0; i < n_product_species; i++)
149  {
150  auto& product = mypc->GetParticleContainerFromName(m_product_species[i]);
151  product.defineAllParticleTiles();
152  product_species_vector.push_back(&product);
153  // Although the copy factories are not explicitly reused past this point, we need to
154  // store them in vectors so that the data that they own, which is used by the smart
155  // copy functors, does not go out of scope at the end of this for loop.
156  copy_factory_species1.push_back(SmartCopyFactory(species1, product));
157  copy_factory_species2.push_back(SmartCopyFactory(species2, product));
158  copy_species1.push_back(copy_factory_species1[i].getSmartCopy());
159  copy_species2.push_back(copy_factory_species2[i].getSmartCopy());
160  }
161 #ifdef AMREX_USE_GPU
162  amrex::Gpu::DeviceVector<SmartCopy> device_copy_species1(n_product_species);
163  amrex::Gpu::DeviceVector<SmartCopy> device_copy_species2(n_product_species);
164  amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, copy_species1.begin(),
165  copy_species1.end(), device_copy_species1.begin());
166  amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, copy_species2.begin(),
167  copy_species2.end(), device_copy_species2.begin());
169  auto *copy_species1_data = device_copy_species1.data();
170  auto *copy_species2_data = device_copy_species2.data();
171 #else
172  auto *copy_species1_data = copy_species1.data();
173  auto *copy_species2_data = copy_species2.data();
174 #endif
176  species1.defineAllParticleTiles();
177  if (!m_isSameSpecies) { species2.defineAllParticleTiles(); }
178  }
179 
180  // Enable tiling
181  amrex::MFItInfo info;
182  if (amrex::Gpu::notInLaunchRegion()) { info.EnableTiling(species1.tile_size); }
183 
184  // Loop over refinement levels
185  for (int lev = 0; lev <= species1.finestLevel(); ++lev){
186 
188 
189  // Loop over all grids/tiles at this level
190 #ifdef AMREX_USE_OMP
191  info.SetDynamic(true);
192 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
193 #endif
194  for (amrex::MFIter mfi = species1.MakeMFIter(lev, info); mfi.isValid(); ++mfi){
196  {
198  }
199  auto wt = static_cast<amrex::Real>(amrex::second());
200 
201  doCollisionsWithinTile( dt, lev, mfi, species1, species2, product_species_vector,
202  copy_species1_data, copy_species2_data);
203 
205  {
207  wt = static_cast<amrex::Real>(amrex::second()) - wt;
208  amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt);
209  }
210  }
211 
213  // The fact that there are product species indicates that particles of
214  // the colliding species (`species1` and `species2`) may be removed
215  // (i.e., marked as invalid) in the process of creating new product particles.
216  species1.deleteInvalidParticles();
217  if (!m_isSameSpecies) { species2.deleteInvalidParticles(); }
218  }
219  }
220  }
221 
235  amrex::Real dt, int const lev, amrex::MFIter const& mfi,
236  WarpXParticleContainer& species_1,
237  WarpXParticleContainer& species_2,
238  amrex::Vector<WarpXParticleContainer*> product_species_vector,
239  SmartCopy* copy_species1, SmartCopy* copy_species2)
240  {
241  using namespace ParticleUtils;
242  using namespace amrex::literals;
243 
244  const auto& binary_collision_functor = m_binary_collision_functor.executor();
245  const bool have_product_species = m_have_product_species;
246 
247  // Store product species data in vectors
248  const int n_product_species = m_product_species.size();
249  amrex::Vector<ParticleTileType*> tile_products;
250  amrex::Vector<GetParticlePosition<PIdx>> get_position_products;
251  amrex::Vector<index_type> products_np;
253  constexpr int getpos_offset = 0;
254  for (int i = 0; i < n_product_species; i++)
255  {
256  ParticleTileType& ptile_product = product_species_vector[i]->ParticlesAt(lev, mfi);
257  tile_products.push_back(&ptile_product);
258  get_position_products.push_back(GetParticlePosition<PIdx>(ptile_product,
259  getpos_offset));
260  products_np.push_back(ptile_product.numParticles());
261  products_mass.push_back(product_species_vector[i]->getMass());
262  }
263  auto *tile_products_data = tile_products.data();
264 
265  if ( m_isSameSpecies ) // species_1 == species_2
266  {
267  // Extract particles in the tile that `mfi` points to
268  ParticleTileType& ptile_1 = species_1.ParticlesAt(lev, mfi);
269 
270  // Find the particles that are in each cell of this tile
271  ParticleBins bins_1 = findParticlesInEachCell( lev, mfi, ptile_1 );
272 
273  // Loop over cells, and collide the particles in each cell
274 
275  // Extract low-level data
276  auto const n_cells = static_cast<int>(bins_1.numBins());
277  // - Species 1
278  const auto soa_1 = ptile_1.getParticleTileData();
279  index_type* AMREX_RESTRICT indices_1 = bins_1.permutationPtr();
280  index_type const* AMREX_RESTRICT cell_offsets_1 = bins_1.offsetsPtr();
281  const amrex::ParticleReal q1 = species_1.getCharge();
282  const amrex::ParticleReal m1 = species_1.getMass();
283  auto get_position_1 = GetParticlePosition<PIdx>(ptile_1, getpos_offset);
284 
285  amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev);
286 #if defined WARPX_DIM_1D_Z
287  auto dV = geom.CellSize(0);
288 #elif defined WARPX_DIM_XZ
289  auto dV = geom.CellSize(0) * geom.CellSize(1);
290 #elif defined WARPX_DIM_RZ
291  amrex::Box const& cbx = mfi.tilebox(amrex::IntVect::TheZeroVector()); //Cell-centered box
292  const auto lo = lbound(cbx);
293  const auto hi = ubound(cbx);
294  int const nz = hi.y-lo.y+1;
295  auto dr = geom.CellSize(0);
296  auto dz = geom.CellSize(1);
297 #elif defined(WARPX_DIM_3D)
298  auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
299 #endif
300 
301  /*
302  The following calculations are only required when creating product particles
303  */
304  const int n_cells_products = have_product_species ? n_cells: 0;
305  amrex::Gpu::DeviceVector<index_type> n_pairs_in_each_cell(n_cells_products);
306  index_type* AMREX_RESTRICT p_n_pairs_in_each_cell = n_pairs_in_each_cell.dataPtr();
307 
308  // Compute how many pairs in each cell and store in n_pairs_in_each_cell array
309  // For a single species, the number of pair in a cell is half the number of particles
310  // in that cell, rounded up to the next higher integer.
311  amrex::ParallelFor( n_cells_products,
312  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
313  {
314  const auto n_part_in_cell = cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell];
315  // Particular case: if there's only 1 particle in a cell, then there's no pair
316  p_n_pairs_in_each_cell[i_cell] = (n_part_in_cell == 1)? 0: (n_part_in_cell+1)/2;
317  }
318  );
319 
320  // Start indices of the pairs in a cell. Will be used for particle creation.
321  amrex::Gpu::DeviceVector<index_type> pair_offsets(n_cells_products);
322  const index_type n_total_pairs = (n_cells_products == 0) ? 0:
323  amrex::Scan::ExclusiveSum(n_cells_products,
324  p_n_pairs_in_each_cell, pair_offsets.data());
325  index_type* AMREX_RESTRICT p_pair_offsets = pair_offsets.dataPtr();
326 
327  amrex::Gpu::DeviceVector<index_type> n_ind_pairs_in_each_cell(n_cells+1);
328  index_type* AMREX_RESTRICT p_n_ind_pairs_in_each_cell = n_ind_pairs_in_each_cell.dataPtr();
329 
330  amrex::ParallelFor( n_cells+1,
331  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
332  {
333  const auto n_part_in_cell = (i_cell < n_cells)? cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell]: 0;
334  // number of independent collisions in each cell
335  p_n_ind_pairs_in_each_cell[i_cell] = n_part_in_cell/2;
336  }
337  );
338 
339  // start indices of independent collisions.
340  amrex::Gpu::DeviceVector<index_type> coll_offsets(n_cells+1);
341  // number of total independent collision pairs
342  const auto n_independent_pairs = (int) amrex::Scan::ExclusiveSum(n_cells+1,
343  p_n_ind_pairs_in_each_cell, coll_offsets.data(), amrex::Scan::RetSum{true});
344  index_type* AMREX_RESTRICT p_coll_offsets = coll_offsets.dataPtr();
345 
346  // mask: equal to 1 if particle creation occurs for a given pair, 0 otherwise
349  // Will be filled with the index of the first particle of a given pair
350  amrex::Gpu::DeviceVector<index_type> pair_indices_1(n_total_pairs);
351  index_type* AMREX_RESTRICT p_pair_indices_1 = pair_indices_1.dataPtr();
352  // Will be filled with the index of the second particle of a given pair
353  amrex::Gpu::DeviceVector<index_type> pair_indices_2(n_total_pairs);
354  index_type* AMREX_RESTRICT p_pair_indices_2 = pair_indices_2.dataPtr();
355  // How much weight should be given to the produced particles (and removed from the
356  // reacting particles)
357  amrex::Gpu::DeviceVector<amrex::ParticleReal> pair_reaction_weight(n_total_pairs);
358  amrex::ParticleReal* AMREX_RESTRICT p_pair_reaction_weight =
359  pair_reaction_weight.dataPtr();
360  /*
361  End of calculations only required when creating product particles
362  */
363 
364  // create vectors to store density and temperature on cell level
367  if (binary_collision_functor.m_computeSpeciesDensities) {
368  n1_vec.resize(n_cells);
369  }
370  if (binary_collision_functor.m_computeSpeciesTemperatures) {
371  T1_vec.resize(n_cells);
372  }
373  amrex::ParticleReal* AMREX_RESTRICT n1_in_each_cell = n1_vec.dataPtr();
374  amrex::ParticleReal* AMREX_RESTRICT T1_in_each_cell = T1_vec.dataPtr();
375 
376  // Loop over cells
377  amrex::ParallelForRNG( n_cells,
378  [=] AMREX_GPU_DEVICE (int i_cell, amrex::RandomEngine const& engine) noexcept
379  {
380  // The particles from species1 that are in the cell `i_cell` are
381  // given by the `indices_1[cell_start_1:cell_stop_1]`
382  index_type const cell_start_1 = cell_offsets_1[i_cell];
383  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
384 
385  // Do not collide if there is only one particle in the cell
386  if ( cell_stop_1 - cell_start_1 <= 1 ) { return; }
387 
388  // compute local density [1/m^3]
389  if (binary_collision_functor.m_computeSpeciesDensities) {
390  amrex::ParticleReal wtot1 = 0.0;
391  amrex::ParticleReal * const AMREX_RESTRICT w1 = soa_1.m_rdata[PIdx::w];
392  for (index_type i1=cell_start_1; i1<cell_stop_1; ++i1) {
393  wtot1 += w1[ indices_1[i1] ];
394  }
395 #if defined WARPX_DIM_RZ
396  const int ri = (i_cell - i_cell%nz) / nz;
397  auto dV = MathConst::pi*(2.0_prt*ri+1.0_prt)*dr*dr*dz;
398 #endif
399  n1_in_each_cell[i_cell] = wtot1/dV;
400  }
401 
402  // compute local temperature [Joules]
403  if (binary_collision_functor.m_computeSpeciesTemperatures) {
404  amrex::ParticleReal * const AMREX_RESTRICT w1 = soa_1.m_rdata[PIdx::w];
405  amrex::ParticleReal * const AMREX_RESTRICT u1x = soa_1.m_rdata[PIdx::ux];
406  amrex::ParticleReal * const AMREX_RESTRICT u1y = soa_1.m_rdata[PIdx::uy];
407  amrex::ParticleReal * const AMREX_RESTRICT u1z = soa_1.m_rdata[PIdx::uz];
408  T1_in_each_cell[i_cell] = ComputeTemperature( cell_start_1, cell_stop_1, indices_1,
409  w1, u1x, u1y, u1z, m1 );
410  }
411 
412  // shuffle
413  ShuffleFisherYates(indices_1, cell_start_1, cell_stop_1, engine);
414  }
415  );
416 
417  // Loop over independent particle pairs
418  // To speed up binary collisions on GPU, we try to expose as much parallelism
419  // as possible (while avoiding race conditions): Instead of looping with one GPU
420  // thread per cell, we loop with one GPU thread per "independent pairs" (i.e. pairs
421  // that do not touch the same macroparticles, so that there is no race condition),
422  // where the number of independent pairs is determined by the lower number of
423  // macroparticles of either species, within each cell.
424  amrex::ParallelForRNG( n_independent_pairs,
425  [=] AMREX_GPU_DEVICE (int i_coll, amrex::RandomEngine const& engine) noexcept
426  {
427  // to avoid type mismatch errors
428  auto ui_coll = (index_type)i_coll;
429 
430  // Use a bisection algorithm to find the index of the cell in which this pair is located
431  const int i_cell = amrex::bisect( p_coll_offsets, 0, n_cells, ui_coll );
432 
433  // The particles from species1 that are in the cell `i_cell` are
434  // given by the `indices_1[cell_start_1:cell_stop_1]`
435  index_type const cell_start_1 = cell_offsets_1[i_cell];
436  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
437  index_type const cell_half_1 = (cell_start_1+cell_stop_1)/2;
438 
439  // collision number of the cell
440  const index_type coll_idx = ui_coll - p_coll_offsets[i_cell];
441 
442  // Same but for the pairs
443  index_type const cell_start_pair = have_product_species?
444  p_pair_offsets[i_cell] : 0;
445 
446 #if defined WARPX_DIM_RZ
447  const int ri = (i_cell - i_cell%nz) / nz;
448  auto dV = MathConst::pi*(2.0_prt*ri+1.0_prt)*dr*dr*dz;
449 #endif
450 
451  // Get the local density and temperature for this cell
452  amrex::ParticleReal n1 = 0.0;
453  amrex::ParticleReal T1 = 0.0;
454  if (binary_collision_functor.m_computeSpeciesDensities) {
455  n1 = n1_in_each_cell[i_cell];
456  }
457  if (binary_collision_functor.m_computeSpeciesTemperatures) {
458  T1 = T1_in_each_cell[i_cell];
459  }
460 
461  // Call the function in order to perform collisions
462  // If there are product species, mask, p_pair_indices_1/2, and
463  // p_pair_reaction_weight are filled here
464  binary_collision_functor(
465  cell_start_1, cell_half_1,
466  cell_half_1, cell_stop_1,
467  indices_1, indices_1,
468  soa_1, soa_1, get_position_1, get_position_1,
469  n1, n1, T1, T1,
470  q1, q1, m1, m1, dt, dV, coll_idx,
471  cell_start_pair, p_mask, p_pair_indices_1, p_pair_indices_2,
472  p_pair_reaction_weight, engine);
473  }
474  );
475 
476  // Create the new product particles and define their initial values
477  // num_added: how many particles of each product species have been created
478  const amrex::Vector<int> num_added = m_copy_transform_functor(n_total_pairs,
479  ptile_1, ptile_1,
480  product_species_vector,
481  tile_products_data,
482  m1, m1,
483  products_mass, p_mask, products_np,
484  copy_species1, copy_species2,
485  p_pair_indices_1, p_pair_indices_2,
486  p_pair_reaction_weight);
487 
488  for (int i = 0; i < n_product_species; i++)
489  {
490  setNewParticleIDs(*(tile_products_data[i]), static_cast<int>(products_np[i]), num_added[i]);
491  }
492  }
493  else // species_1 != species_2
494  {
495  // Extract particles in the tile that `mfi` points to
496  ParticleTileType& ptile_1 = species_1.ParticlesAt(lev, mfi);
497  ParticleTileType& ptile_2 = species_2.ParticlesAt(lev, mfi);
498 
499  // Find the particles that are in each cell of this tile
500  ParticleBins bins_1 = findParticlesInEachCell( lev, mfi, ptile_1 );
501  ParticleBins bins_2 = findParticlesInEachCell( lev, mfi, ptile_2 );
502 
503  // Loop over cells, and collide the particles in each cell
504 
505  // Extract low-level data
506  auto const n_cells = static_cast<int>(bins_1.numBins());
507  // - Species 1
508  const auto soa_1 = ptile_1.getParticleTileData();
509  index_type* AMREX_RESTRICT indices_1 = bins_1.permutationPtr();
510  index_type const* AMREX_RESTRICT cell_offsets_1 = bins_1.offsetsPtr();
511  const amrex::ParticleReal q1 = species_1.getCharge();
512  const amrex::ParticleReal m1 = species_1.getMass();
513  auto get_position_1 = GetParticlePosition<PIdx>(ptile_1, getpos_offset);
514  // - Species 2
515  const auto soa_2 = ptile_2.getParticleTileData();
516  index_type* AMREX_RESTRICT indices_2 = bins_2.permutationPtr();
517  index_type const* AMREX_RESTRICT cell_offsets_2 = bins_2.offsetsPtr();
518  const amrex::ParticleReal q2 = species_2.getCharge();
519  const amrex::ParticleReal m2 = species_2.getMass();
520  auto get_position_2 = GetParticlePosition<PIdx>(ptile_2, getpos_offset);
521 
522  amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev);
523 #if defined WARPX_DIM_1D_Z
524  auto dV = geom.CellSize(0);
525 #elif defined WARPX_DIM_XZ
526  auto dV = geom.CellSize(0) * geom.CellSize(1);
527 #elif defined WARPX_DIM_RZ
528  amrex::Box const& cbx = mfi.tilebox(amrex::IntVect::TheZeroVector()); //Cell-centered box
529  const auto lo = lbound(cbx);
530  const auto hi = ubound(cbx);
531  const int nz = hi.y-lo.y+1;
532  auto dr = geom.CellSize(0);
533  auto dz = geom.CellSize(1);
534 #elif defined(WARPX_DIM_3D)
535  auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
536 #endif
537 
538  /*
539  The following calculations are only required when creating product particles
540  */
541  const int n_cells_products = have_product_species ? n_cells: 0;
542  amrex::Gpu::DeviceVector<index_type> n_pairs_in_each_cell(n_cells_products);
543  index_type* AMREX_RESTRICT p_n_pairs_in_each_cell = n_pairs_in_each_cell.dataPtr();
544 
545  // Compute how many pairs in each cell and store in n_pairs_in_each_cell array
546  // For different species, the number of pairs in a cell is the number of particles of
547  // the species that has the most particles in that cell
548  amrex::ParallelFor( n_cells_products,
549  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
550  {
551  const auto n_part_in_cell_1 = cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell];
552  const auto n_part_in_cell_2 = cell_offsets_2[i_cell+1] - cell_offsets_2[i_cell];
553  // Particular case: no pair if a species has no particle in that cell
554  if (n_part_in_cell_1 == 0 || n_part_in_cell_2 == 0) {
555  p_n_pairs_in_each_cell[i_cell] = 0;
556  } else {
557  p_n_pairs_in_each_cell[i_cell] =
558  amrex::max(n_part_in_cell_1,n_part_in_cell_2);
559  }
560  }
561  );
562 
563  // Start indices of the pairs in a cell. Will be used for particle creation
564  amrex::Gpu::DeviceVector<index_type> pair_offsets(n_cells_products);
565  const index_type n_total_pairs = (n_cells_products == 0) ? 0:
566  amrex::Scan::ExclusiveSum(n_cells_products,
567  p_n_pairs_in_each_cell, pair_offsets.data());
568  index_type* AMREX_RESTRICT p_pair_offsets = pair_offsets.dataPtr();
569 
570  amrex::Gpu::DeviceVector<index_type> n_ind_pairs_in_each_cell(n_cells+1);
571  index_type* AMREX_RESTRICT p_n_ind_pairs_in_each_cell = n_ind_pairs_in_each_cell.dataPtr();
572 
573  amrex::ParallelFor( n_cells+1,
574  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
575  {
576  if (i_cell < n_cells)
577  {
578  const auto n_part_in_cell_1 = cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell];
579  const auto n_part_in_cell_2 = cell_offsets_2[i_cell+1] - cell_offsets_2[i_cell];
580  p_n_ind_pairs_in_each_cell[i_cell] = amrex::min(n_part_in_cell_1, n_part_in_cell_2);
581  }
582  else
583  {
584  p_n_ind_pairs_in_each_cell[i_cell] = 0;
585  }
586  }
587  );
588 
589  // start indices of independent collisions.
590  amrex::Gpu::DeviceVector<index_type> coll_offsets(n_cells+1);
591  // number of total independent collision pairs
592  const auto n_independent_pairs = (int) amrex::Scan::ExclusiveSum(n_cells+1,
593  p_n_ind_pairs_in_each_cell, coll_offsets.data(), amrex::Scan::RetSum{true});
594  index_type* AMREX_RESTRICT p_coll_offsets = coll_offsets.dataPtr();
595 
596  // mask: equal to 1 if particle creation occurs for a given pair, 0 otherwise
599  // Will be filled with the index of the first particle of a given pair
600  amrex::Gpu::DeviceVector<index_type> pair_indices_1(n_total_pairs);
601  index_type* AMREX_RESTRICT p_pair_indices_1 = pair_indices_1.dataPtr();
602  // Will be filled with the index of the second particle of a given pair
603  amrex::Gpu::DeviceVector<index_type> pair_indices_2(n_total_pairs);
604  index_type* AMREX_RESTRICT p_pair_indices_2 = pair_indices_2.dataPtr();
605  // How much weight should be given to the produced particles (and removed from the
606  // reacting particles)
607  amrex::Gpu::DeviceVector<amrex::ParticleReal> pair_reaction_weight(n_total_pairs);
608  amrex::ParticleReal* AMREX_RESTRICT p_pair_reaction_weight =
609  pair_reaction_weight.dataPtr();
610  /*
611  End of calculations only required when creating product particles
612  */
613 
614  // create vectors to store density and temperature on cell level
617  if (binary_collision_functor.m_computeSpeciesDensities) {
618  n1_vec.resize(n_cells);
619  n2_vec.resize(n_cells);
620  }
621  if (binary_collision_functor.m_computeSpeciesTemperatures) {
622  T1_vec.resize(n_cells);
623  T2_vec.resize(n_cells);
624  }
625  amrex::ParticleReal* AMREX_RESTRICT n1_in_each_cell = n1_vec.dataPtr();
626  amrex::ParticleReal* AMREX_RESTRICT n2_in_each_cell = n2_vec.dataPtr();
627  amrex::ParticleReal* AMREX_RESTRICT T1_in_each_cell = T1_vec.dataPtr();
628  amrex::ParticleReal* AMREX_RESTRICT T2_in_each_cell = T2_vec.dataPtr();
629 
630  // Loop over cells
631  amrex::ParallelForRNG( n_cells,
632  [=] AMREX_GPU_DEVICE (int i_cell, amrex::RandomEngine const& engine) noexcept
633  {
634  // The particles from species1 that are in the cell `i_cell` are
635  // given by the `indices_1[cell_start_1:cell_stop_1]`
636  index_type const cell_start_1 = cell_offsets_1[i_cell];
637  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
638  // Same for species 2
639  index_type const cell_start_2 = cell_offsets_2[i_cell];
640  index_type const cell_stop_2 = cell_offsets_2[i_cell+1];
641 
642  // ux from species1 can be accessed like this:
643  // ux_1[ indices_1[i] ], where i is between
644  // cell_start_1 (inclusive) and cell_start_2 (exclusive)
645 
646  // compute local densities [1/m^3]
647  if (binary_collision_functor.m_computeSpeciesDensities) {
648  amrex::ParticleReal w1tot = 0.0;
649  amrex::ParticleReal w2tot = 0.0;
650  amrex::ParticleReal * const AMREX_RESTRICT w1 = soa_1.m_rdata[PIdx::w];
651  amrex::ParticleReal * const AMREX_RESTRICT w2 = soa_2.m_rdata[PIdx::w];
652  for (index_type i1=cell_start_1; i1<cell_stop_1; ++i1) {
653  w1tot += w1[ indices_1[i1] ];
654  }
655  for (index_type i2=cell_start_2; i2<cell_stop_2; ++i2) {
656  w2tot += w2[ indices_2[i2] ];
657  }
658 #if defined WARPX_DIM_RZ
659  const int ri = (i_cell - i_cell%nz) / nz;
660  auto dV = MathConst::pi*(2.0_prt*ri+1.0_prt)*dr*dr*dz;
661 #endif
662  n1_in_each_cell[i_cell] = w1tot/dV;
663  n2_in_each_cell[i_cell] = w2tot/dV;
664  }
665 
666  // compute local temperatures [Joules]
667  if (binary_collision_functor.m_computeSpeciesTemperatures) {
668  amrex::ParticleReal * const AMREX_RESTRICT w1 = soa_1.m_rdata[PIdx::w];
669  amrex::ParticleReal * const AMREX_RESTRICT u1x = soa_1.m_rdata[PIdx::ux];
670  amrex::ParticleReal * const AMREX_RESTRICT u1y = soa_1.m_rdata[PIdx::uy];
671  amrex::ParticleReal * const AMREX_RESTRICT u1z = soa_1.m_rdata[PIdx::uz];
672  T1_in_each_cell[i_cell] = ComputeTemperature( cell_start_1, cell_stop_1, indices_1,
673  w1, u1x, u1y, u1z, m1 );
674 
675  amrex::ParticleReal * const AMREX_RESTRICT w2 = soa_2.m_rdata[PIdx::w];
676  amrex::ParticleReal * const AMREX_RESTRICT u2x = soa_2.m_rdata[PIdx::ux];
677  amrex::ParticleReal * const AMREX_RESTRICT u2y = soa_2.m_rdata[PIdx::uy];
678  amrex::ParticleReal * const AMREX_RESTRICT u2z = soa_2.m_rdata[PIdx::uz];
679  T2_in_each_cell[i_cell] = ComputeTemperature( cell_start_2, cell_stop_2, indices_2,
680  w2, u2x, u2y, u2z, m2 );
681  }
682 
683  // Do not collide if one species is missing in the cell
684  if ( cell_stop_1 - cell_start_1 < 1 ||
685  cell_stop_2 - cell_start_2 < 1 ) { return; }
686 
687  // shuffle
688  ShuffleFisherYates(indices_1, cell_start_1, cell_stop_1, engine);
689  ShuffleFisherYates(indices_2, cell_start_2, cell_stop_2, engine);
690  }
691  );
692 
693  // Loop over independent particle pairs
694  // To speed up binary collisions on GPU, we try to expose as much parallelism
695  // as possible (while avoiding race conditions): Instead of looping with one GPU
696  // thread per cell, we loop with one GPU thread per "independent pairs" (i.e. pairs
697  // that do not touch the same macroparticles, so that there is no race condition),
698  // where the number of independent pairs is determined by the lower number of
699  // macroparticles of either species, within each cell.
700  amrex::ParallelForRNG( n_independent_pairs,
701  [=] AMREX_GPU_DEVICE (int i_coll, amrex::RandomEngine const& engine) noexcept
702  {
703  // to avoid type mismatch errors
704  auto ui_coll = (index_type)i_coll;
705 
706  // Use a bisection algorithm to find the index of the cell in which this pair is located
707  const int i_cell = amrex::bisect( p_coll_offsets, 0, n_cells, ui_coll );
708 
709  // The particles from species1 that are in the cell `i_cell` are
710  // given by the `indices_1[cell_start_1:cell_stop_1]`
711  index_type const cell_start_1 = cell_offsets_1[i_cell];
712  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
713  // Same for species 2
714  index_type const cell_start_2 = cell_offsets_2[i_cell];
715  index_type const cell_stop_2 = cell_offsets_2[i_cell+1];
716 
717  // collision number of the cell
718  const index_type coll_idx = ui_coll - p_coll_offsets[i_cell];
719 
720  // Same but for the pairs
721  index_type const cell_start_pair = have_product_species?
722  p_pair_offsets[i_cell]: 0;
723 
724  // ux from species1 can be accessed like this:
725  // ux_1[ indices_1[i] ], where i is between
726  // cell_start_1 (inclusive) and cell_start_2 (exclusive)
727 
728 #if defined WARPX_DIM_RZ
729  const int ri = (i_cell - i_cell%nz) / nz;
730  auto dV = MathConst::pi*(2.0_prt*ri+1.0_prt)*dr*dr*dz;
731 #endif
732 
733  // Get the local densities and temperatures for this cell
734  amrex::ParticleReal n1 = 0.0, n2 = 0.0;
735  amrex::ParticleReal T1 = 0.0, T2 = 0.0;
736  if (binary_collision_functor.m_computeSpeciesDensities) {
737  n1 = n1_in_each_cell[i_cell];
738  n2 = n2_in_each_cell[i_cell];
739  }
740  if (binary_collision_functor.m_computeSpeciesTemperatures) {
741  T1 = T1_in_each_cell[i_cell];
742  T2 = T2_in_each_cell[i_cell];
743  }
744 
745  // Call the function in order to perform collisions
746  // If there are product species, p_mask, p_pair_indices_1/2, and
747  // p_pair_reaction_weight are filled here
748  binary_collision_functor(
749  cell_start_1, cell_stop_1, cell_start_2, cell_stop_2,
750  indices_1, indices_2,
751  soa_1, soa_2, get_position_1, get_position_2,
752  n1, n2, T1, T2,
753  q1, q2, m1, m2, dt, dV, coll_idx,
754  cell_start_pair, p_mask, p_pair_indices_1, p_pair_indices_2,
755  p_pair_reaction_weight, engine);
756  }
757  );
758 
759  // Create the new product particles and define their initial values
760  // num_added: how many particles of each product species have been created
761  const amrex::Vector<int> num_added = m_copy_transform_functor(n_total_pairs,
762  ptile_1, ptile_2,
763  product_species_vector,
764  tile_products_data,
765  m1, m2,
766  products_mass, p_mask, products_np,
767  copy_species1, copy_species2,
768  p_pair_indices_1, p_pair_indices_2,
769  p_pair_reaction_weight);
770 
771  for (int i = 0; i < n_product_species; i++)
772  {
773  setNewParticleIDs(*(tile_products_data[i]), static_cast<int>(products_np[i]), num_added[i]);
774  }
775 
776  } // end if ( m_isSameSpecies)
777 
778  }
779 
780 private:
781 
785  // functor that performs collisions within a cell
786  CollisionFunctor m_binary_collision_functor;
787  // functor that creates new particles and initializes their parameters
788  CopyTransformFunctor m_copy_transform_functor;
789 
790 };
791 
792 #endif // WARPX_PARTICLES_COLLISION_BINARYCOLLISION_H_
#define AMREX_RESTRICT
#define AMREX_GPU_DEVICE
Box cbx
Array4< int const > mask
CollisionType
Definition: BinaryCollisionUtils.H:17
AMREX_GPU_HOST_DEVICE T_R ComputeTemperature(T_index const Is, T_index const Ie, T_index const *AMREX_RESTRICT I, T_R const *AMREX_RESTRICT w, T_R const *AMREX_RESTRICT ux, T_R const *AMREX_RESTRICT uy, T_R const *AMREX_RESTRICT uz, T_R const m)
Definition: ComputeTemperature.H:15
AMREX_GPU_HOST_DEVICE AMREX_INLINE void ShuffleFisherYates(T_index *array, T_index const is, T_index const ie, amrex::RandomEngine const &engine)
Definition: ShuffleFisherYates.H:20
void setNewParticleIDs(PTile &ptile, amrex::Long old_size, amrex::Long num_added)
Sets the ids of newly created particles to the next values.
Definition: SmartUtils.H:52
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition: TextMsg.H:15
This class performs generic binary collisions.
Definition: BinaryCollision.H:73
bool m_have_product_species
Definition: BinaryCollision.H:783
ParticleBins::index_type index_type
Definition: BinaryCollision.H:79
CopyTransformFunctor m_copy_transform_functor
Definition: BinaryCollision.H:788
bool m_isSameSpecies
Definition: BinaryCollision.H:782
WarpXParticleContainer::ParticleType ParticleType
Definition: BinaryCollision.H:75
BinaryCollision(std::string collision_name, MultiParticleContainer const *const mypc)
Constructor of the BinaryCollision class.
Definition: BinaryCollision.H:89
~BinaryCollision() override=default
void doCollisions(amrex::Real cur_time, amrex::Real dt, MultiParticleContainer *mypc) override
Definition: BinaryCollision.H:134
void doCollisionsWithinTile(amrex::Real dt, int const lev, amrex::MFIter const &mfi, WarpXParticleContainer &species_1, WarpXParticleContainer &species_2, amrex::Vector< WarpXParticleContainer * > product_species_vector, SmartCopy *copy_species1, SmartCopy *copy_species2)
Definition: BinaryCollision.H:234
BinaryCollision & operator=(BinaryCollision const &)=default
amrex::Vector< std::string > m_product_species
Definition: BinaryCollision.H:784
CollisionFunctor m_binary_collision_functor
Definition: BinaryCollision.H:786
BinaryCollision(BinaryCollision const &)=default
BinaryCollision(BinaryCollision &&)=delete
Definition: CollisionBase.H:18
amrex::Vector< std::string > m_species_names
Definition: CollisionBase.H:36
Definition: MultiParticleContainer.H:66
WarpXParticleContainer & GetParticleContainerFromName(const std::string &name) const
Definition: MultiParticleContainer.cpp:392
This class does nothing and is used as second template parameter for binary collisions that do not cr...
Definition: ParticleCreationFunc.H:284
A factory for creating SmartCopy functors.
Definition: SmartCopy.H:133
static WarpX & GetInstance()
Definition: WarpX.cpp:231
static amrex::LayoutData< amrex::Real > * getCosts(int lev)
Definition: WarpX.cpp:3108
static short load_balance_costs_update_algo
Definition: WarpX.H:191
Definition: WarpXParticleContainer.H:111
void defineAllParticleTiles() noexcept
Definition: WarpXParticleContainer.cpp:1517
amrex::ParticleReal getCharge() const
Definition: WarpXParticleContainer.H:371
amrex::ParticleReal getMass() const
Definition: WarpXParticleContainer.H:373
const Vector< Geometry > & Geom() const noexcept
const Real * CellSize() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
AMREX_GPU_HOST_DEVICE static constexpr AMREX_FORCE_INLINE IntVectND< dim > TheZeroVector() noexcept
Box tilebox() const noexcept
T * data() noexcept
iterator begin() noexcept
void resize(size_type a_new_size)
T * dataPtr() noexcept
int queryarr(const char *name, std::vector< int > &ref, int start_ix=FIRST, int num_val=ALL) const
ParticleTile< ParticleType, NArrayReal, NArrayInt, Allocator > ParticleTileType
const ParticleTileType & ParticlesAt(int lev, int grid, int tile) const
CollisionType get_collision_type(const std::string &collision_name, MultiParticleContainer const *const mypc)
Definition: BinaryCollisionUtils.cpp:20
Definition: ParticleUtils.cpp:26
ParticleBins findParticlesInEachCell(int lev, MFIter const &mfi, ParticleTileType &ptile)
Find the particles and count the particles that are in each cell. More specifically this function ret...
Definition: ParticleUtils.cpp:40
typename WarpXParticleContainer::ParticleTileType ParticleTileType
Definition: ParticleUtils.cpp:32
typename ParticleBins::index_type index_type
Definition: ParticleUtils.cpp:35
void synchronize() noexcept
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
static constexpr HostToDevice hostToDevice
void streamSynchronize() noexcept
bool notInLaunchRegion() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Add(T *const sum, T const value) noexcept
T ExclusiveSum(N n, T const *in, T *out, RetSum a_ret_sum=retSum)
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
double second() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
const int[]
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_ATTRIBUTE_FLATTEN_FOR void ParallelForRNG(T n, L const &f) noexcept
i
Definition: check_interp_points_and_weights.py:174
int dz
Definition: compute_domain.py:36
float dt
Definition: stencil.py:442
string species1
Definition: video_yt.py:35
@ Timers
load balance according to in-code timer-based weights (i.e., with costs)
Definition: WarpXAlgorithmSelection.H:137
@ uz
Definition: NamedComponentParticleContainer.H:34
@ w
weight
Definition: NamedComponentParticleContainer.H:33
@ uy
Definition: NamedComponentParticleContainer.H:34
@ ux
Definition: NamedComponentParticleContainer.H:34
This is a functor for performing a "smart copy" that works in both host and device code.
Definition: SmartCopy.H:34
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int const * dataPtr() const noexcept
MFItInfo & EnableTiling(const IntVect &ts=FabArrayBase::mfiter_tile_size) noexcept
MFItInfo & SetDynamic(bool f) noexcept
ParticleTileData< StorageParticleType, NArrayReal, NArrayInt > ParticleTileDataType
ParticleTileDataType getParticleTileData()
int numParticles() const