WarpX
BinaryCollision.H
Go to the documentation of this file.
1 /* Copyright 2020-2021 Yinjian Zhao, David Grote, Neil Zaim
2  *
3  * This file is part of WarpX.
4  *
5  * License: BSD-3-Clause-LBNL
6  */
7 #ifndef WARPX_PARTICLES_COLLISION_BINARYCOLLISION_H_
8 #define WARPX_PARTICLES_COLLISION_BINARYCOLLISION_H_
9 
21 #include "Utils/ParticleUtils.H"
22 #include "Utils/TextMsg.H"
24 #include "WarpX.H"
25 
28 
29 #include <AMReX.H>
30 #include <AMReX_Algorithm.H>
31 #include <AMReX_BLassert.H>
32 #include <AMReX_Config.H>
33 #include <AMReX_DenseBins.H>
34 #include <AMReX_Extension.H>
35 #include <AMReX_Geometry.H>
36 #include <AMReX_GpuAtomic.H>
37 #include <AMReX_GpuContainers.H>
38 #include <AMReX_GpuControl.H>
39 #include <AMReX_GpuDevice.H>
40 #include <AMReX_GpuLaunch.H>
41 #include <AMReX_GpuQualifiers.H>
42 #include <AMReX_LayoutData.H>
43 #include <AMReX_MFIter.H>
44 #include <AMReX_PODVector.H>
45 #include <AMReX_ParmParse.H>
46 #include <AMReX_Particles.H>
47 #include <AMReX_ParticleTile.H>
48 #include <AMReX_Random.H>
49 #include <AMReX_REAL.H>
50 #include <AMReX_Scan.H>
51 #include <AMReX_Utility.H>
52 #include <AMReX_Vector.H>
53 
54 #include <AMReX_BaseFwd.H>
55 
56 #include <cmath>
57 #include <string>
58 
68 template <typename CollisionFunctor,
69  typename CopyTransformFunctor = NoParticleCreationFunc>
70 class BinaryCollision final
71  : public CollisionBase
72 {
73  // Define shortcuts for frequently-used type names
79 
80 public:
88  BinaryCollision (std::string collision_name, MultiParticleContainer const * const mypc)
89  : CollisionBase(collision_name)
90  {
91  if(m_species_names.size() != 2) {
92  WARPX_ABORT_WITH_MESSAGE("Binary collision " + collision_name + " must have exactly two species.");
93  }
94 
95  const CollisionType collision_type = BinaryCollisionUtils::get_collision_type(collision_name, mypc);
96 
98 
99  m_binary_collision_functor = CollisionFunctor(collision_name, mypc, m_isSameSpecies);
100 
101  const amrex::ParmParse pp_collision_name(collision_name);
102  pp_collision_name.queryarr("product_species", m_product_species);
103 
104  // if DSMC the colliding species are also product species
105  // Therefore, we insert the colliding species at the beginning of `m_product_species`
106  if (collision_type == CollisionType::DSMC) {
107  m_product_species.insert( m_product_species.begin(), m_species_names.begin(), m_species_names.end() );
108  }
110 
112  WARPX_ABORT_WITH_MESSAGE( "Binary collision " + collision_name +
113  " does not produce species. Thus, `product_species` should not be specified in the input script." );
114  }
115  m_copy_transform_functor = CopyTransformFunctor(collision_name, mypc);
116  }
117 
118  ~BinaryCollision () override = default;
119 
120  BinaryCollision ( BinaryCollision const &) = default;
122 
125 
133  void doCollisions (amrex::Real cur_time, amrex::Real dt, MultiParticleContainer* mypc) override
134  {
135  amrex::ignore_unused(cur_time);
136 
138  auto& species2 = mypc->GetParticleContainerFromName(m_species_names[1]);
139 
140  // In case of particle creation, create the necessary vectors
141  const int n_product_species = m_product_species.size();
142  amrex::Vector<WarpXParticleContainer*> product_species_vector;
143  amrex::Vector<SmartCopyFactory> copy_factory_species1;
144  amrex::Vector<SmartCopyFactory> copy_factory_species2;
145  amrex::Vector<SmartCopy> copy_species1;
146  amrex::Vector<SmartCopy> copy_species2;
147  for (int i = 0; i < n_product_species; i++)
148  {
149  auto& product = mypc->GetParticleContainerFromName(m_product_species[i]);
150  product.defineAllParticleTiles();
151  product_species_vector.push_back(&product);
152  // Although the copy factories are not explicitly reused past this point, we need to
153  // store them in vectors so that the data that they own, which is used by the smart
154  // copy functors, does not go out of scope at the end of this for loop.
155  copy_factory_species1.push_back(SmartCopyFactory(species1, product));
156  copy_factory_species2.push_back(SmartCopyFactory(species2, product));
157  copy_species1.push_back(copy_factory_species1[i].getSmartCopy());
158  copy_species2.push_back(copy_factory_species2[i].getSmartCopy());
159  }
160 #ifdef AMREX_USE_GPU
161  amrex::Gpu::DeviceVector<SmartCopy> device_copy_species1(n_product_species);
162  amrex::Gpu::DeviceVector<SmartCopy> device_copy_species2(n_product_species);
163  amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, copy_species1.begin(),
164  copy_species1.end(), device_copy_species1.begin());
165  amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, copy_species2.begin(),
166  copy_species2.end(), device_copy_species2.begin());
168  auto *copy_species1_data = device_copy_species1.data();
169  auto *copy_species2_data = device_copy_species2.data();
170 #else
171  auto *copy_species1_data = copy_species1.data();
172  auto *copy_species2_data = copy_species2.data();
173 #endif
175  species1.defineAllParticleTiles();
176  if (!m_isSameSpecies) { species2.defineAllParticleTiles(); }
177  }
178 
179  // Enable tiling
180  amrex::MFItInfo info;
181  if (amrex::Gpu::notInLaunchRegion()) { info.EnableTiling(species1.tile_size); }
182 
183  // Loop over refinement levels
184  for (int lev = 0; lev <= species1.finestLevel(); ++lev){
185 
187 
188  // Loop over all grids/tiles at this level
189 #ifdef AMREX_USE_OMP
190  info.SetDynamic(true);
191 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
192 #endif
193  for (amrex::MFIter mfi = species1.MakeMFIter(lev, info); mfi.isValid(); ++mfi){
195  {
197  }
198  auto wt = static_cast<amrex::Real>(amrex::second());
199 
200  doCollisionsWithinTile( dt, lev, mfi, species1, species2, product_species_vector,
201  copy_species1_data, copy_species2_data);
202 
204  {
206  wt = static_cast<amrex::Real>(amrex::second()) - wt;
207  amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt);
208  }
209  }
210 
212  // The fact that there are product species indicates that particles of
213  // the colliding species (`species1` and `species2`) may be removed
214  // (i.e., marked as invalid) in the process of creating new product particles.
215  species1.deleteInvalidParticles();
216  if (!m_isSameSpecies) { species2.deleteInvalidParticles(); }
217  }
218  }
219  }
220 
234  amrex::Real dt, int const lev, amrex::MFIter const& mfi,
235  WarpXParticleContainer& species_1,
236  WarpXParticleContainer& species_2,
237  amrex::Vector<WarpXParticleContainer*> product_species_vector,
238  SmartCopy* copy_species1, SmartCopy* copy_species2)
239  {
240  using namespace ParticleUtils;
241  using namespace amrex::literals;
242 
243  const auto& binary_collision_functor = m_binary_collision_functor.executor();
244  const bool have_product_species = m_have_product_species;
245 
246  // Store product species data in vectors
247  const int n_product_species = m_product_species.size();
248  amrex::Vector<ParticleTileType*> tile_products;
249  amrex::Vector<GetParticlePosition<PIdx>> get_position_products;
250  amrex::Vector<index_type> products_np;
252  constexpr int getpos_offset = 0;
253  for (int i = 0; i < n_product_species; i++)
254  {
255  ParticleTileType& ptile_product = product_species_vector[i]->ParticlesAt(lev, mfi);
256  tile_products.push_back(&ptile_product);
257  get_position_products.push_back(GetParticlePosition<PIdx>(ptile_product,
258  getpos_offset));
259  products_np.push_back(ptile_product.numParticles());
260  products_mass.push_back(product_species_vector[i]->getMass());
261  }
262  auto *tile_products_data = tile_products.data();
263 
264  if ( m_isSameSpecies ) // species_1 == species_2
265  {
266  // Extract particles in the tile that `mfi` points to
267  ParticleTileType& ptile_1 = species_1.ParticlesAt(lev, mfi);
268 
269  // Find the particles that are in each cell of this tile
270  ParticleBins bins_1 = findParticlesInEachCell( lev, mfi, ptile_1 );
271 
272  // Loop over cells, and collide the particles in each cell
273 
274  // Extract low-level data
275  auto const n_cells = static_cast<int>(bins_1.numBins());
276  // - Species 1
277  const auto soa_1 = ptile_1.getParticleTileData();
278  index_type* AMREX_RESTRICT indices_1 = bins_1.permutationPtr();
279  index_type const* AMREX_RESTRICT cell_offsets_1 = bins_1.offsetsPtr();
280  const amrex::ParticleReal q1 = species_1.getCharge();
281  const amrex::ParticleReal m1 = species_1.getMass();
282  auto get_position_1 = GetParticlePosition<PIdx>(ptile_1, getpos_offset);
283 
284  amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev);
285 #if defined WARPX_DIM_1D_Z
286  auto dV = geom.CellSize(0);
287 #elif defined WARPX_DIM_XZ
288  auto dV = geom.CellSize(0) * geom.CellSize(1);
289 #elif defined WARPX_DIM_RZ
290  amrex::Box const& cbx = mfi.tilebox(amrex::IntVect::TheZeroVector()); //Cell-centered box
291  const auto lo = lbound(cbx);
292  const auto hi = ubound(cbx);
293  int const nz = hi.y-lo.y+1;
294  auto dr = geom.CellSize(0);
295  auto dz = geom.CellSize(1);
296 #elif defined(WARPX_DIM_3D)
297  auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
298 #endif
299 
300  /*
301  The following calculations are only required when creating product particles
302  */
303  const int n_cells_products = have_product_species ? n_cells: 0;
304  amrex::Gpu::DeviceVector<index_type> n_pairs_in_each_cell(n_cells_products);
305  index_type* AMREX_RESTRICT p_n_pairs_in_each_cell = n_pairs_in_each_cell.dataPtr();
306 
307  // Compute how many pairs in each cell and store in n_pairs_in_each_cell array
308  // For a single species, the number of pair in a cell is half the number of particles
309  // in that cell, rounded up to the next higher integer.
310  amrex::ParallelFor( n_cells_products,
311  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
312  {
313  const auto n_part_in_cell = cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell];
314  // Particular case: if there's only 1 particle in a cell, then there's no pair
315  p_n_pairs_in_each_cell[i_cell] = (n_part_in_cell == 1)? 0: (n_part_in_cell+1)/2;
316  }
317  );
318 
319  // Start indices of the pairs in a cell. Will be used for particle creation.
320  amrex::Gpu::DeviceVector<index_type> pair_offsets(n_cells_products);
321  const index_type n_total_pairs = (n_cells_products == 0) ? 0:
322  amrex::Scan::ExclusiveSum(n_cells_products,
323  p_n_pairs_in_each_cell, pair_offsets.data());
324  index_type* AMREX_RESTRICT p_pair_offsets = pair_offsets.dataPtr();
325 
326  amrex::Gpu::DeviceVector<index_type> n_ind_pairs_in_each_cell(n_cells+1);
327  index_type* AMREX_RESTRICT p_n_ind_pairs_in_each_cell = n_ind_pairs_in_each_cell.dataPtr();
328 
329  amrex::ParallelFor( n_cells+1,
330  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
331  {
332  const auto n_part_in_cell = (i_cell < n_cells)? cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell]: 0;
333  // number of independent collisions in each cell
334  p_n_ind_pairs_in_each_cell[i_cell] = n_part_in_cell/2;
335  }
336  );
337 
338  // start indices of independent collisions.
339  amrex::Gpu::DeviceVector<index_type> coll_offsets(n_cells+1);
340  // number of total independent collision pairs
341  const auto n_independent_pairs = (int) amrex::Scan::ExclusiveSum(n_cells+1,
342  p_n_ind_pairs_in_each_cell, coll_offsets.data(), amrex::Scan::RetSum{true});
343  index_type* AMREX_RESTRICT p_coll_offsets = coll_offsets.dataPtr();
344 
345  // mask: equal to 1 if particle creation occurs for a given pair, 0 otherwise
348  // Will be filled with the index of the first particle of a given pair
349  amrex::Gpu::DeviceVector<index_type> pair_indices_1(n_total_pairs);
350  index_type* AMREX_RESTRICT p_pair_indices_1 = pair_indices_1.dataPtr();
351  // Will be filled with the index of the second particle of a given pair
352  amrex::Gpu::DeviceVector<index_type> pair_indices_2(n_total_pairs);
353  index_type* AMREX_RESTRICT p_pair_indices_2 = pair_indices_2.dataPtr();
354  // How much weight should be given to the produced particles (and removed from the
355  // reacting particles)
356  amrex::Gpu::DeviceVector<amrex::ParticleReal> pair_reaction_weight(n_total_pairs);
357  amrex::ParticleReal* AMREX_RESTRICT p_pair_reaction_weight =
358  pair_reaction_weight.dataPtr();
359  /*
360  End of calculations only required when creating product particles
361  */
362 
363  // Loop over cells
364  amrex::ParallelForRNG( n_cells,
365  [=] AMREX_GPU_DEVICE (int i_cell, amrex::RandomEngine const& engine) noexcept
366  {
367  // The particles from species1 that are in the cell `i_cell` are
368  // given by the `indices_1[cell_start_1:cell_stop_1]`
369  index_type const cell_start_1 = cell_offsets_1[i_cell];
370  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
371  index_type const cell_half_1 = (cell_start_1+cell_stop_1)/2;
372 
373  // Do not collide if there is only one particle in the cell
374  if ( cell_stop_1 - cell_start_1 <= 1 ) { return; }
375 
376  // shuffle
378  indices_1, cell_start_1, cell_half_1, engine );
379  }
380  );
381 
382  // Loop over independent particle pairs
383  // To speed up binary collisions on GPU, we try to expose as much parallelism
384  // as possible (while avoiding race conditions): Instead of looping with one GPU
385  // thread per cell, we loop with one GPU thread per "independent pairs" (i.e. pairs
386  // that do not touch the same macroparticles, so that there is no race condition),
387  // where the number of independent pairs is determined by the lower number of
388  // macroparticles of either species, within each cell.
389  amrex::ParallelForRNG( n_independent_pairs,
390  [=] AMREX_GPU_DEVICE (int i_coll, amrex::RandomEngine const& engine) noexcept
391  {
392  // to avoid type mismatch errors
393  auto ui_coll = (index_type)i_coll;
394 
395  // Use a bisection algorithm to find the index of the cell in which this pair is located
396  const int i_cell = amrex::bisect( p_coll_offsets, 0, n_cells, ui_coll );
397 
398  // The particles from species1 that are in the cell `i_cell` are
399  // given by the `indices_1[cell_start_1:cell_stop_1]`
400  index_type const cell_start_1 = cell_offsets_1[i_cell];
401  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
402  index_type const cell_half_1 = (cell_start_1+cell_stop_1)/2;
403 
404  // collision number of the cell
405  const index_type coll_idx = ui_coll - p_coll_offsets[i_cell];
406 
407  // Same but for the pairs
408  index_type const cell_start_pair = have_product_species?
409  p_pair_offsets[i_cell] : 0;
410 
411 #if defined WARPX_DIM_RZ
412  const int ri = (i_cell - i_cell%nz) / nz;
413  auto dV = MathConst::pi*(2.0_prt*ri+1.0_prt)*dr*dr*dz;
414 #endif
415  // Call the function in order to perform collisions
416  // If there are product species, mask, p_pair_indices_1/2, and
417  // p_pair_reaction_weight are filled here
418  binary_collision_functor(
419  cell_start_1, cell_half_1,
420  cell_half_1, cell_stop_1,
421  indices_1, indices_1,
422  soa_1, soa_1, get_position_1, get_position_1,
423  q1, q1, m1, m1, dt, dV, coll_idx,
424  cell_start_pair, p_mask, p_pair_indices_1, p_pair_indices_2,
425  p_pair_reaction_weight, engine);
426  }
427  );
428 
429  // Create the new product particles and define their initial values
430  // num_added: how many particles of each product species have been created
431  const amrex::Vector<int> num_added = m_copy_transform_functor(n_total_pairs,
432  ptile_1, ptile_1,
433  product_species_vector,
434  tile_products_data,
435  m1, m1,
436  products_mass, p_mask, products_np,
437  copy_species1, copy_species2,
438  p_pair_indices_1, p_pair_indices_2,
439  p_pair_reaction_weight);
440 
441  for (int i = 0; i < n_product_species; i++)
442  {
443  setNewParticleIDs(*(tile_products_data[i]), static_cast<int>(products_np[i]), num_added[i]);
444  }
445  }
446  else // species_1 != species_2
447  {
448  // Extract particles in the tile that `mfi` points to
449  ParticleTileType& ptile_1 = species_1.ParticlesAt(lev, mfi);
450  ParticleTileType& ptile_2 = species_2.ParticlesAt(lev, mfi);
451 
452  // Find the particles that are in each cell of this tile
453  ParticleBins bins_1 = findParticlesInEachCell( lev, mfi, ptile_1 );
454  ParticleBins bins_2 = findParticlesInEachCell( lev, mfi, ptile_2 );
455 
456  // Loop over cells, and collide the particles in each cell
457 
458  // Extract low-level data
459  auto const n_cells = static_cast<int>(bins_1.numBins());
460  // - Species 1
461  const auto soa_1 = ptile_1.getParticleTileData();
462  index_type* AMREX_RESTRICT indices_1 = bins_1.permutationPtr();
463  index_type const* AMREX_RESTRICT cell_offsets_1 = bins_1.offsetsPtr();
464  const amrex::ParticleReal q1 = species_1.getCharge();
465  const amrex::ParticleReal m1 = species_1.getMass();
466  auto get_position_1 = GetParticlePosition<PIdx>(ptile_1, getpos_offset);
467  // - Species 2
468  const auto soa_2 = ptile_2.getParticleTileData();
469  index_type* AMREX_RESTRICT indices_2 = bins_2.permutationPtr();
470  index_type const* AMREX_RESTRICT cell_offsets_2 = bins_2.offsetsPtr();
471  const amrex::ParticleReal q2 = species_2.getCharge();
472  const amrex::ParticleReal m2 = species_2.getMass();
473  auto get_position_2 = GetParticlePosition<PIdx>(ptile_2, getpos_offset);
474 
475  amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev);
476 #if defined WARPX_DIM_1D_Z
477  auto dV = geom.CellSize(0);
478 #elif defined WARPX_DIM_XZ
479  auto dV = geom.CellSize(0) * geom.CellSize(1);
480 #elif defined WARPX_DIM_RZ
481  amrex::Box const& cbx = mfi.tilebox(amrex::IntVect::TheZeroVector()); //Cell-centered box
482  const auto lo = lbound(cbx);
483  const auto hi = ubound(cbx);
484  const int nz = hi.y-lo.y+1;
485  auto dr = geom.CellSize(0);
486  auto dz = geom.CellSize(1);
487 #elif defined(WARPX_DIM_3D)
488  auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
489 #endif
490 
491  /*
492  The following calculations are only required when creating product particles
493  */
494  const int n_cells_products = have_product_species ? n_cells: 0;
495  amrex::Gpu::DeviceVector<index_type> n_pairs_in_each_cell(n_cells_products);
496  index_type* AMREX_RESTRICT p_n_pairs_in_each_cell = n_pairs_in_each_cell.dataPtr();
497 
498  // Compute how many pairs in each cell and store in n_pairs_in_each_cell array
499  // For different species, the number of pairs in a cell is the number of particles of
500  // the species that has the most particles in that cell
501  amrex::ParallelFor( n_cells_products,
502  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
503  {
504  const auto n_part_in_cell_1 = cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell];
505  const auto n_part_in_cell_2 = cell_offsets_2[i_cell+1] - cell_offsets_2[i_cell];
506  // Particular case: no pair if a species has no particle in that cell
507  if (n_part_in_cell_1 == 0 || n_part_in_cell_2 == 0) {
508  p_n_pairs_in_each_cell[i_cell] = 0;
509  } else {
510  p_n_pairs_in_each_cell[i_cell] =
511  amrex::max(n_part_in_cell_1,n_part_in_cell_2);
512  }
513  }
514  );
515 
516  // Start indices of the pairs in a cell. Will be used for particle creation
517  amrex::Gpu::DeviceVector<index_type> pair_offsets(n_cells_products);
518  const index_type n_total_pairs = (n_cells_products == 0) ? 0:
519  amrex::Scan::ExclusiveSum(n_cells_products,
520  p_n_pairs_in_each_cell, pair_offsets.data());
521  index_type* AMREX_RESTRICT p_pair_offsets = pair_offsets.dataPtr();
522 
523  amrex::Gpu::DeviceVector<index_type> n_ind_pairs_in_each_cell(n_cells+1);
524  index_type* AMREX_RESTRICT p_n_ind_pairs_in_each_cell = n_ind_pairs_in_each_cell.dataPtr();
525 
526  amrex::ParallelFor( n_cells+1,
527  [=] AMREX_GPU_DEVICE (int i_cell) noexcept
528  {
529  if (i_cell < n_cells)
530  {
531  const auto n_part_in_cell_1 = cell_offsets_1[i_cell+1] - cell_offsets_1[i_cell];
532  const auto n_part_in_cell_2 = cell_offsets_2[i_cell+1] - cell_offsets_2[i_cell];
533  p_n_ind_pairs_in_each_cell[i_cell] = amrex::min(n_part_in_cell_1, n_part_in_cell_2);
534  }
535  else
536  {
537  p_n_ind_pairs_in_each_cell[i_cell] = 0;
538  }
539  }
540  );
541 
542  // start indices of independent collisions.
543  amrex::Gpu::DeviceVector<index_type> coll_offsets(n_cells+1);
544  // number of total independent collision pairs
545  const auto n_independent_pairs = (int) amrex::Scan::ExclusiveSum(n_cells+1,
546  p_n_ind_pairs_in_each_cell, coll_offsets.data(), amrex::Scan::RetSum{true});
547  index_type* AMREX_RESTRICT p_coll_offsets = coll_offsets.dataPtr();
548 
549  // mask: equal to 1 if particle creation occurs for a given pair, 0 otherwise
552  // Will be filled with the index of the first particle of a given pair
553  amrex::Gpu::DeviceVector<index_type> pair_indices_1(n_total_pairs);
554  index_type* AMREX_RESTRICT p_pair_indices_1 = pair_indices_1.dataPtr();
555  // Will be filled with the index of the second particle of a given pair
556  amrex::Gpu::DeviceVector<index_type> pair_indices_2(n_total_pairs);
557  index_type* AMREX_RESTRICT p_pair_indices_2 = pair_indices_2.dataPtr();
558  // How much weight should be given to the produced particles (and removed from the
559  // reacting particles)
560  amrex::Gpu::DeviceVector<amrex::ParticleReal> pair_reaction_weight(n_total_pairs);
561  amrex::ParticleReal* AMREX_RESTRICT p_pair_reaction_weight =
562  pair_reaction_weight.dataPtr();
563  /*
564  End of calculations only required when creating product particles
565  */
566 
567 
568  // Loop over cells
569  amrex::ParallelForRNG( n_cells,
570  [=] AMREX_GPU_DEVICE (int i_cell, amrex::RandomEngine const& engine) noexcept
571  {
572  // The particles from species1 that are in the cell `i_cell` are
573  // given by the `indices_1[cell_start_1:cell_stop_1]`
574  index_type const cell_start_1 = cell_offsets_1[i_cell];
575  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
576  // Same for species 2
577  index_type const cell_start_2 = cell_offsets_2[i_cell];
578  index_type const cell_stop_2 = cell_offsets_2[i_cell+1];
579 
580  // ux from species1 can be accessed like this:
581  // ux_1[ indices_1[i] ], where i is between
582  // cell_start_1 (inclusive) and cell_start_2 (exclusive)
583 
584  // Do not collide if one species is missing in the cell
585  if ( cell_stop_1 - cell_start_1 < 1 ||
586  cell_stop_2 - cell_start_2 < 1 ) { return; }
587 
588  // shuffle
589  ShuffleFisherYates(indices_1, cell_start_1, cell_stop_1, engine);
590  ShuffleFisherYates(indices_2, cell_start_2, cell_stop_2, engine);
591  }
592  );
593 
594  // Loop over independent particle pairs
595  // To speed up binary collisions on GPU, we try to expose as much parallelism
596  // as possible (while avoiding race conditions): Instead of looping with one GPU
597  // thread per cell, we loop with one GPU thread per "independent pairs" (i.e. pairs
598  // that do not touch the same macroparticles, so that there is no race condition),
599  // where the number of independent pairs is determined by the lower number of
600  // macroparticles of either species, within each cell.
601  amrex::ParallelForRNG( n_independent_pairs,
602  [=] AMREX_GPU_DEVICE (int i_coll, amrex::RandomEngine const& engine) noexcept
603  {
604  // to avoid type mismatch errors
605  auto ui_coll = (index_type)i_coll;
606 
607  // Use a bisection algorithm to find the index of the cell in which this pair is located
608  const int i_cell = amrex::bisect( p_coll_offsets, 0, n_cells, ui_coll );
609 
610  // The particles from species1 that are in the cell `i_cell` are
611  // given by the `indices_1[cell_start_1:cell_stop_1]`
612  index_type const cell_start_1 = cell_offsets_1[i_cell];
613  index_type const cell_stop_1 = cell_offsets_1[i_cell+1];
614  // Same for species 2
615  index_type const cell_start_2 = cell_offsets_2[i_cell];
616  index_type const cell_stop_2 = cell_offsets_2[i_cell+1];
617 
618  // collision number of the cell
619  const index_type coll_idx = ui_coll - p_coll_offsets[i_cell];
620 
621  // Same but for the pairs
622  index_type const cell_start_pair = have_product_species?
623  p_pair_offsets[i_cell]: 0;
624 
625  // ux from species1 can be accessed like this:
626  // ux_1[ indices_1[i] ], where i is between
627  // cell_start_1 (inclusive) and cell_start_2 (exclusive)
628 
629 #if defined WARPX_DIM_RZ
630  const int ri = (i_cell - i_cell%nz) / nz;
631  auto dV = MathConst::pi*(2.0_prt*ri+1.0_prt)*dr*dr*dz;
632 #endif
633  // Call the function in order to perform collisions
634  // If there are product species, p_mask, p_pair_indices_1/2, and
635  // p_pair_reaction_weight are filled here
636  binary_collision_functor(
637  cell_start_1, cell_stop_1, cell_start_2, cell_stop_2,
638  indices_1, indices_2,
639  soa_1, soa_2, get_position_1, get_position_2,
640  q1, q2, m1, m2, dt, dV, coll_idx,
641  cell_start_pair, p_mask, p_pair_indices_1, p_pair_indices_2,
642  p_pair_reaction_weight, engine);
643  }
644  );
645 
646  // Create the new product particles and define their initial values
647  // num_added: how many particles of each product species have been created
648  const amrex::Vector<int> num_added = m_copy_transform_functor(n_total_pairs,
649  ptile_1, ptile_2,
650  product_species_vector,
651  tile_products_data,
652  m1, m2,
653  products_mass, p_mask, products_np,
654  copy_species1, copy_species2,
655  p_pair_indices_1, p_pair_indices_2,
656  p_pair_reaction_weight);
657 
658  for (int i = 0; i < n_product_species; i++)
659  {
660  setNewParticleIDs(*(tile_products_data[i]), static_cast<int>(products_np[i]), num_added[i]);
661  }
662 
663  } // end if ( m_isSameSpecies)
664 
665  }
666 
667 private:
668 
672  // functor that performs collisions within a cell
673  CollisionFunctor m_binary_collision_functor;
674  // functor that creates new particles and initializes their parameters
675  CopyTransformFunctor m_copy_transform_functor;
676 
677 };
678 
679 #endif // WARPX_PARTICLES_COLLISION_BINARYCOLLISION_H_
#define AMREX_RESTRICT
#define AMREX_GPU_DEVICE
Array4< int const > mask
CollisionType
Definition: BinaryCollisionUtils.H:17
AMREX_GPU_HOST_DEVICE AMREX_INLINE void ShuffleFisherYates(T_index *array, T_index const is, T_index const ie, amrex::RandomEngine const &engine)
Definition: ShuffleFisherYates.H:20
void setNewParticleIDs(PTile &ptile, amrex::Long old_size, amrex::Long num_added)
Sets the ids of newly created particles to the next values.
Definition: SmartUtils.H:52
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition: TextMsg.H:15
This class performs generic binary collisions.
Definition: BinaryCollision.H:72
bool m_have_product_species
Definition: BinaryCollision.H:670
ParticleBins::index_type index_type
Definition: BinaryCollision.H:78
CopyTransformFunctor m_copy_transform_functor
Definition: BinaryCollision.H:675
bool m_isSameSpecies
Definition: BinaryCollision.H:669
WarpXParticleContainer::ParticleType ParticleType
Definition: BinaryCollision.H:74
BinaryCollision(std::string collision_name, MultiParticleContainer const *const mypc)
Constructor of the BinaryCollision class.
Definition: BinaryCollision.H:88
~BinaryCollision() override=default
void doCollisions(amrex::Real cur_time, amrex::Real dt, MultiParticleContainer *mypc) override
Definition: BinaryCollision.H:133
void doCollisionsWithinTile(amrex::Real dt, int const lev, amrex::MFIter const &mfi, WarpXParticleContainer &species_1, WarpXParticleContainer &species_2, amrex::Vector< WarpXParticleContainer * > product_species_vector, SmartCopy *copy_species1, SmartCopy *copy_species2)
Definition: BinaryCollision.H:233
BinaryCollision & operator=(BinaryCollision const &)=default
amrex::Vector< std::string > m_product_species
Definition: BinaryCollision.H:671
CollisionFunctor m_binary_collision_functor
Definition: BinaryCollision.H:673
BinaryCollision(BinaryCollision const &)=default
BinaryCollision(BinaryCollision &&)=delete
Definition: CollisionBase.H:18
amrex::Vector< std::string > m_species_names
Definition: CollisionBase.H:36
Definition: MultiParticleContainer.H:66
WarpXParticleContainer & GetParticleContainerFromName(const std::string &name) const
Definition: MultiParticleContainer.cpp:392
This class does nothing and is used as second template parameter for binary collisions that do not cr...
Definition: ParticleCreationFunc.H:284
A factory for creating SmartCopy functors.
Definition: SmartCopy.H:133
static WarpX & GetInstance()
Definition: WarpX.cpp:237
static amrex::LayoutData< amrex::Real > * getCosts(int lev)
Definition: WarpX.cpp:3043
static short load_balance_costs_update_algo
Definition: WarpX.H:173
Definition: WarpXParticleContainer.H:111
void defineAllParticleTiles() noexcept
Definition: WarpXParticleContainer.cpp:1518
amrex::ParticleReal getCharge() const
Definition: WarpXParticleContainer.H:371
amrex::ParticleReal getMass() const
Definition: WarpXParticleContainer.H:373
const Vector< Geometry > & Geom() const noexcept
const Real * CellSize() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
unsigned int index_type
Long numBins() const noexcept
AMREX_GPU_HOST_DEVICE static constexpr AMREX_FORCE_INLINE IntVect TheZeroVector() noexcept
Box tilebox() const noexcept
T * data() noexcept
iterator begin() noexcept
T * dataPtr() noexcept
int queryarr(const char *name, std::vector< int > &ref, int start_ix=FIRST, int num_val=ALL) const
ParticleTile< ParticleType, NArrayReal, NArrayInt, Allocator > ParticleTileType
const ParticleTileType & ParticlesAt(int lev, int grid, int tile) const
CollisionType get_collision_type(const std::string &collision_name, MultiParticleContainer const *const mypc)
Definition: BinaryCollisionUtils.cpp:20
Definition: ParticleUtils.cpp:26
ParticleBins findParticlesInEachCell(int lev, MFIter const &mfi, ParticleTileType &ptile)
Find the particles and count the particles that are in each cell. More specifically this function ret...
Definition: ParticleUtils.cpp:40
void synchronize() noexcept
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
static constexpr HostToDevice hostToDevice
void streamSynchronize() noexcept
bool notInLaunchRegion() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Add(T *const sum, T const value) noexcept
T ExclusiveSum(N n, T const *in, T *out, RetSum a_ret_sum=retSum)
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
double second() noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
const int[]
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_ATTRIBUTE_FLATTEN_FOR void ParallelForRNG(T n, L const &f) noexcept
i
Definition: check_interp_points_and_weights.py:174
int dz
Definition: compute_domain.py:36
float dt
Definition: stencil.py:442
value
Definition: updateAMReX.py:141
string species1
Definition: video_yt.py:35
@ Timers
load balance according to in-code timer-based weights (i.e., with costs)
Definition: WarpXAlgorithmSelection.H:147
This is a functor for performing a "smart copy" that works in both host and device code.
Definition: SmartCopy.H:34
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int const * dataPtr() const noexcept
MFItInfo & EnableTiling(const IntVect &ts=FabArrayBase::mfiter_tile_size) noexcept
MFItInfo & SetDynamic(bool f) noexcept
ParticleTileData< StorageParticleType, NArrayReal, NArrayInt > ParticleTileDataType
ParticleTileDataType getParticleTileData()
int numParticles() const