WarpX
GpuParser.H
Go to the documentation of this file.
1 /* Copyright 2019-2020 Maxence Thevenet, Revathi Jambunathan, Weiqun Zhang
2  *
3  * This file is part of WarpX.
4  *
5  * License: BSD-3-Clause-LBNL
6  */
7 #ifndef WARPX_GPU_PARSER_H_
8 #define WARPX_GPU_PARSER_H_
9 
10 #include "Parser/WarpXParser.H"
11 
12 #include <AMReX_Gpu.H>
13 #include <AMReX_Array.H>
14 #include <AMReX_TypeTraits.H>
15 #include <AMReX.H>
16 
17 // When compiled for CPU, wrap WarpXParser and enable threading.
18 // When compiled for GPU, store one copy of the parser in
19 // device memory for __device__ code, and one copy of the parser
20 // in host memory for __host__ code. This way, the parser can be
21 // efficiently called from both host and device.
22 template <int N>
23 class GpuParser
24 {
25 public:
26  GpuParser (WarpXParser const& wp);
27 
28  GpuParser (GpuParser<N> const&) = delete;
29  GpuParser (GpuParser<N> &&) = delete;
30  void operator= (GpuParser<N> const&) = delete;
31  void operator= (GpuParser<N> &&) = delete;
32 
33  void clear ();
34 
35  template <typename... Ts>
36  AMREX_GPU_HOST_DEVICE
37  std::enable_if_t<sizeof...(Ts) == N
38  and amrex::Same<amrex::Real,Ts...>::value,
39  amrex::Real>
40  operator() (Ts... var) const noexcept
41  {
42 #ifdef AMREX_USE_GPU
43  amrex::GpuArray<amrex::Real,N> l_var{var...};
44 #if AMREX_DEVICE_COMPILE
45 // WarpX compiled for GPU, function compiled for __device__
46  return wp_ast_eval<0>(m_gpu_parser_ast, l_var.data());
47 #else
48 // WarpX compiled for GPU, function compiled for __host__
49  return wp_ast_eval<0>(m_cpu_parser->ast, nullptr);
50 #endif
51 
52 #else
53 // WarpX compiled for CPU
54 #ifdef _OPENMP
55  int tid = omp_get_thread_num();
56 #else
57  int tid = 0;
58 #endif
59  m_var[tid] = amrex::GpuArray<amrex::Real,N>{var...};
60  return wp_ast_eval<0>(m_parser[tid]->ast, nullptr);
61 #endif
62  }
63 
64  void init_gpu_parser (WarpXParser const& wp); // public for CUDA
65 
66 protected:
67 
68 #ifdef AMREX_USE_GPU
69  // Copy of the parser running on __device__
70  struct wp_node* m_gpu_parser_ast;
71  // Copy of the parser running on __host__
72  struct wp_parser* m_cpu_parser;
73  mutable amrex::GpuArray<amrex::Real,N> m_var;
74 #else
75  // Only one parser
76  struct wp_parser** m_parser;
77  mutable amrex::GpuArray<amrex::Real,N>* m_var;
78  int nthreads;
79 #endif
80 };
81 
82 template <int N>
84 {
85  AMREX_ALWAYS_ASSERT(wp.depth() <= WARPX_PARSER_DEPTH);
86 
87 #ifdef AMREX_USE_GPU
88 
89  struct wp_parser* a_wp = wp.m_parser;
90 
91  // Initialize CPU parser:
92  m_cpu_parser = wp_parser_dup(a_wp);
93  for (int i = 0; i < N; ++i) {
94  wp_parser_regvar(m_cpu_parser, wp.m_varnames[i].c_str(), &m_var[i]);
95  }
96 
97  // Initialize GPU parser
98  init_gpu_parser(wp);
99 
100 #else // not defined AMREX_USE_GPU
101 
102 #ifdef _OPENMP
103  nthreads = omp_get_max_threads();
104 #else // _OPENMP
105  nthreads = 1;
106 #endif // _OPENMP
107 
108  m_parser = ::new struct wp_parser*[nthreads];
109  m_var = ::new amrex::GpuArray<amrex::Real,N>[nthreads];
110 
111  for (int tid = 0; tid < nthreads; ++tid)
112  {
113 #ifdef _OPENMP
114  m_parser[tid] = wp_parser_dup(wp.m_parser[tid]);
115  for (int i = 0; i < N; ++i) {
116  wp_parser_regvar(m_parser[tid], wp.m_varnames[tid][i].c_str(), &(m_var[tid][i]));
117  }
118 #else // _OPENMP
119  m_parser[tid] = wp_parser_dup(wp.m_parser);
120  for (int i = 0; i < N; ++i) {
121  wp_parser_regvar(m_parser[tid], wp.m_varnames[i].c_str(), &(m_var[tid][i]));
122  }
123 #endif // _OPENMP
124  }
125 
126 #endif // AMREX_USE_GPU
127 }
128 
129 template <int N>
131 {
132 #ifdef AMREX_USE_GPU
133 
134  // We create a temporary Parser on CPU for memcpy. We cannot use
135  // m_cpu_parser for this because the variables in m_cpu_parser are
136  // registered for CPU use.
137  struct wp_parser* cpu_tmp = wp_parser_dup(m_cpu_parser);
138  for (int i = 0; i < N; ++i) {
139  wp_parser_regvar_gpu(cpu_tmp, wp.m_varnames[i].c_str(), i);
140  }
141 
142  m_gpu_parser_ast = (struct wp_node*)
143  amrex::The_Arena()->alloc(cpu_tmp->sz_mempool);
144  amrex::Gpu::htod_memcpy_async(m_gpu_parser_ast, cpu_tmp->ast, cpu_tmp->sz_mempool);
145 
146  auto dp = m_gpu_parser_ast;
147  char* droot = (char*)dp;
148  char* croot = (char*)(cpu_tmp->ast);
149  amrex::single_task([=] AMREX_GPU_DEVICE () noexcept
150  {
151  wp_ast_update_device_ptr<0>(dp, droot, croot);
152  });
153 
154  amrex::Gpu::synchronize();
155 
156  wp_parser_delete(cpu_tmp);
157 #endif
158  amrex::ignore_unused(wp);
159 }
160 
161 template <int N>
162 void
164 {
165 #ifdef AMREX_USE_GPU
166  amrex::The_Arena()->free(m_gpu_parser_ast);
167  wp_parser_delete(m_cpu_parser);
168 #else
169  for (int tid = 0; tid < nthreads; ++tid)
170  {
172  }
173  ::delete[] m_parser;
174  ::delete[] m_var;
175 #endif
176 }
177 
178 #endif
void operator=(GpuParser< N > const &)=delete
void init_gpu_parser(WarpXParser const &wp)
Definition: GpuParser.H:130
struct wp_parser * m_parser
Definition: WarpXParser.H:78
struct wp_parser ** m_parser
Definition: GpuParser.H:76
Definition: wp_parser_y.h:145
size_t sz_mempool
Definition: wp_parser_y.h:149
struct wp_node * ast
Definition: wp_parser_y.h:148
struct wp_parser * wp_parser_dup(struct wp_parser *source)
Definition: wp_parser_y.cpp:131
i
Definition: check_interp_points_and_weights.py:171
amrex::GpuArray< amrex::Real, N > * m_var
Definition: GpuParser.H:77
Definition: wp_parser_y.h:91
void clear()
Definition: GpuParser.H:163
Definition: WarpXParser.H:26
std::vector< std::string > m_varnames
Definition: WarpXParser.H:80
int depth() const
Definition: WarpXParser.cpp:150
int nthreads
Definition: GpuParser.H:78
void wp_parser_delete(struct wp_parser *parser)
Definition: wp_parser_y.cpp:106
void wp_parser_regvar(struct wp_parser *parser, char const *name, amrex_real *p)
Definition: wp_parser_y.cpp:1087
void wp_parser_regvar_gpu(struct wp_parser *parser, char const *name, int i)
Definition: wp_parser_y.cpp:1093
GpuParser(WarpXParser const &wp)
Definition: GpuParser.H:83
Definition: GpuParser.H:23