VM2D  1.12
Vortex methods for 2D flows simulation
wrapper.cpp
Go to the documentation of this file.
1 /*--------------------------------*- BHgpu -*----------------*---------------*\
2 | ##### ## ## | | Version 1.5 |
3 | ## ## ## ## #### ## ## | BHgpu: Barnes-Hut method | 2023/08/29 |
4 | ##### ###### ## ## ## | for 2D vortex particles *----------------*
5 | ## ## ## ## ## ## ## | Open Source Code |
6 | ##### ## ## #### #### | https://www.github.com/vortexmethods/fastm |
7 | |
8 | Copyright (C) 2020-2023 I. Marchevsky, E. Ryatina, A. Kolganova |
9 | Copyright (C) 2013, Texas State University-San Marcos. All rights reserved. |
10 *-----------------------------------------------------------------------------*
11 | File name: wrapper.cpp |
12 | Info: Source code of BHgpu |
13 | |
14 | This file is part of BHgpu. |
15 | BHcu is free software: you can redistribute it and/or modify it |
16 | under the terms of the GNU General Public License as published by |
17 | the Free Software Foundation, either version 3 of the License, or |
18 | (at your option) any later version. |
19 | |
20 | BHcu is distributed in the hope that it will be useful, but WITHOUT |
21 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
22 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
23 | for more details. |
24 | |
25 | You should have received a copy of the GNU General Public License |
26 | along with BHgpu. If not, see <http://www.gnu.org/licenses/>. |
27 \*---------------------------------------------------------------------------*/
28 
29 /*
30  * Portions of this program were originally released under the following license
31  *
32  * CUDA BarnesHut v3.1: Simulation of the gravitational forces
33  * in a galactic cluster using the Barnes-Hut n-body algorithm
34  *
35  * Copyright (c) 2013, Texas State University-San Marcos. All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without modification,
38  * are permitted for academic, research, experimental, or personal use provided that
39  * the following conditions are met:
40  *
41  * * Redistributions of source code must retain the above copyright notice,
42  * this list of conditions and the following disclaimer.
43  * * Redistributions in binary form must reproduce the above copyright notice,
44  * this list of conditions and the following disclaimer in the documentation
45  * and/or other materials provided with the distribution.
46  * * Neither the name of Texas State University-San Marcos nor the names of its
47  * contributors may be used to endorse or promote products derived from this
48  * software without specific prior written permission.
49  *
50  * For all other uses, please contact the Office for Commercialization and Industry
51  * Relations at Texas State University-San Marcos <http://www.txstate.edu/ocir/>.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED
56  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
57  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
58  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
60  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
61  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
62  * OF THE POSSIBILITY OF SUCH DAMAGE.
63  *
64  * Author: Martin Burtscher <burtscher@txstate.edu>
65  *
66  */
67 
78 #include <algorithm>
79 #include <fstream>
80 #include <iostream>
81 #include <math.h>
82 #include <stdlib.h>
83 #include <stdio.h>
84 #include <vector>
85 
86 #include "omp.h"
87 
88 #include "types.cuh"
89 #include "cuKernels.cuh"
90 
91 
92 namespace BHcu
93 {
94 
95  const real IDPI = (real)0.15915494309189534;
96 
97  /******************************************************************************/
98  /******************************************************************************/
99 
100  struct CudaCalcGab
101  {
102  realPoint* maxpt;
103  realPoint* minpt;
104  int blocks;
105 
106  CudaCalcGab() //(realPoint*& maxpt, realPoint*& minpt, int blocks)
107  //: maxpt_(maxpt), minpt_(minpt), blocks_(blocks)
108  {
109  CudaSelect(0);
110  setBlocks(blocks);
111 
112  maxpt = (realPoint*)cudaNew(blocks * FACTOR1, sizeof(realPoint));
113  minpt = (realPoint*)cudaNew(blocks * FACTOR1, sizeof(realPoint));
114  };
115 
116  float calc(int npoints, const realVortex* pointsl)
117  {
118  float time;
119  time = McuBoundingBoxKernelFree(nullptr, maxpt, minpt, npoints, pointsl);
120  return time;
121  }
122 
124  {
125  cudaDelete(maxpt);
126  cudaDelete(maxpt);
127  }
128  };
129 
130  struct CudaSorter
131  {
134 
137 
138  int npoints_;
140  const realVortex* pointsl_;
141 
142  CudaSorter(int npoints, const realVortex* pointsl)
143  : npoints_(npoints), pointsl_(pointsl)
144  {
145  pointsMortonCodesKeyl = (int*)cudaNew(npoints, sizeof(int));
146  pointsMortonCodesKeyUnsortl = (int*)cudaNew(npoints, sizeof(int));
147 
148  pointsMortonCodesIdxl = (int*)cudaNew(npoints, sizeof(int));
149  pointsMortonCodesIdxUnsortl = (int*)cudaNew(npoints, sizeof(int));
150  };
151 
152  float calc()
153  {
154  float timeGab, timeCodes;
155  timeGab = gab.calc(npoints_, pointsl_);
156 
157  timeCodes = McuMortonCodesKernelFree(gab.maxpt, gab.minpt,
158  pointsMortonCodesKeyUnsortl, pointsMortonCodesIdxUnsortl,
159  pointsMortonCodesKeyl, pointsMortonCodesIdxl, nullptr,
160  npoints_, pointsl_);
161  return timeGab + timeCodes;
162  }
163 
165  {
166  cudaDelete(pointsMortonCodesKeyl);
167  cudaDelete(pointsMortonCodesKeyUnsortl);
168  cudaDelete(pointsMortonCodesIdxl);
169  cudaDelete(pointsMortonCodesIdxUnsortl);
170  }
171  };
172 
173 
174  void rebuildBaseTree(CUDApointers& ptrs, const int nbodies, const realVortex* vtxl, int nnodes, int order, double* timing)
175  {
176  timing[0] += cuInitializationKernel();
177  timing[1] += McuBoundingBoxKernel(ptrs, nbodies, vtxl);
178 
179  timing[2] += McuMortonCodesKernel(ptrs, nbodies, vtxl);
180  timing[2] += McuMortonInternalNodesKernel(ptrs, nbodies);
181  timing[2] += McuMortonInternalCellsGeometryKernel(ptrs, nbodies, nnodes);
182 
183  timing[3] += cuClearKernel2(ptrs, order, nnodes, nbodies);
184 
185  timing[4] += cuAABBKernel2(ptrs, nnodes, nbodies, vtxl);
186 
187  timing[4] += cuClearKernel2(ptrs, order, nnodes, nbodies);
188 
189  timing[4] += cuSummarizationKernel2(ptrs, order, nnodes, nbodies, vtxl);
190  }
191 
192 
193  double memoryAllocate(CUDApointers& ptrs, int nnodes, int nbodies, int nbodiesOld, int blocks, int order)
194  {
195  double starttime, endtime;
196  starttime = omp_get_wtime();
197 
198  if (nbodiesOld > 0)
199  {
200  //std::cout << "BHgpu: free CUDA-memory" << std::endl;
201  cudaDelete(ptrs.massl);
202 
203  cudaDelete(ptrs.momsl);
204  cudaDelete(ptrs.El);
205 
206  cudaDelete(ptrs.maxrl);
207  cudaDelete(ptrs.minrl);
208 
210  cudaDelete(ptrs.MmortonCodesKeyUnsortl);
211  cudaDelete(ptrs.MmortonCodesIdxUnsortl);
212  cudaDelete(ptrs.MmortonCodesKeyl);
213  cudaDelete(ptrs.MmortonCodesIdxl);
214 
215  cudaDelete(ptrs.Mposl);
216  cudaDelete(ptrs.Mlowerl);
217  cudaDelete(ptrs.Mupperl);
218  cudaDelete(ptrs.Mparentl);
219  cudaDelete(ptrs.Mchildl);
220  cudaDelete(ptrs.Mrangel);
221 
222  cudaDelete(ptrs.MlevelUnsortl);
223  cudaDelete(ptrs.MlevelSortl);
224  cudaDelete(ptrs.MindexUnsortl);
225  cudaDelete(ptrs.MindexSortl);
226  cudaDelete(ptrs.MindexSortTl);
227  }
228 
229  //std::cout << "BHgpu: allocation GPU-memory: nbodies = " << nbodies << ", nnodes = " << nnodes << ", order = " << order << std::endl;
230 
231  //unsigned long long int mem = 0;
232  ptrs.massl = (int*)cudaNew(nbodies - 1, sizeof(int));
233  //mem += (nbodies - 1) * sizeof(int);
234 
235  ptrs.momsl = (realPoint*)cudaNew((nbodies - 1) * order, sizeof(realPoint));
236 
237  //printf("ALLOCATED for MOMS = %d bytes for %d bodies, order = %d, sizeof = %d\n", int((nbodies - 1) * order * sizeof(realPoint)), nbodies - 1, order, sizeof(realPoint));
238 
239  ptrs.El = nullptr;
240  //mem += (nbodies - 1) * order * sizeof(realPoint);
241 
242  ptrs.maxrl = (realPoint*)cudaNew(blocks * FACTOR1, sizeof(realPoint));
243  ptrs.minrl = (realPoint*)cudaNew(blocks * FACTOR1, sizeof(realPoint));
244  //mem += 2 * blocks * FACTOR1 * sizeof(realPoint);
245 
247  ptrs.MmortonCodesKeyUnsortl = (int*)cudaNew(nbodies, sizeof(int));
248  ptrs.MmortonCodesKeyl = (int*)cudaNew(nbodies, sizeof(int));
249  ptrs.MmortonCodesIdxUnsortl = (int*)cudaNew(nbodies, sizeof(int));
250  ptrs.MmortonCodesIdxl = (int*)cudaNew(nbodies, sizeof(int));
251  //mem += 4 * nbodies * sizeof(int);
252 
253  ptrs.Mposl = (realPoint*)cudaNew(nbodies - 1, sizeof(realPoint));
254  ptrs.Mlowerl = (realPoint*)cudaNew(nbodies - 1, sizeof(realPoint));
255  ptrs.Mupperl = (realPoint*)cudaNew(nbodies - 1, sizeof(realPoint));
256  //mem += 3 * (nbodies - 1) * sizeof(realPoint);
257 
258  ptrs.Mparentl = (int*)cudaNew(nnodes, sizeof(int));
259  //mem += nnodes * sizeof(int);
260 
261  ptrs.Mchildl = (intPair*)cudaNew(nbodies - 1, sizeof(intPair));
262  //mem += (nbodies - 1) * sizeof(intPair);
263 
264  ptrs.Mrangel = (intPair*)cudaNew(nnodes, sizeof(intPair)); //Нужно ли для всех?
265  //mem += nnodes * sizeof(intPair);
266 
267  ptrs.MlevelUnsortl = (int*)cudaNew(nbodies - 1, sizeof(int));
268  ptrs.MlevelSortl = (int*)cudaNew(nbodies - 1, sizeof(int));
269  ptrs.MindexUnsortl = (int*)cudaNew(nbodies - 1, sizeof(int));
270  ptrs.MindexSortl = (int*)cudaNew(nbodies - 1, sizeof(int));
271  ptrs.MindexSortTl = (int*)cudaNew(nbodies - 1, sizeof(int));
272  //mem += 5 * (nbodies - 1) * sizeof(int);
273 
274  endtime = omp_get_wtime();
275  return endtime - starttime;
276  }
277 
278 
279  // N body <=> wake to wake
280  double wrapperInfluence(const realVortex* vtxl, realPoint* vell,
281  real* epsastl, CUDApointers& ptrs,
282  int nbodies, double* timing, real eps, real theta,
283  size_t& nbodiesOld, int nbodiesUp,
284  int order,
285  size_t nAfls, size_t* nVtxs, double** ptrVtxs)
286  {
287  double starttime, endtime;
288  starttime = omp_get_wtime();
289 
290  //Число мультипроцессоров, заполняется функцией setBlocks(blocks)
291  int blocks;
292 
293  //Число ячеек дерева и тел
294  int nnodes, nnodesUp;
295 
296  //Радиус вихря и параметр близости и их квадраты
297  real epssq = (real)(eps * eps);
298  real itolsq = (real)(1 / (theta * theta));
299 
300  CudaSelect(0);
301  setBlocks(blocks); //"достает" число блоков, равное числу мультипроцессоров (blocks - по ссылке)
302 
303  nnodes = nbodies * 2;
304  if (nnodes < 1024 * blocks)
305  nnodes = 1024 * blocks;
306  while ((nnodes & (32 - 1)) != 0) // 32 - это размер варпа
307  nnodes++;
308  nnodes--;
309 
310 
311  nnodesUp = nbodiesUp * 2;
312  if (nnodesUp < 1024 * blocks)
313  nnodesUp = 1024 * blocks;
314  while ((nnodesUp & (32 - 1)) != 0) // 32 - это размер варпа
315  nnodesUp++;
316  nnodesUp--;
317 
318  KernelsOptimization();
319 
320  for (int i = 0; i < 6; i++)
321  timing[i] = 0;
322 
323  if (nbodiesUp > nbodiesOld)
324  timing[1] += memoryAllocate(ptrs, nnodesUp, nbodiesUp, (int)nbodiesOld, blocks, order);
325 
326  nbodiesOld = nbodiesUp;
327  rebuildBaseTree(ptrs, nbodies, vtxl, nnodes, order, timing);
328 
329  timing[5] += cuForceCalculationKernel2points(ptrs, order, nnodes, nbodies, itolsq, epssq, vtxl,
330  ptrs.MmortonCodesIdxl, nbodies, vtxl, vell, true, epsastl, nAfls, nVtxs, ptrVtxs);
331  timing[6] = timing[1] + timing[2] + timing[3] + timing[4] + timing[5];
332 
333  endtime = omp_get_wtime();
334  return endtime - starttime;
335 
336  }
337 
338 
339  // wake to points
341  const realVortex* vtxl, const realVortex* pointsl, realPoint* vell, real* epsastl,
342  CUDApointers& ptrs, bool rebuild, int nbodies, int npoints, double* timing, real eps, real theta,
343  size_t& nbodiesOld, int nbodiesUp, int order,
344  size_t nAfls, size_t* nVtxs, double** ptrVtxs)
345  {
346  double starttime, endtime;
347  starttime = omp_get_wtime();
348 
349  //Число мультипроцессоров, заполняется функцией setBlocks(blocks)
350  int blocks;
351 
352  //Число ячеек дерева и тел
353  int nnodes, nnodesUp;
354 
355  //Радиус вихря и параметр близости и их квадраты
356  real epssq = (real)(eps * eps);
357  real itolsq = (real)(1 / (theta * theta));
358 
359  CudaSelect(0);
360  setBlocks(blocks); //"достает" число блоков, равное числу мультипроцессоров (blocks - по ссылке)
361 
362  nnodes = nbodies * 2;
363  if (nnodes < 1024 * blocks)
364  nnodes = 1024 * blocks;
365  while ((nnodes & (32 - 1)) != 0) // 32 - это размер варпа
366  nnodes++;
367  nnodes--;
368 
369  if (rebuild)
370  {
371  nnodesUp = nbodiesUp * 2;
372  if (nnodesUp < 1024 * blocks)
373  nnodesUp = 1024 * blocks;
374  while ((nnodesUp & (32 - 1)) != 0) // 32 - это размер варпа
375  nnodesUp++;
376  nnodesUp--;
377  }
378 
379  KernelsOptimization();
380 
381 
382  for (int i = 0; i < 6; i++)
383  timing[i] = 0;
384 
385  if (rebuild)
386  {
387  if (nbodiesUp > nbodiesOld)
388  timing[1] += memoryAllocate(ptrs, nnodesUp, nbodiesUp, (int)nbodiesOld, blocks, order);
389 
390  nbodiesOld = nbodiesUp;
391  rebuildBaseTree(ptrs, nbodies, vtxl, nnodes, order, timing);
392  }
393 
394  CudaSorter srt(npoints, pointsl);
395  timing[5] += srt.calc();
396 
397  timing[5] += cuForceCalculationKernel2points(ptrs, order, nnodes, nbodies, itolsq, epssq, vtxl, srt.pointsMortonCodesIdxl, npoints, pointsl, vell, true, epsastl,
398  nAfls, nVtxs, ptrVtxs);
399 
400 
401  timing[6] = timing[1] + timing[2] + timing[3] + timing[4] + timing[5];
402 
403  endtime = omp_get_wtime();
404  return endtime - starttime;
405 
406  }
407 
408 
409 
411  const realVortex* dev_ptr_vt, //вихри в следе
412  const double* dev_ptr_pt, //начала и концы панелей
413  double* dev_ptr_rhs, //куда сохранить результат (для T0 и верхней половины T1)
414  double* dev_ptr_rhslin, //куда сохранить результат (для нижней половины T1)
415 
416  CUDApointers& ptrs, //указатели на делево вихрей
417  bool rebuild, //признак перестроения делева вихрей
418 
419  int nvt, //число вихрей в следе
420  int nTotPan, //общее число панелей на всех профилях
421  double* timingsToRHS, //засечки времени
422  double theta, //theta
423  size_t& nbodiesOld, int nbodiesUp, int order, int scheme)
424  {
425  double starttime, endtime;
426  starttime = omp_get_wtime();
427 
428  //Число мультипроцессоров, заполняется функцией setBlocks(blocks)
429  int blocks;
430 
431  //Число ячеек дерева и тел
432  int nnodes, nnodesUp;
433 
434  //Радиус вихря и параметр близости и их квадраты
435  real itolsq = (real)(1 / (theta * theta));
436 
437  CudaSelect(0);
438  setBlocks(blocks); //"достает" число блоков, равное числу мультипроцессоров (blocks - по ссылке)
439 
440  nnodes = nvt * 2;
441  if (nnodes < 1024 * blocks)
442  nnodes = 1024 * blocks;
443  while ((nnodes & (32 - 1)) != 0) // 32 - это размер варпа
444  nnodes++;
445  nnodes--;
446 
447  if (rebuild)
448  {
449  nnodesUp = nbodiesUp * 2;
450  if (nnodesUp < 1024 * blocks)
451  nnodesUp = 1024 * blocks;
452  while ((nnodesUp & (32 - 1)) != 0) // 32 - это размер варпа
453  nnodesUp++;
454  nnodesUp--;
455  }
456 
457  KernelsOptimization();
458 
459  for (int i = 0; i < 6; i++)
460  timingsToRHS[i] = 0;
461 
462  if (rebuild)
463  {
464  if (nbodiesUp > nbodiesOld)
465  timingsToRHS[1] += memoryAllocate(ptrs, nnodesUp, nbodiesUp, (int)nbodiesOld, blocks, order);
466 
467  nbodiesOld = nbodiesUp;
468  rebuildBaseTree(ptrs, nvt, dev_ptr_vt, nnodes, order, timingsToRHS);
469  }
470 
471  Vortex2D* pointsl = (Vortex2D*)cudaNew(nTotPan, sizeof(Vortex2D));
472  realPoint* El = (realPoint*)cudaNew(nTotPan * order, sizeof(realPoint));
473 
474  McuVerticesToControlPoints(nTotPan, (double*)dev_ptr_pt, (double*)pointsl);
475 
476  CudaSorter srt(nTotPan, pointsl);
477  timingsToRHS[5] += srt.calc();
478 
479  double* ptrToLin = nullptr;
480  if (scheme == 1)
481  ptrToLin = dev_ptr_rhslin;
482 
483  timingsToRHS[5] += cuRhsCalculationKernel(ptrs, order, nnodes, nvt, itolsq, dev_ptr_vt,
484  srt.pointsMortonCodesIdxl, El,
485  nTotPan, dev_ptr_pt, (const real*)pointsl, dev_ptr_rhs, ptrToLin);
486 
487  cudaDelete(El);
488  cudaDelete(pointsl);
489 
490  timingsToRHS[6] = timingsToRHS[1] + timingsToRHS[2] + timingsToRHS[3] + timingsToRHS[4] + timingsToRHS[5];
491 
492  endtime = omp_get_wtime();
493  return endtime - starttime;
494 
495  }
496 
497  double wrapperDiffusiveVelo(const realVortex* vtxl, real* i1l, realPoint* i2l, real* epsastl, CUDApointers& ptrs, bool rebuild, int nbodies, double* timing, real eps, real theta, size_t& nbodiesOld, int nbodiesUp, int order,
498  size_t nAfls, size_t* nVtxs, double** ptrVtxs)
499  {
500  double starttime, endtime;
501  starttime = omp_get_wtime();
502 
503  //Число мультипроцессоров, заполняется функцией setBlocks(blocks)
504  int blocks;
505 
506  //Число ячеек дерева и тел
507  int nnodes, nnodesUp;
508 
509  //Радиус вихря и параметр близости и их квадраты
510  real epssq = (real)(eps * eps);
511  real itolsq = (real)(1 / (theta * theta));
512 
513  CudaSelect(0);
514  setBlocks(blocks); //"достает" число блоков, равное числу мультипроцессоров (blocks - по ссылке)
515 
516  nnodes = nbodies * 2;
517  if (nnodes < 1024 * blocks)
518  nnodes = 1024 * blocks;
519  while ((nnodes & (32 - 1)) != 0) // 32 - это размер варпа
520  nnodes++;
521  nnodes--;
522 
523  if (rebuild)
524  {
525  nnodesUp = nbodiesUp * 2;
526  if (nnodesUp < 1024 * blocks)
527  nnodesUp = 1024 * blocks;
528  while ((nnodesUp & (32 - 1)) != 0) // 32 - это размер варпа
529  nnodesUp++;
530  nnodesUp--;
531  }
532 
533  KernelsOptimization();
534 
535  for (int i = 0; i < 6; i++)
536  timing[i] = 0;
537 
538  if (rebuild)
539  {
540  if (nbodiesUp > nbodiesOld)
541  timing[1] += memoryAllocate(ptrs, nnodesUp, nbodiesUp, (int)nbodiesOld, blocks, order);
542 
543  nbodiesOld = nbodiesUp;
544  rebuildBaseTree(ptrs, nbodies, vtxl, nnodes, order, timing);
545  }
546 
547  timing[5] += cuDiffVelCalculationKernel2(ptrs, order, nnodes, nbodies, itolsq, epssq, vtxl, i1l, i2l, true, epsastl, nAfls, nVtxs, ptrVtxs);
548  timing[6] = timing[1] + timing[2] + timing[3] + timing[4] + timing[5];
549 
550  endtime = omp_get_wtime();
551  return endtime - starttime;
552  }
553 
554 
555 
556 
557 }
int * pointsMortonCodesIdxUnsortl
Definition: wrapper.cpp:136
double wrapperInfluence(const realVortex *vtxl, realPoint *vell, real *epsastl, CUDApointers &ptrs, int nbodies, double *timing, real eps, real theta, size_t &nbodiesOld, int nbodiesUp, int order, size_t nAfls, size_t *nVtxs, double **ptrVtxs)
Definition: wrapper.cpp:280
int * pointsMortonCodesIdxl
Definition: wrapper.cpp:135
realPoint * maxpt
Definition: wrapper.cpp:102
Definition: wrapper.cpp:92
double wrapperDiffusiveVelo(const realVortex *vtxl, real *i1l, realPoint *i2l, real *epsastl, CUDApointers &ptrs, bool rebuild, int nbodies, double *timing, real eps, real theta, size_t &nbodiesOld, int nbodiesUp, int order, size_t nAfls, size_t *nVtxs, double **ptrVtxs)
Definition: wrapper.cpp:497
int * pointsMortonCodesKeyUnsortl
Definition: wrapper.cpp:133
CudaCalcGab gab
Definition: wrapper.cpp:139
double wrapperInfluenceToRHS(const realVortex *dev_ptr_vt, const double *dev_ptr_pt, double *dev_ptr_rhs, double *dev_ptr_rhslin, CUDApointers &ptrs, bool rebuild, int nvt, int nTotPan, double *timingsToRHS, double theta, size_t &nbodiesOld, int nbodiesUp, int order, int scheme)
Definition: wrapper.cpp:410
void rebuildBaseTree(CUDApointers &ptrs, const int nbodies, const realVortex *vtxl, int nnodes, int order, double *timing)
Definition: wrapper.cpp:174
float calc(int npoints, const realVortex *pointsl)
Definition: wrapper.cpp:116
realPoint * minpt
Definition: wrapper.cpp:103
const realVortex * pointsl_
Definition: wrapper.cpp:140
CudaSorter(int npoints, const realVortex *pointsl)
Definition: wrapper.cpp:142
int * pointsMortonCodesKeyl
Definition: wrapper.cpp:132
const real IDPI
Definition: wrapper.cpp:95
double wrapperInfluenceToPoints(const realVortex *vtxl, const realVortex *pointsl, realPoint *vell, real *epsastl, CUDApointers &ptrs, bool rebuild, int nbodies, int npoints, double *timing, real eps, real theta, size_t &nbodiesOld, int nbodiesUp, int order, size_t nAfls, size_t *nVtxs, double **ptrVtxs)
npoints -
Definition: wrapper.cpp:340
double memoryAllocate(CUDApointers &ptrs, int nnodes, int nbodies, int nbodiesOld, int blocks, int order)
Definition: wrapper.cpp:193