88 #include "cuKernels.cuh" 101 void setBinom(std::vector<int>& cft);
122 int main(
int argc,
char** argv)
127 std::vector<realVortex> vtx;
132 std::vector<realPoint> vel;
136 std::vector<int> cft;
153 float timing[7], mintiming[7] = { 1e+10, 1e+10, 1e+10, 1e+10, 1e+10, 1e+10, 1e+10 }, avtiming[7]{};
154 double runtime, minruntime = 1e+10, avruntime = 0;
167 int* MmortonCodesKeyUnsortl;
168 int* MmortonCodesKeyl;
170 int* MmortonCodesIdxUnsortl;
171 int* MmortonCodesIdxl;
188 std::ifstream infile;
192 nnodes = nbodies * 2;
193 if (nnodes < 1024 * blocks)
194 nnodes = 1024 * blocks;
195 while ((nnodes & (32 - 1)) != 0)
207 fprintf(stderr,
"cannot allocate host memory\n"); exit(-1);
212 for (
int i = 0; i < nbodies; i++) {
213 infile >> vtx[i].r()[0] >> vtx[i].r()[1] >> vtx[i].g();
220 KernelsOptimization();
224 for (
int run = 0; run <
runs; run++)
226 for (
int i = 0; i < 6; i++)
231 unsigned long long int mem = 0;
233 massl = (
int*)cudaNew(nbodies - 1,
sizeof(
int));
234 mem += (nbodies - 1) *
sizeof(
int);
252 mem += 2 * blocks * FACTOR1 *
sizeof(
realPoint);
255 MmortonCodesKeyUnsortl = (
int*)cudaNew(nbodies,
sizeof(
int));
256 MmortonCodesKeyl = (
int*)cudaNew(nbodies,
sizeof(
int));
257 MmortonCodesIdxUnsortl = (
int*)cudaNew(nbodies,
sizeof(
int));
258 MmortonCodesIdxl = (
int*)cudaNew(nbodies,
sizeof(
int));
259 mem += 4 * nbodies *
sizeof(int);
263 mem += 2 * (nbodies - 1) *
sizeof(
realPoint);
265 Mparentl = (
int*)cudaNew(nnodes,
sizeof(
int));
266 mem += nnodes *
sizeof(int);
269 mem += (nbodies - 1) *
sizeof(
intPair);
272 mem += nnodes *
sizeof(
intPair);
274 MlevelUnsortl = (
int*)cudaNew(nbodies - 1,
sizeof(
int));
275 MlevelSortl = (
int*)cudaNew(nbodies - 1,
sizeof(
int));
276 MindexUnsortl = (
int*)cudaNew(nbodies - 1,
sizeof(
int));
277 MindexSortl = (
int*)cudaNew(nbodies - 1,
sizeof(
int));
278 MindexSortTl = (
int*)cudaNew(nbodies - 1,
sizeof(
int));
279 mem += 5 * (nbodies - 1) *
sizeof(
int);
281 printf(
"Total allocated memory: %llu bytes = %llu Mbytes\n", mem, mem >> 20);
286 cudaCopyVecToDevice(vtx.data(), vtxl, nbodies,
sizeof(
realVortex));
289 setBinomCftConst(cft.data());
294 double starttime, endtime;
295 starttime = omp_get_wtime();
298 timing[0] += cuInitializationKernel();
309 timing[1] += McuBoundingBoxKernel(nbodies, vtxl, Mposl, maxrl, minrl);
329 auto ttt1 = McuMortonCodesKernel(nbodies, vtxl, MmortonCodesKeyUnsortl, MmortonCodesIdxUnsortl, MmortonCodesKeyl, MmortonCodesIdxl, Mrangel);
342 auto ttt2 = McuMortonInternalNodesKernel(nbodies, MmortonCodesKeyl, Mparentl, Mchildl, Mrangel);
361 auto ttt3 = McuMortonInternalCellsGeometryKernel(nbodies, nnodes, MmortonCodesKeyl, Mposl, Msizel, Mrangel, MlevelUnsortl, MlevelSortl, MindexUnsortl, MindexSortl, MindexSortTl);
373 auto ttt4 = cuClearKernel2(nnodes, nbodies, massl, momsl);
391 timing[4] += cuSummarizationKernel2(nnodes, nbodies, Mchildl, massl, momsl, vtxl, MmortonCodesIdxl, Mposl, MindexSortl, MindexSortTl);
410 timing[5] += cuForceCalculationKernel2(nnodes, nbodies, itolsq, epssq, Mchildl, momsl, vtxl, MmortonCodesIdxl, Mposl, MindexSortl, MindexSortTl, vell, Msizel);
412 endtime = omp_get_wtime();
413 runtime = endtime - starttime;
414 if (minruntime > runtime)
415 minruntime = runtime;
416 avruntime += runtime;
418 timing[6] = timing[1] + timing[2] + timing[3] + timing[4] + timing[5];
420 for (
int i = 1; i <= 6; ++i)
422 if (mintiming[i] > timing[i])
423 mintiming[i] = timing[i];
424 avtiming[i] += timing[i];
427 PrintStatistics(run, runs, timing, mintiming, avtiming, runtime, minruntime, avruntime);
432 cudaCopyVecFromDevice(vell, vel.data(), nbodies,
sizeof(
realPoint));
433 for (
int i = 0; i < nbodies; ++i)
441 std::vector<realPoint> veloBS(nbodies);
446 std::ifstream f(
"../../res/velBS" +
task +
".txt");
447 if (realBSfromFile = f.good())
448 std::cout <<
"File with Biot-Savart results is found, loading it... " << std::flush;
450 std::cout <<
"File with Biot-Savart results is not found, computing it... " << std::flush;
455 float tBS = cuForceDirectCalculationKernel(nnodes, nbodies, epssq, vtxl, vellBS);
456 std::cout <<
"done!" << std::endl;
457 std::cout <<
"Time (Biot-Savart): " << tBS <<
" ms" << std::endl;
459 cudaCopyVecFromDevice(vellBS, veloBS.data(), nbodies,
sizeof(
realPoint));
461 for (
int i = 0; i < nbodies; ++i)
464 std::ofstream velFileBS(
"../../res/velBS" +
task +
".txt");
465 velFileBS.precision(16);
466 for (
int i = 0; i < nbodies; i++)
467 velFileBS << veloBS[i][0] <<
" " << veloBS[i][1] << std::endl;
472 std::ifstream velFileBS(
"../../res/velBS" +
task +
".txt");
473 for (
int i = 0; i < nbodies; i++)
474 velFileBS >> veloBS[i][0] >> veloBS[i][1];
476 std::cout <<
"done!" << std::endl;
479 real err = 0, absVel = 0;
480 for (
int i = 0; i < nbodies; i++)
482 err += (vel[i] - veloBS[i]).length<real>();
483 absVel = std::max(absVel, veloBS[i].length<real>());
499 std::ofstream velFile(
"../../res/BH" +
task +
".txt");
500 velFile.precision(16);
501 for (
int i = 0; i < nbodies; i++)
502 velFile << vel[i][0] <<
" " << vel[i][1] << std::endl;
505 std::cout <<
"Goodbye! " << std::endl;
517 cudaDelete(MmortonCodesKeyUnsortl);
518 cudaDelete(MmortonCodesIdxUnsortl);
519 cudaDelete(MmortonCodesKeyl);
520 cudaDelete(MmortonCodesIdxl);
524 cudaDelete(Mparentl);
528 cudaDelete(MlevelUnsortl);
529 cudaDelete(MlevelSortl);
530 cudaDelete(MindexUnsortl);
531 cudaDelete(MindexSortl);
539 for (
int i = 1; i <
order; ++i)
541 cft[i * order + 0] = 1;
542 cft[i * order + i] = 1;
543 for (
int j = 1; j < i; ++j)
544 cft[i * order + j] = cft[(i - 1) * order + j] + cft[(i - 1) * order + (j - 1)];
void PrintStatistics(int run, int runs, const float *timing, const float *mintiming, const float *avtiming, double runtime, double minruntime, double avruntime)
void PrintLogoToStream(std::ostream &str)
static const std::string nameFile
int main(int argc, char **argv)
void PrintConfiguration(int nbodies)
static const std::string task
#define THETA
Параметр точности
Параметры решаемой задачи
void PrintAccuracyError(real val)
static const bool BSfromFile
#define EPS
Радиус вихревого элемента
void setBinom(std::vector< int > &cft)
static const bool compare