13 std::pair<Point2D, Point2D>
GetGabarit(
const std::vector<Vortex2D>& vtx)
15 auto xx = std::minmax_element(vtx.begin(), vtx.end(), [](
const Vortex2D& P1,
const Vortex2D& P2) {
return P1.
r()[0] < P2.r()[0]; });
16 auto yy = std::minmax_element(vtx.begin(), vtx.end(), [](
const Vortex2D& P1,
const Vortex2D& P2) {
return P1.
r()[1] < P2.r()[1]; });
18 return { { xx.first->r()[0], yy.first->r()[1] }, { xx.second->r()[0], yy.second->r()[1] } };
27 v = (v | (v << 8)) & 0x00FF00FF;
33 v = (v | (v << 4)) & 0x0F0F0F0F;
39 v = (v | (v << 2)) & 0x33333333;
45 v = (v | (v << 1)) & 0x55555555;
59 const unsigned int& xx =
ExpandBits((
unsigned int)(rscale[0]));
60 const unsigned int& yy =
ExpandBits((
unsigned int)(rscale[1]));
61 return yy | (xx << 1);
71 unsigned char threads = omp_get_max_threads();
74 #pragma omp parallel num_threads(threads) 78 unsigned int l = omp_get_thread_num();
79 unsigned int div = n / omp_get_num_threads();
80 unsigned int mod = n % omp_get_num_threads();
81 unsigned int left_index = l < mod ? (div + (mod == 0 ? 0 : 1)) * l : n - (omp_get_num_threads() - l) * div;
82 unsigned int right_index = left_index + div - (mod > l ? 0 : 1);
84 for (
unsigned int digit = 0; digit <
sizeof(m->
key); ++digit)
86 unsigned int s_sum[256] = { 0 };
87 unsigned int s0[256] = { 0 };
88 unsigned char* b1 = (
unsigned char*)&source[right_index].key;
89 unsigned char* b2 = (
unsigned char*)&source[left_index].key;
95 for (
unsigned int i = 0; i < 256; i++)
97 s[i + 256 * l] = s0[i];
101 for (
unsigned int j = 0; j < threads; j++)
103 for (
unsigned int i = 0; i < 256; i++)
105 s_sum[i] += s[i + 256 * j];
108 s0[i] += s[i + 256 * j];
113 for (
unsigned int i = 1; i < 256; ++i)
115 s_sum[i] += s_sum[i - 1];
116 s0[i] += s_sum[i - 1];
118 unsigned char* b = (
unsigned char*)&source[right_index].key + digit;
123 dest[--s0[*b]] = *v1--;
127 std::swap(source, dest);
132 if (
sizeof(m->
key) == 1)
139 inline size_t BinSearch(
const std::vector<std::pair<double, size_t>>& currentNN,
double x,
int low,
int high)
143 if (x > currentNN[high].first)
146 while (low <= high) {
147 mid = (low + high) / 2;
149 if (currentNN[mid].first == x)
152 if (currentNN[mid].first < x)
160 void newSort(std::vector<std::pair<double, size_t>>& mass, std::vector<std::pair<double, size_t>>& dstKeys) {
161 size_t k = mass.size();
164 for (
size_t i = 0; i < k; i++) {
165 double elem = mass[i].first;
168 for (
size_t j = 0; j < k; j++) {
169 cnt += (mass[j].first < elem);
173 for (
size_t j = 0; j < i; j++)
174 cnt += (mass[j].first == elem);
175 dstKeys[cnt] = mass[i];
180 void newMerge(std::vector<std::pair<double, size_t>>& currentNN,
const std::vector<std::pair<double, size_t>>& candidateNN,
181 std::vector<size_t>& loc,
182 std::vector<size_t>& counter,
183 std::vector<size_t>& offset,
184 std::vector<size_t>& counterScan,
185 std::vector<std::pair<double, size_t>>& updateNN
188 const size_t k = candidateNN.size() / 2;
191 for (
size_t j = 0; j < 2 * k; ++j)
193 loc[j] =
BinSearch(currentNN, candidateNN[j].first, 0, (
int)k - 1);
198 for (
size_t j = 0; j < 2 * k; ++j)
206 for (
size_t j = 0; j < 2 * k; ++j)
208 if ((loc[j] > 0) && ((loc[j] == k) || (candidateNN[j].first == currentNN[loc[j] - 1].first)))
211 offset[j] = counter[2 * loc[j]]++;
226 for (
size_t j = 1; j < 2 * k; ++j)
227 counterScan[j] = counterScan[j - 1] + counter[j - 1];
237 for (
size_t j = 0; j < k; ++j)
239 index = counterScan[2 * j + 1];
242 updateNN[index] = currentNN[j];
249 for (
size_t j = 0; j < 2 * k; ++j)
251 if (2 * loc[j] < 2 * k)
253 index = counterScan[2 * loc[j]] + offset[j];
255 updateNN[index] = candidateNN[j];
259 currentNN.swap(updateNN);
266 const std::vector<Vortex2D>& vtx,
const std::pair<Point2D, Point2D>& gab,
267 const double scale,
const int sdvig,
268 std::vector<TParticleCode>& mortonCodeBoth,
269 std::vector<unsigned int>& s, std::vector<TParticleCode>& mortonCodeBoth_temp,
270 std::vector<TParticleCode>& mcdata, std::vector<TParticleCode>& mcquery,
271 std::vector<unsigned int>& iq,
272 std::vector<size_t>& initneig,
273 std::vector<std::vector<std::pair<double, size_t>>>& initdist,
279 time[0] = omp_get_wtime();
280 #pragma omp parallel for 281 for (
int i = 0; i < vtx.size(); ++i)
284 mortonCodeBoth[i].key =
Morton2D((vtx[i].r() - gab.first) * (0.75 / scale) + sdvig *
Point2D{ 0.05, 0.05 });
285 mortonCodeBoth[i].key <<= 1;
287 mortonCodeBoth[i].originNumber = i;
290 mortonCodeBoth[vtx.size() + i].key = mortonCodeBoth[i].key;
292 mortonCodeBoth[vtx.size() + i].key |= 1;
293 mortonCodeBoth[vtx.size() + i].originNumber = i;
298 time[1] = omp_get_wtime();
308 RSort_Parallel(mortonCodeBoth.data(), mortonCodeBoth_temp.data(), (int)mortonCodeBoth.size(), s.data());
310 time[2] = omp_get_wtime();
325 mcdata.reserve(vtx.size());
326 mcquery.reserve(vtx.size());
328 time[3] = omp_get_wtime();
331 iq.reserve(vtx.size());
332 for (
size_t i = 0, j = -1; i < mortonCodeBoth.size(); ++i)
334 if (mortonCodeBoth[i].key & 1)
336 mcquery.push_back(mortonCodeBoth[i]);
337 iq.push_back((
unsigned)(j + 1) == 0 ? 0 : (
unsigned int)j);
342 mcdata.push_back(mortonCodeBoth[i]);
346 time[4] = omp_get_wtime();
361 #pragma omp parallel for 362 for (
int q = 0; q < mcquery.size(); ++q)
365 size_t left = pt - k + 1;
371 if (pt > mcquery.size() - k - 1)
374 left = (mcquery.size() - 1) - (2 * k - 1);
380 time[5] = omp_get_wtime();
390 std::vector<std::pair<double, size_t>>
dist(2 * k, { -1.0, -1 });
393 std::vector<size_t> loc(2 * k);
394 std::vector<size_t> counter(2 * k, 0);
395 std::vector<size_t> offset(2 * k, 0);
396 std::vector<size_t> counterScan(2 * k, 0);
397 std::vector<std::pair<double, size_t>> updateNN(k, { 0.0, 0 });
399 std::vector<std::pair<double, size_t>> dstKeys1(2 * k, { 0.0, 0 });
400 std::vector<std::pair<double, size_t>> dstKeys(k, { 0.0, 0 });
402 time[6] = omp_get_wtime();
405 #pragma omp parallel for firstprivate(dstKeys1) 406 for (
int i = 0; i < initdist.size(); ++i)
409 for (
size_t j = 0; j < 2 * k; ++j)
410 initdist[mcquery[i].originNumber][j] = { (vtx[mcquery[i].originNumber].r() - vtx[mcdata[initneig[i] + j].originNumber].r()).length2(), mcdata[initneig[i] + j].originNumber };
413 newSort(initdist[mcquery[i].originNumber], dstKeys1);
415 initdist[mcquery[i].originNumber].resize(k);
418 #pragma omp parallel for firstprivate(dist) firstprivate(loc, counter, offset, counterScan, updateNN, dstKeys)//, firstprivate(mergeArray) 419 for (
int i = 0; i < initdist.size(); ++i)
421 for (
size_t j = 0; j < 2 * k; ++j)
422 dist[j] = { (vtx[mcquery[i].originNumber].r() - vtx[mcdata[initneig[i] + j].originNumber].r()).length2(), mcdata[initneig[i] + j].originNumber };
432 newMerge(initdist[mcquery[i].originNumber],
dist, loc, counter, offset, counterScan, updateNN);
434 newSort(initdist[mcquery[i].originNumber], dstKeys);
441 time[7] = omp_get_wtime();
447 void WakekNN(
const std::vector<Vortex2D>& vtx,
const size_t k, std::vector<std::vector<std::pair<double, size_t>>>& initdist) {
448 const size_t n = vtx.size();
451 double scale = std::max(gab.second[0] - gab.first[0], gab.second[1] - gab.first[1]);
454 std::vector<TParticleCode> mortonCodeBoth(2 * vtx.size());
457 std::vector<unsigned int> s(256 * omp_get_max_threads());
458 std::vector<TParticleCode> mortonCodeBoth_temp(2 * vtx.size());
462 std::vector<TParticleCode> mcdata, mcquery;
464 std::vector<unsigned int> iq;
469 std::vector<size_t> initneig(vtx.size(), -1);
479 double t1 = omp_get_wtime();
480 double tStart[5], tFinish[5];
482 for (
size_t sdvig = 0; sdvig <= 4; ++sdvig)
485 tStart[sdvig] = omp_get_wtime();
486 mainCycle((
int)k, vtx, gab, scale, (
int)sdvig,
488 s, mortonCodeBoth_temp, mcdata, mcquery,
489 iq, initneig, initdist, time[sdvig]);
490 tFinish[sdvig] = omp_get_wtime();
unsigned int key
Мортоновский код частицы
unsigned int ExpandBits(unsigned int v)
void newMerge(std::vector< std::pair< double, size_t >> ¤tNN, const std::vector< std::pair< double, size_t >> &candidateNN, std::vector< size_t > &loc, std::vector< size_t > &counter, std::vector< size_t > &offset, std::vector< size_t > &counterScan, std::vector< std::pair< double, size_t >> &updateNN)
HD Point2D & r()
Функция для доступа к радиус-вектору вихря
size_t BinSearch(const std::vector< std::pair< double, size_t >> ¤tNN, double x, int low, int high)
void mainCycle(const int k, const std::vector< Vortex2D > &vtx, const std::pair< Point2D, Point2D > &gab, const double scale, const int sdvig, std::vector< TParticleCode > &mortonCodeBoth, std::vector< unsigned int > &s, std::vector< TParticleCode > &mortonCodeBoth_temp, std::vector< TParticleCode > &mcdata, std::vector< TParticleCode > &mcquery, std::vector< unsigned int > &iq, std::vector< size_t > &initneig, std::vector< std::vector< std::pair< double, size_t >>> &initdist, double *time)
unsigned int Morton2D(const Point2D &r)
Класс, опеделяющий двумерный вектор
const int twoPowCodeLength
Класс, опеделяющий двумерный вихревой элемент
R dist(const numvector< T, n > &x, const numvector< P, n > &y)
Вычисление расстояния между двумя точками
void RSort_Parallel(TParticleCode *m, TParticleCode *m_temp, unsigned int n, unsigned int *s)
Сортировка массива из мортоновский кодов
std::pair< Point2D, Point2D > GetGabarit(const std::vector< Vortex2D > &vtx)
Поиск границ габаритного прямоугольника (левый нижний и правый верхний)
void WakekNN(const std::vector< Vortex2D > &vtx, const size_t k, std::vector< std::vector< std::pair< double, size_t >>> &initdist)
void newSort(std::vector< std::pair< double, size_t >> &mass, std::vector< std::pair< double, size_t >> &dstKeys)