45 S
x = S(),
y = S(),
z = S();
47 void get(
const S* p) {
48 x = p[0],
y = p[1],
z = p[2];
51 p[0] =
x, p[1] =
y, p[2] =
z;
68 template<
class T,
class S>
75 template<
class T,
class S>
80 std::vector<elem<T,S> > * elems = NULL;
83 if(elems)
delete elems;
93 double log_two = log(val) / log(2.0);
94 int log_two_int = log_two;
96 return (fabs(
double(log_two_int) - log_two) < 1e-6);
107 template<
class T,
class S>
114 template<
class T,
class S>
117 return lhs.
v1 < rhs.v1;
131 template<
typename V,
typename W>
132 V
clamp(
const V val,
const W start,
const W end) {
133 if(val < start)
return start;
134 if(val > end)
return end;
140 inline void dsp_from_cnt(
const std::vector<T> & cnt, std::vector<T> & dsp)
142 dsp.resize(cnt.size()+1);
144 for(
size_t i=0; i<cnt.size(); i++) dsp[i+1] = dsp[i] + cnt[i];
149 inline void cnt_from_dsp(
const std::vector<T> & dsp, std::vector<T> & cnt)
151 cnt.resize(dsp.size() - 1);
152 for(
size_t i=0; i<dsp.size()-1; i++) cnt[i] = dsp[i+1] - dsp[i];
155 template<
class V,
class W>
156 void sort_copy(std::vector<V> & v1, std::vector<W> & v2)
158 assert(v1.size() == v2.size());
160 std::vector<mixed_pair<V,W> > pair_array(v1.size());
162 for(
size_t i=0; i<v1.size(); i++) {
163 pair_array[i].v1 = v1[i];
164 pair_array[i].v2 = v2[i];
167 std::sort(pair_array.begin(), pair_array.end());
169 for(
size_t i=0; i<v1.size(); i++) {
170 v1[i] = pair_array[i].v1;
171 v2[i] = pair_array[i].v2;
185 #define KD_ORDER_INC 5.0 187 #define KD_MIN_SIZE 64 190 template<
class T,
class S>
195 std::vector<kdpart::partition<T,S> > _layout;
202 double minmax[6], minmax_red[6];
211 for(
size_t i=0; i<elems.size(); i++) {
214 if(minmax[0] > p.
x) minmax[0] = p.
x;
215 if(minmax[1] > p.
y) minmax[1] = p.
y;
216 if(minmax[2] > p.
z) minmax[2] = p.
z;
217 if(minmax[3] < p.
x) minmax[3] = p.
x;
218 if(minmax[4] < p.
y) minmax[4] = p.
y;
219 if(minmax[5] < p.
z) minmax[5] = p.
z;
222 MPI_Allreduce(minmax, minmax_red, 3, MPI_DOUBLE, MPI_MIN, _comm);
223 MPI_Allreduce(minmax+3, minmax_red+3, 3, MPI_DOUBLE, MPI_MAX, _comm);
229 min.
x = minmax_red[0];
230 min.
y = minmax_red[1];
231 min.
z = minmax_red[2];
232 max.
x = minmax_red[3];
233 max.
y = minmax_red[4];
234 max.
z = minmax_red[5];
245 S
x = fabs(min.
x - max.
x);
246 S
y = fabs(min.
y - max.
y);
247 S
z = fabs(min.
z - max.
z);
249 return x > y && x > z ?
X : y > z ?
Y :
Z;
259 inline void print_layout(
const T split_pos = -1)
261 int rank; MPI_Comm_rank(_comm, &rank);
263 if(rank != 0)
return;
267 while(idx < split_pos) {
268 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
273 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
274 printf(
"%d : %d \n",
int(_layout[idx+1].pidx),
int(_layout[idx+1].cnt));
278 while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
279 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
286 while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
287 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
294 inline void get_parallel_median_split(std::vector<
mixed_pair<S,T>> & vals,
296 std::vector<bool> & on_left_side)
299 MPI_Comm_size(_comm, &size);
300 MPI_Comm_rank(_comm, &rank);
302 on_left_side.resize(vals.size());
303 std::sort(vals.begin(), vals.end());
305 size_t nelem = vals.size();
306 double bucket_size = (max*1.05 -
min) /
double(size);
307 std::vector<int> buckets(
size_t(size),
int(0));
308 for(
size_t i=0; i<nelem; i++) {
309 int idx = (vals[i].v1 -
min) / bucket_size;
314 std::vector<int> glob_buckets(
size_t(size),
int(0));
315 MPI_Allreduce(buckets.data(), glob_buckets.data(), size, MPI_INT, MPI_SUM, _comm);
318 int glob_sum = std::accumulate(glob_buckets.begin(), glob_buckets.end(), 0);
319 int lhalf = (glob_sum + 1) / 2;
321 int dsp = 0, bucket_idx = 0;
322 while(bucket_idx < size && (dsp + glob_buckets[bucket_idx]) <= lhalf) {
323 dsp += glob_buckets[bucket_idx];
329 if(bucket_idx < 0 || bucket_idx >= size) {
330 fprintf(stderr,
"Error: Illegal bucket index !!\n");
334 std::vector<double> loc_val_bucket, glob_val_bucket;
336 std::vector<short> global_split_bucket, split_bucket(buckets[bucket_idx]);
338 loc_val_bucket.resize(buckets[bucket_idx]);
339 for(
size_t i=0, widx=0; i<nelem; i++) {
340 int idx = (vals[i].v1 -
min) / bucket_size;
343 if(idx == bucket_idx)
344 loc_val_bucket[widx++] = vals[i].v1;
347 std::vector<int> rcnt(size), rdsp(size);
348 MPI_Gather(&buckets[bucket_idx], 1, MPI_INT, rcnt.data(), 1, MPI_INT, bucket_idx, _comm);
351 if(rank == bucket_idx) {
352 glob_val_bucket.resize(glob_buckets[bucket_idx]);
353 global_split_bucket.resize(glob_buckets[bucket_idx]);
357 MPI_Gatherv(loc_val_bucket.data(), buckets[bucket_idx], MPI_DOUBLE,
358 glob_val_bucket.data(), rcnt.data(), rdsp.data(), MPI_DOUBLE,
364 if(rank == bucket_idx) {
365 size_t bsize = glob_val_bucket.size();
367 std::vector<int> bucket_perm(bsize);
368 for(
size_t i=0; i<bsize; i++) bucket_perm[i] = i;
371 int loc_half_idx = lhalf - dsp;
374 if(loc_half_idx < 0 || loc_half_idx >=
int(glob_val_bucket.size())) {
375 fprintf(stderr,
"Error: Illegal local val index!!\n");
378 for(
int i=0; i <= loc_half_idx; i++)
379 global_split_bucket[bucket_perm[i]] = 1;
381 for(
int i=loc_half_idx+1; i < int(bsize); i++)
382 global_split_bucket[bucket_perm[i]] = 0;
385 MPI_Scatterv(global_split_bucket.data(), rcnt.data(), rdsp.data(), MPI_SHORT,
386 split_bucket.data(), buckets[bucket_idx], MPI_SHORT,
389 for(
size_t i=0, ridx=0; i<nelem; i++) {
390 int pidx = vals[i].v2;
391 int idx = (vals[i].v1 -
min) / bucket_size;
395 on_left_side[pidx] =
true;
396 else if(idx == bucket_idx)
397 on_left_side[pidx] = split_bucket[ridx++] == 1;
399 on_left_side[pidx] =
false;
415 size_t nelem = parent.
elems->size();
417 std::vector<mixed_pair<S,T>> vals(nelem);
419 double min = 0, max = 0;
420 switch(longest_axis) {
422 min = parent.
box.bounds[0].x, max = parent.
box.bounds[1].x;
423 for(
size_t i=0; i<nelem; i++) {
431 min = parent.
box.bounds[0].y, max = parent.
box.bounds[1].y;
432 for(
size_t i=0; i<nelem; i++) {
440 min = parent.
box.bounds[0].z, max = parent.
box.bounds[1].z;
441 for(
size_t i=0; i<nelem; i++) {
452 std::vector<bool> on_left;
453 get_parallel_median_split(vals, min, max, on_left);
456 size_t left_size = 0, right_size = 0;
458 for(
size_t i=0; i<nelem; i++) {
459 if(on_left[i]) left_size++;
463 lchild.
elems =
new std::vector<elem<T,S> >(left_size), rchild.
elems =
new std::vector<
elem<T,S> >(right_size);
465 left_size = 0, right_size = 0;
466 for(
size_t i=0; i<nelem; i++) {
467 if(on_left[i]) (*lchild.
elems)[left_size++ ] = (*parent.
elems)[i];
468 else (*rchild.
elems)[right_size++] = (*parent.
elems)[i];
475 int buff[2] = {int(lchild.
elems->size()),
int(rchild.
elems->size())};
476 MPI_Allreduce(MPI_IN_PLACE, buff, 2, MPI_INT, MPI_SUM, _comm);
477 lchild.
cnt = buff[0], rchild.
cnt = buff[1];
479 if(lchild.
cnt == 0 || rchild.
cnt == 0) {
480 fprintf(stderr,
"Error: Empty partitioning!!\n");
483 lchild.
box = get_bbox(*lchild.
elems);
484 rchild.
box = get_bbox(*rchild.
elems);
492 inline void update_layout(
const T split_pos) {
493 assert(
size_t(split_pos) < _layout.size());
495 T idx_at_split = _layout[split_pos].pidx;
496 T start = _cur_part_number - 1, stop = split_pos + 1;
499 for(T i = start; i > stop; i--) {
500 _layout[i] = _layout[i-1];
505 median_split(_layout[split_pos], _layout[split_pos], _layout[split_pos+1]);
506 _layout[split_pos].pidx = idx_at_split;
507 _layout[split_pos+1].pidx = idx_at_split+1;
518 inline T get_split_pos()
520 T idx = 0, max = _layout[0].cnt, maxidx = 0;
522 while(idx < _cur_part_number && _layout[idx].cnt > -1) {
523 if(max < _layout[idx].cnt) {
524 max = _layout[idx].cnt;
534 inline void operator() (
const MPI_Comm comm,
const std::vector<S> & ctr,
const int req_part,
535 std::vector<T> & part_vec)
539 assert(ctr.size() % 3 == 0);
542 long int l_numelem = ctr.size() / 3, g_numelem;
543 MPI_Allreduce(&l_numelem, &g_numelem, 1, MPI_LONG, MPI_SUM, _comm);
544 assert(g_numelem > 0);
547 MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
551 bool redistribute_remainder =
false;
563 npart = pow(2, npart);
569 redistribute_remainder =
true;
572 assert(g_numelem > (
long int)npart);
575 _cur_part_number = 1;
576 _layout.resize(
size_t(npart));
578 _layout[0].elems =
new std::vector<kdpart::elem<T,S> >(l_numelem);
579 _layout[0].cnt = g_numelem;
581 std::vector<kdpart::elem<T,S> > & elems = *_layout[0].elems;
584 for(
long int i=0; i<l_numelem; i++) {
585 elems[i].ctr.get(ctr.data() + i*3);
589 _layout[0].box = get_bbox(elems);
597 while(_cur_part_number < npart) {
598 T split_pos = get_split_pos();
601 update_layout(split_pos);
607 if(redistribute_remainder) {
609 T base_size = npart / npart_old;
612 T extended_size = base_size + 1;
613 T remainder = npart % npart_old;
616 for(T i=0; i<remainder; i++) {
617 for(T j=0; j<extended_size; j++)
618 _layout[i*extended_size+j].pidx = i;
622 for(T i=remainder*extended_size, pidx=remainder; i<T(_layout.size()); i+=base_size, pidx++) {
623 for(T j=0; j<base_size; j++)
624 _layout[i+j].pidx = pidx;
631 part_vec.assign(l_numelem, T(-1));
634 part_vec[e.eidx] = p.pidx;
641 template<
class T,
class S>
646 std::vector<kdpart::partition<T,S> > _layout;
657 min.
x = p.
x, min.
y = p.
y, min.
z = p.
z;
658 max.
x = p.
x, max.
y = p.
y, max.
z = p.
z;
660 for(
size_t i=1; i<elems.size(); i++) {
663 if(min.
x > p.
x) min.
x = p.
x;
664 if(min.
y > p.
y) min.
y = p.
y;
665 if(min.
z > p.
z) min.
z = p.
z;
666 if(max.
x < p.
x) max.
x = p.
x;
667 if(max.
y < p.
y) max.
y = p.
y;
668 if(max.
z < p.
z) max.
z = p.
z;
680 S
x = fabs(min.
x - max.
x);
681 S
y = fabs(min.
y - max.
y);
682 S
z = fabs(min.
z - max.
z);
684 return x > y && x > z ?
X : y > z ?
Y :
Z;
694 inline void print_layout(
const T split_pos = -1)
698 while(idx < split_pos) {
699 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
704 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
705 printf(
"%d : %d \n",
int(_layout[idx+1].pidx),
int(_layout[idx+1].cnt));
709 while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
710 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
717 while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
718 printf(
"%d : %d \n",
int(_layout[idx].pidx),
int(_layout[idx].cnt));
737 size_t nelem = parent.
elems->size();
740 std::vector<mixed_pair<S,T> > pairs(nelem);
741 for(
size_t i=0; i<nelem; i++) {
744 switch(longest_axis) {
745 case X: val = p.
x;
break;
746 case Y: val = p.
y;
break;
747 case Z: val = p.
z;
break;
751 pairs[i] = {val, T(i)};
753 std::sort(pairs.begin(), pairs.end());
757 size_t lnum = (nelem + 1) / 2, rnum = nelem - lnum;
758 lchild.
elems =
new std::vector<elem<T,S> >(lnum), rchild.
elems =
new std::vector<
elem<T,S> >(rnum);
760 for(
size_t i=0; i<lnum; i++) {
761 T lidx = pairs[i].v2;
765 for(
size_t i=0; i<rnum; i++) {
766 T lidx = pairs[lnum + i].v2;
776 lchild.
box = get_bbox(*lchild.
elems);
777 rchild.
box = get_bbox(*rchild.
elems);
785 inline void update_layout(
const T split_pos) {
786 assert(
size_t(split_pos) < _layout.size());
788 T idx_at_split = _layout[split_pos].pidx;
789 T start = _cur_part_number - 1, stop = split_pos + 1;
792 for(T i = start; i > stop; i--) {
793 _layout[i] = _layout[i-1];
798 median_split(_layout[split_pos], _layout[split_pos], _layout[split_pos+1]);
799 _layout[split_pos].pidx = idx_at_split;
800 _layout[split_pos+1].pidx = idx_at_split+1;
811 inline T get_split_pos()
813 T idx = 0,
max = _layout[0].cnt, maxidx = 0;
815 while(idx < _cur_part_number && _layout[idx].cnt > -1) {
816 if(
max < _layout[idx].cnt) {
817 max = _layout[idx].cnt;
827 inline void operator() (
const std::vector<S> & ctr, T npart, std::vector<T> & part_vec)
829 assert(ctr.size() % 3 == 0);
831 size_t nelem = ctr.size() / 3;
832 assert(nelem >
size_t(npart));
834 bool redistribute_remainder =
false;
845 npart = (log(
double(npart)) / log(2.0)) + 5.0;
846 npart = pow(2, npart);
852 redistribute_remainder =
true;
856 _layout.resize(
size_t(npart));
857 part_vec.assign(nelem, T(-1));
859 _cur_part_number = 1;
861 _layout[0].elems =
new std::vector<kdpart::elem<T,S> >(nelem);
862 std::vector<kdpart::elem<T,S> > & elems = *_layout[0].elems;
864 for(
size_t i=0; i<nelem; i++) {
865 elems[i].ctr.get(ctr.data() + i*3);
869 _layout[0].box = get_bbox(elems);
872 while(_cur_part_number < npart) {
873 T split_pos = get_split_pos();
876 update_layout(split_pos);
879 if(redistribute_remainder) {
880 T base_size = npart / npart_old, extended_size = base_size + 1;
881 T remainder = npart % npart_old;
883 for(T i=0; i<remainder; i++) {
884 for(T j=0; j<extended_size; j++)
885 _layout[i*extended_size+j].pidx = i;
889 for(T i=remainder*extended_size, pidx=remainder; i<T(_layout.size()); i+=base_size, pidx++) {
890 for(T j=0; j<base_size; j++)
891 _layout[i+j].pidx = pidx;
900 part_vec[e.eidx] = p.pidx;
constexpr T min(T a, T b)
#define KD_ORDER_INC
The value we add to the power of two if we need higher partitioning resolution.
constexpr T max(T a, T b)
Combined floating point and integer pair.
minimalistic internal point struct
void cnt_from_dsp(const std::vector< T > &dsp, std::vector< T > &cnt)
Compute counts from displacements.
std::vector< elem< T, S > > * elems
(local) elements in partition
kdpart::bbox< S > box
(global) partition bounding box
kdpart::vec3< S > bounds[2]
the bounds. bounds[0] = lower left (min), bounds[1] = upper right (max)
bool is_power_of_two(double val)
Check if a given value is an integer power of two.
T cnt
(global) partition size
the struct holding all partition data
void sort_copy(std::vector< V > &v1, std::vector< W > &v2)
void dsp_from_cnt(const std::vector< T > &cnt, std::vector< T > &dsp)
Compute displacements from counts.
vec3< S > ctr
element center location
V clamp(const V val, const W start, const W end)
Clamp a value into an interval [start, end].