45   S 
x = S(), 
y = S(), 
z = S();
 
   47   void get(
const S* p) {
 
   48     x = p[0], 
y = p[1], 
z = p[2];
 
   51     p[0] = 
x, p[1] = 
y, p[2] = 
z;
 
   68 template<
class T, 
class S>
 
   75 template<
class T, 
class S>
 
   80   std::vector<elem<T,S> > * 
elems = NULL;       
 
   93   double log_two     = log(val) / log(2.0);
 
   94   int    log_two_int = log_two;
 
   96   return (fabs(
double(log_two_int) - log_two) < 1e-6);
 
  107 template<
class T, 
class S>
 
  114 template<
class T, 
class S>
 
  117   return lhs.
v1 < rhs.
v1;
 
  131 template<
typename V, 
typename W>
 
  132 V 
clamp(
const V val, 
const W start, 
const W end) {
 
  133   if(val < start) 
return start;
 
  134   if(val > end) 
return end;
 
  140 inline void dsp_from_cnt(
const std::vector<T> & cnt, std::vector<T> & dsp)
 
  142   dsp.resize(cnt.size()+1);
 
  144   for(
size_t i=0; i<cnt.size(); i++) dsp[i+1] = dsp[i] + cnt[i];
 
  149 inline void cnt_from_dsp(
const std::vector<T> & dsp, std::vector<T> & cnt)
 
  151   cnt.resize(dsp.size() - 1);
 
  152   for(
size_t i=0; i<dsp.size()-1; i++) cnt[i] = dsp[i+1] - dsp[i];
 
  155 template<
class V, 
class W>
 
  156 void sort_copy(std::vector<V> & v1, std::vector<W> & v2)
 
  158   assert(v1.size() == v2.size());
 
  160   std::vector<mixed_pair<V,W> > pair_array(v1.size());
 
  162   for(
size_t i=0; i<v1.size(); i++) {
 
  163     pair_array[i].v1 = v1[i];
 
  164     pair_array[i].v2 = v2[i];
 
  167   std::sort(pair_array.begin(), pair_array.end());
 
  169   for(
size_t i=0; i<v1.size(); i++) {
 
  170     v1[i] = pair_array[i].v1;
 
  171     v2[i] = pair_array[i].v2;
 
  185 #define KD_ORDER_INC 5.0 
  187 #define KD_MIN_SIZE 64 
  190 template<
class T, 
class S>
 
  195   std::vector<kdpart::partition<T,S> > _layout;  
 
  202     double minmax[6], minmax_red[6];
 
  211     for(
size_t i=0; i<elems.size(); i++) {
 
  214       if(minmax[0] > p.
x) minmax[0] = p.
x;
 
  215       if(minmax[1] > p.
y) minmax[1] = p.
y;
 
  216       if(minmax[2] > p.
z) minmax[2] = p.
z;
 
  217       if(minmax[3] < p.
x) minmax[3] = p.
x;
 
  218       if(minmax[4] < p.
y) minmax[4] = p.
y;
 
  219       if(minmax[5] < p.
z) minmax[5] = p.
z;
 
  222     MPI_Allreduce(minmax, minmax_red, 3, MPI_DOUBLE, MPI_MIN, _comm);
 
  223     MPI_Allreduce(minmax+3, minmax_red+3, 3, MPI_DOUBLE, MPI_MAX, _comm);
 
  229     min.x = minmax_red[0];
 
  230     min.y = minmax_red[1];
 
  231     min.z = minmax_red[2];
 
  232     max.x = minmax_red[3];
 
  233     max.y = minmax_red[4];
 
  234     max.z = minmax_red[5];
 
  245     S x = fabs(
min.x - 
max.x);
 
  246     S y = fabs(
min.y - 
max.y);
 
  247     S z = fabs(
min.z - 
max.z);
 
  249     return x > y && x > z ? 
X : y > z ? 
Y : 
Z;
 
  259   inline void print_layout(
const T split_pos = -1)
 
  261     int rank; MPI_Comm_rank(_comm, &rank);
 
  263     if(rank != 0) 
return;
 
  267       while(idx < split_pos) {
 
  268         printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  273       printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  274       printf(
"%d : %d \n", 
int(_layout[idx+1].pidx), 
int(_layout[idx+1].cnt));
 
  278       while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
 
  279         printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  286       while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
 
  287         printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  294   inline void get_parallel_median_split(std::vector<
mixed_pair<S,T>> & vals,
 
  296                                         std::vector<bool> & on_left_side)
 
  299     MPI_Comm_size(_comm, &size);
 
  300     MPI_Comm_rank(_comm, &rank);
 
  302     on_left_side.resize(vals.size());
 
  303     std::sort(vals.begin(), vals.end());
 
  305     size_t nelem = vals.size();
 
  306     double bucket_size = (
max*1.05 - 
min) / 
double(size);
 
  307     std::vector<int> buckets(
size_t(size), 
int(0));
 
  308     for(
size_t i=0; i<nelem; i++) {
 
  309       int idx = (vals[i].v1 - 
min) / bucket_size;
 
  314     std::vector<int> glob_buckets(
size_t(size), 
int(0));
 
  315     MPI_Allreduce(buckets.data(), glob_buckets.data(), size, MPI_INT, MPI_SUM, _comm);
 
  318     int glob_sum = std::accumulate(glob_buckets.begin(), glob_buckets.end(), 0);
 
  319     int lhalf = (glob_sum + 1) / 2;
 
  321     int dsp = 0, bucket_idx = 0;
 
  322     while(bucket_idx < size && (dsp + glob_buckets[bucket_idx]) <= lhalf) {
 
  323       dsp += glob_buckets[bucket_idx];
 
  329     if(bucket_idx < 0 || bucket_idx >= size) {
 
  330       fprintf(stderr, 
"Error: Illegal bucket index !!\n");
 
  334     std::vector<double> loc_val_bucket, glob_val_bucket;
 
  336     std::vector<short> global_split_bucket, split_bucket(buckets[bucket_idx]);
 
  338     loc_val_bucket.resize(buckets[bucket_idx]);
 
  339     for(
size_t i=0, widx=0; i<nelem; i++) {
 
  340       int idx = (vals[i].v1 - 
min) / bucket_size;
 
  343       if(idx == bucket_idx)
 
  344         loc_val_bucket[widx++] = vals[i].v1;
 
  347     std::vector<int> rcnt(size), rdsp(size);
 
  348     MPI_Gather(&buckets[bucket_idx], 1, MPI_INT, rcnt.data(), 1, MPI_INT, bucket_idx, _comm);
 
  351     if(rank == bucket_idx) {
 
  352       glob_val_bucket.resize(glob_buckets[bucket_idx]);
 
  353       global_split_bucket.resize(glob_buckets[bucket_idx]);
 
  357     MPI_Gatherv(loc_val_bucket.data(), buckets[bucket_idx], MPI_DOUBLE,
 
  358                 glob_val_bucket.data(), rcnt.data(), rdsp.data(), MPI_DOUBLE,
 
  364     if(rank == bucket_idx) {
 
  365       size_t bsize = glob_val_bucket.size();
 
  367       std::vector<int> bucket_perm(bsize);
 
  368       for(
size_t i=0; i<bsize; i++) bucket_perm[i] = i;
 
  371       int loc_half_idx = lhalf - dsp;
 
  374       if(loc_half_idx < 0 || loc_half_idx >= 
int(glob_val_bucket.size())) {
 
  375         fprintf(stderr, 
"Error: Illegal local val index!!\n");
 
  378       for(
int i=0; i <= loc_half_idx; i++)
 
  379         global_split_bucket[bucket_perm[i]] = 1;
 
  381       for(
int i=loc_half_idx+1; i < int(bsize); i++)
 
  382         global_split_bucket[bucket_perm[i]] = 0;
 
  385     MPI_Scatterv(global_split_bucket.data(), rcnt.data(), rdsp.data(), MPI_SHORT,
 
  386                  split_bucket.data(), buckets[bucket_idx], MPI_SHORT,
 
  389     for(
size_t i=0, ridx=0; i<nelem; i++) {
 
  390       int pidx = vals[i].v2;
 
  391       int idx = (vals[i].v1 - 
min) / bucket_size;
 
  395         on_left_side[pidx] = 
true;
 
  396       else if(idx == bucket_idx)
 
  397         on_left_side[pidx] = split_bucket[ridx++] == 1;
 
  399         on_left_side[pidx] = 
false;
 
  415     size_t nelem = parent.
elems->size();
 
  417     std::vector<mixed_pair<S,T>> vals(nelem);
 
  420     switch(longest_axis) {
 
  422         min = parent.
box.bounds[0].x, 
max = parent.
box.bounds[1].x;
 
  423         for(
size_t i=0; i<nelem; i++) {
 
  431         min = parent.
box.bounds[0].y, 
max = parent.
box.bounds[1].y;
 
  432         for(
size_t i=0; i<nelem; i++) {
 
  440         min = parent.
box.bounds[0].z, 
max = parent.
box.bounds[1].z;
 
  441         for(
size_t i=0; i<nelem; i++) {
 
  452     std::vector<bool> on_left;
 
  453     get_parallel_median_split(vals, 
min, 
max, on_left);
 
  456     size_t left_size = 0, right_size = 0;
 
  458     for(
size_t i=0; i<nelem; i++) {
 
  459       if(on_left[i]) left_size++;
 
  463     lchild.
elems = 
new std::vector<elem<T,S> >(left_size), rchild.
elems = 
new std::vector<
elem<T,S> >(right_size);
 
  465     left_size = 0, right_size = 0;
 
  466     for(
size_t i=0; i<nelem; i++) {
 
  467       if(on_left[i]) (*lchild.
elems)[left_size++ ] = (*parent.
elems)[i];
 
  468       else           (*rchild.
elems)[right_size++] = (*parent.
elems)[i];
 
  475     int buff[2] = {int(lchild.
elems->size()), int(rchild.
elems->size())};
 
  476     MPI_Allreduce(MPI_IN_PLACE, buff, 2, MPI_INT, MPI_SUM, _comm);
 
  477     lchild.
cnt = buff[0], rchild.
cnt = buff[1];
 
  479     if(lchild.
cnt == 0 || rchild.
cnt == 0) {
 
  480       fprintf(stderr, 
"Error: Empty partitioning!!\n");
 
  483     lchild.
box = get_bbox(*lchild.
elems);
 
  484     rchild.
box = get_bbox(*rchild.
elems);
 
  492   inline void update_layout(
const T split_pos) {
 
  493     assert(
size_t(split_pos) < _layout.size());
 
  495     T idx_at_split = _layout[split_pos].pidx;
 
  496     T start = _cur_part_number - 1, stop = split_pos + 1;
 
  499     for(T i = start; i > stop; i--) {
 
  500       _layout[i] = _layout[i-1];  
 
  505     median_split(_layout[split_pos], _layout[split_pos], _layout[split_pos+1]);
 
  506     _layout[split_pos].pidx   = idx_at_split;
 
  507     _layout[split_pos+1].pidx = idx_at_split+1;
 
  518   inline T get_split_pos()
 
  520     T idx = 0, 
max = _layout[0].cnt, maxidx = 0;
 
  522     while(idx < _cur_part_number && _layout[idx].cnt > -1) {
 
  523       if(
max < _layout[idx].cnt) {
 
  524         max = _layout[idx].cnt;
 
  534   inline void operator() (
const MPI_Comm comm, 
const std::vector<S> & ctr, 
const int req_part,
 
  535                           std::vector<T> & part_vec)
 
  539     assert(ctr.size() % 3 == 0); 
 
  542     long int l_numelem = ctr.size() / 3, g_numelem;
 
  543     MPI_Allreduce(&l_numelem, &g_numelem, 1, MPI_LONG, MPI_SUM, _comm);
 
  544     assert(g_numelem > 0);
 
  547     MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
 
  551     bool redistribute_remainder = 
false;
 
  563       npart = pow(2, npart);
 
  569       redistribute_remainder = 
true;
 
  572     assert(g_numelem > (
long int)npart);
 
  575     _cur_part_number = 1;
 
  576     _layout.resize(
size_t(npart));
 
  578     _layout[0].elems = 
new std::vector<kdpart::elem<T,S> >(l_numelem);
 
  579     _layout[0].cnt   = g_numelem;
 
  581     std::vector<kdpart::elem<T,S> > & elems = *_layout[0].elems;
 
  584     for(
long int i=0; i<l_numelem; i++) {
 
  585       elems[i].ctr.get(ctr.data() + i*3);  
 
  589     _layout[0].box = get_bbox(elems);
 
  597     while(_cur_part_number < npart) {
 
  598       T split_pos = get_split_pos();
 
  601       update_layout(split_pos);
 
  607     if(redistribute_remainder) {
 
  609       T base_size = npart / npart_old;
 
  612       T extended_size = base_size + 1;
 
  613       T remainder = npart % npart_old;
 
  616       for(T i=0; i<remainder; i++) {
 
  617         for(T j=0; j<extended_size; j++)
 
  618           _layout[i*extended_size+j].pidx = i;
 
  622       for(T i=remainder*extended_size, pidx=remainder; i<T(_layout.size()); i+=base_size, pidx++) {
 
  623         for(T j=0; j<base_size; j++)
 
  624           _layout[i+j].pidx = pidx;
 
  631     part_vec.assign(l_numelem, T(-1));
 
  634         part_vec[e.eidx] = p.pidx;
 
  641 template<
class T, 
class S>
 
  646   std::vector<kdpart::partition<T,S> > _layout;  
 
  660     for(
size_t i=1; i<elems.size(); i++) {
 
  680     S x = fabs(
min.x - 
max.x);
 
  681     S y = fabs(
min.y - 
max.y);
 
  682     S z = fabs(
min.z - 
max.z);
 
  684     return x > y && x > z ? 
X : y > z ? 
Y : 
Z;
 
  694   inline void print_layout(
const T split_pos = -1)
 
  698       while(idx < split_pos) {
 
  699         printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  704       printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  705       printf(
"%d : %d \n", 
int(_layout[idx+1].pidx), 
int(_layout[idx+1].cnt));
 
  709       while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
 
  710         printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  717       while(
size_t(idx) < _layout.size() && _layout[idx].pidx > -1) {
 
  718         printf(
"%d : %d \n", 
int(_layout[idx].pidx), 
int(_layout[idx].cnt));
 
  737     size_t nelem = parent.
elems->size();
 
  740     std::vector<mixed_pair<S,T> > pairs(nelem);
 
  741     for(
size_t i=0; i<nelem; i++) {
 
  744       switch(longest_axis) {
 
  745         case X: val = p.
x; 
break;
 
  746         case Y: val = p.
y; 
break;
 
  747         case Z: val = p.
z; 
break;
 
  751       pairs[i] = {val, T(i)};
 
  753     std::sort(pairs.begin(), pairs.end());
 
  757     size_t lnum  = (nelem + 1) / 2, rnum = nelem - lnum;
 
  758     lchild.
elems = 
new std::vector<elem<T,S> >(lnum), rchild.
elems = 
new std::vector<
elem<T,S> >(rnum);
 
  760     for(
size_t i=0; i<lnum; i++) {
 
  761       T lidx = pairs[i].v2;
 
  765     for(
size_t i=0; i<rnum; i++) {
 
  766       T lidx = pairs[lnum + i].v2;
 
  776     lchild.
box = get_bbox(*lchild.
elems);
 
  777     rchild.
box = get_bbox(*rchild.
elems);
 
  785   inline void update_layout(
const T split_pos) {
 
  786     assert(
size_t(split_pos) < _layout.size());
 
  788     T idx_at_split = _layout[split_pos].pidx;
 
  789     T start = _cur_part_number - 1, stop = split_pos + 1;
 
  792     for(T i = start; i > stop; i--) {
 
  793       _layout[i] = _layout[i-1];  
 
  798     median_split(_layout[split_pos], _layout[split_pos], _layout[split_pos+1]);
 
  799     _layout[split_pos].pidx   = idx_at_split;
 
  800     _layout[split_pos+1].pidx = idx_at_split+1;
 
  811   inline T get_split_pos()
 
  813     T idx = 0, 
max = _layout[0].cnt, maxidx = 0;
 
  815     while(idx < _cur_part_number && _layout[idx].cnt > -1) {
 
  816       if(
max < _layout[idx].cnt) {
 
  817         max = _layout[idx].cnt;
 
  827   inline void operator() (
const std::vector<S> & ctr, T npart, std::vector<T> & part_vec)
 
  829     assert(ctr.size() % 3 == 0);
 
  831     size_t nelem = ctr.size() / 3;
 
  832     assert(nelem > 
size_t(npart));
 
  834     bool redistribute_remainder = 
false;
 
  845       npart = (log(
double(npart)) / log(2.0)) + 5.0;
 
  846       npart = pow(2, npart);
 
  852       redistribute_remainder = 
true;
 
  856     _layout.resize(
size_t(npart));
 
  857     part_vec.assign(nelem, T(-1));
 
  859     _cur_part_number = 1;
 
  861     _layout[0].elems = 
new std::vector<kdpart::elem<T,S> >(nelem);
 
  862     std::vector<kdpart::elem<T,S> > & elems = *_layout[0].elems;
 
  864     for(
size_t i=0; i<nelem; i++) {
 
  865       elems[i].ctr.get(ctr.data() + i*3);  
 
  869     _layout[0].box = get_bbox(elems);
 
  872     while(_cur_part_number < npart) {
 
  873       T split_pos = get_split_pos();
 
  876       update_layout(split_pos);
 
  879     if(redistribute_remainder) {
 
  880       T base_size = npart / npart_old, extended_size = base_size + 1;
 
  881       T remainder = npart % npart_old;
 
  883       for(T i=0; i<remainder; i++) {
 
  884         for(T j=0; j<extended_size; j++)
 
  885           _layout[i*extended_size+j].pidx = i;
 
  889       for(T i=remainder*extended_size, pidx=remainder; i<T(_layout.size()); i+=base_size, pidx++) {
 
  890         for(T j=0; j<base_size; j++)
 
  891           _layout[i+j].pidx = pidx;
 
  900         part_vec[e.eidx] = p.pidx;
 
void operator()(const MPI_Comm comm, const std::vector< S > &ctr, const int req_part, std::vector< T > &part_vec)
 
void operator()(const std::vector< S > &ctr, T npart, std::vector< T > &part_vec)
 
#define KD_ORDER_INC
The value we add to the power of two if we need higher partitioning resolution.
 
void sort_copy(std::vector< V > &v1, std::vector< W > &v2)
 
V clamp(const V val, const W start, const W end)
Clamp a value into an interval [start, end].
 
bool operator<(const mixed_pair< T, S > &lhs, const mixed_pair< T, S > &rhs)
sorting operator
 
void cnt_from_dsp(const std::vector< T > &dsp, std::vector< T > &cnt)
Compute counts from displacements.
 
bool is_power_of_two(double val)
Check if a given value is an integer power of two.
 
void dsp_from_cnt(const std::vector< T > &cnt, std::vector< T > &dsp)
Compute displacements from counts.
 
constexpr T min(T a, T b)
 
constexpr T max(T a, T b)
 
kdpart::vec3< S > bounds[2]
the bounds. bounds[0] = lower left (min), bounds[1] = upper right (max)
 
vec3< S > ctr
element center location
 
Combined floating point and integer pair.
 
the struct holding all partition data
 
kdpart::bbox< S > box
(global) partition bounding box
 
T cnt
(global) partition size
 
std::vector< elem< T, S > > * elems
(local) elements in partition
 
minimalistic internal point struct