openCARP
Doxygen code documentation for the open cardiac electrophysiology simulator openCARP
SF_parallel_layout.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // openCARP is an open cardiac electrophysiology simulator.
3 //
4 // Copyright (C) 2020 openCARP project
5 //
6 // This program is licensed under the openCARP Academic Public License (APL)
7 // v1.0: You can use and redistribute it and/or modify it in non-commercial
8 // academic environments under the terms of APL as published by the openCARP
9 // project v1.0, or (at your option) any later version. Commercial use requires
10 // a commercial license (info@opencarp.org).
11 //
12 // This program is distributed without any warranty; see the openCARP APL for
13 // more details.
14 //
15 // You should have received a copy of the openCARP APL along with this program
16 // and can find it online: http://www.opencarp.org/license
17 // ----------------------------------------------------------------------------
18 
27 #ifndef _SF_PARALLEL_LAYOUT_H
28 #define _SF_PARALLEL_LAYOUT_H
29 
30 
31 #include <iostream>
32 
33 #include "hashmap.hpp"
34 
35 #include "mpi_utils.h"
36 #include "SF_vector.h"
37 #include "SF_container.h"
38 
39 namespace SF {
40 
57 template<class T>
58 inline void parallel_distribution(const vector<T> & gtarget,
59  const vector<T> & cnt,
60  const vector<T> & dsp,
61  const vector<T> & ranks,
62  const int myrank,
63  vector<T> & owner,
64  vector<T> & counts)
65 {
66  int size = gtarget.size();
67  size_t csize = cnt.size();
68  vector<T> ltarget(size);
69 
70  // this small algorithm computes ltarget[i] = gtarget[i] / size,
71  // but with a even distribution of the divisions remainder
72  // among the processes
73  for(int i=0; i<size; i++) {
74  int idx = (i + myrank) % size;
75  T gt = gtarget[idx];
76  ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
77  }
78 
79  // set initial distribution
80  vector<T> act_idx(csize);
81  counts.resize(size); counts.zero();
82  owner.resize(csize);
83  for(size_t nidx=0; nidx < csize; nidx++)
84  {
85  act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];
86 
87  T j = dsp[nidx] + act_idx[nidx];
88  T p = ranks[j];
89  owner[nidx] = p;
90  counts[p]++;
91  }
92 
93  // compute initial functional value
94  unsigned int J = 0;
95  for(int i = 0; i < size; i++) J += (counts[i] - ltarget[i])*(counts[i] - ltarget[i]);
96 
97  unsigned int k = 0, update = 0, osteps = 32;
98 
99  // optimize
100  while(J > 0 && k++ < osteps)
101  {
102  for(size_t nidx = 0; nidx < csize; nidx++)
103  {
104  // round-robin incrementation
105  act_idx[nidx]++;
106  if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
107 
108  T j = dsp[nidx] + act_idx[nidx];
109  T op = owner[nidx];
110  T p = ranks[j];
111  T n0 = counts[op], n1 = counts[p];
112  T n0_ = ltarget[op], n1_ = ltarget[p];
113  if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
114  {
115  owner[nidx] = p;
116  counts[op]--; counts[p]++;
117  update++;
118  J += 2*(1 - (n0 - n0_) + (n1 - n1_));
119 
120  if (J == 0) break;
121  }
122  }
123  }
124 }
125 
126 
139 template<class T>
140 inline void refine_distribution(const vector<T> & gtarget,
141  const vector<T> & cnt,
142  const vector<T> & dsp,
143  const vector<T> & ranks,
144  const int myrank,
145  vector<T> & owner,
146  vector<T> & counts)
147 {
148  int size = gtarget.size();
149  size_t csize = cnt.size();
150  vector<T> ltarget(size);
151 
152  // this small algorithm computes ltarget[i] = gtarget[i] / size,
153  // but with a even distribution of the divisions remainder
154  // among the processes
155  for(int i=0; i<size; i++) {
156  int idx = (i + myrank) % size;
157  T gt = gtarget[idx];
158  ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
159  }
160 
161  vector<T> act_idx(csize);
162  counts.resize(size); counts.zero();
163 
164  // compute initial functional value
165  unsigned int J = 0;
166  for(int i = 0; i < size; i++) J += (counts[i] - ltarget[i])*(counts[i] - ltarget[i]);
167 
168  unsigned int k = 0, update = 0, osteps = 64;
169 
170  // choose random starting index
171  for(size_t nidx=0; nidx < csize; nidx++)
172  act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];
173 
174  // optimize
175  while(J > 0 && k++ < osteps)
176  {
177  for(size_t nidx = 0; nidx < csize; nidx++)
178  {
179  // round-robin incrementation
180  act_idx[nidx]++;
181  if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
182 
183  T j = dsp[nidx] + act_idx[nidx];
184  T op = owner[nidx];
185  T p = ranks[j];
186  T n0 = counts[op], n1 = counts[p];
187  T n0_ = ltarget[op], n1_ = ltarget[p];
188  if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
189  {
190  owner[nidx] = p;
191  counts[op]--; counts[p]++;
192  update++;
193  J += 2*(1 - (n0 - n0_) + (n1 - n1_));
194 
195  if (J == 0) break;
196  }
197  }
198  }
199 }
200 
201 template<class T>
202 inline void parallel_distribution_minrank(const vector<T> & gtarget,
203  const vector<T> & cnt,
204  const vector<T> & dsp,
205  const vector<T> & ranks,
206  vector<T> & owner,
207  vector<T> & counts)
208 {
209  owner.resize(cnt.size());
210  counts.assign(gtarget.size(), 0);
211 
212  for(size_t i = 0; i < owner.size(); i++)
213  {
214  // get the smallest rank index that holds current node
215  int minrank = ranks[dsp[i]];
216  for(T j = dsp[i]; j < dsp[i+1]; j++)
217  if(minrank > ranks[j]) minrank = ranks[j];
218 
219  // this rank gets assigned to this node
220  owner[i] = minrank;
221  counts[minrank]++;
222  }
223 }
224 
225 
226 
235 template<class T>
237 {
238  protected:
243 
244  public:
255  inline void globalize(vector<T> & lvec) const
256  {
257  size_t lsize = _l2g.size(), widx = 0;
258 
259  for(size_t ridx=0; ridx<lvec.size(); ridx++)
260  {
261  T loc = lvec[ridx];
262  if(loc < (T)lsize)
263  lvec[widx++] = _l2g[loc];
264  }
265  lvec.resize(widx);
266  }
277  inline T globalize(const T lidx) const
278  {
279  size_t lsize = _l2g.size();
280 
281  if(lidx < (T)lsize)
282  return _l2g[lidx];
283  else
284  return T(-1);
285  }
286 
287 
298  inline void localize(vector<T> & gvec) const
299  {
300  size_t widx = 0;
302 
303  for(size_t ridx=0; ridx<gvec.size(); ridx++)
304  {
305  T glob = gvec[ridx];
306  it = _g2l.find(glob);
307  if(it != _g2l.end())
308  gvec[widx++] = it->second;
309  }
310  gvec.resize(widx);
311  }
312 
326  template<class V>
327  inline void localize(vector<T> & gidx, vector<V> & gdat) const
328  {
329  size_t widx = 0;
331 
332  for(size_t ridx=0; ridx<gidx.size(); ridx++)
333  {
334  T glob = gidx[ridx];
335  it = _g2l.find(glob);
336  if(it != _g2l.end()) {
337  gidx[widx] = it->second;
338  gdat[widx] = gdat[ridx];
339  widx++;
340  }
341  }
342  gidx.resize(widx);
343  gdat.resize(widx);
344  }
345 
346  inline T localize(T gidx) const
347  {
348  auto it = _g2l.find(gidx);
349  if(it != _g2l.end())
350  return it->second;
351  else
352  return T(-1);
353  }
354 
360  inline void assign(const vector<T> & idx)
361  {
362  _l2g.assign(idx.begin(), idx.end());
363  _g2l.clear();
364 
365  for(size_t i=0; i<_l2g.size(); i++)
366  _g2l[_l2g[i]] = i;
367  }
368 
369 };
370 
371 
376 template<class T>
378 {
379  private:
381  vector<T> _inod;
383  size_t _glob_num_idx;
384 
386  vector<T> _alg_nod;
388  vector<T> _alg_layout;
390  vector<T> _layout;
392  MPI_Comm _comm;
393 
401  inline void find_domain_interfaces()
402  {
403  int size, rank;
404  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
405 
406  // compute a destination for each index in local domain
408  for(size_t i=0; i<parallel_layout<T>::_l2g.size(); i++)
409  dest[i] = parallel_layout<T>::_l2g[i] % size;
410  binary_sort_copy(dest, sbuf);
411 
412  // set up a commgraph w.r.t. the destination
413  commgraph<size_t> grph;
414  grph.configure(dest, _comm);
415  size_t numrecv = sum(grph.rcnt);
416 
417  // allocate receiving datastructs
418  vector<T> rnod(numrecv); // holds the received node indices
419  vector<T> rproc(numrecv); // holds the process rank each index was received from
420  vector<T> acc_cnt(numrecv, 1), acc_dsp;
421  grph.source_ranks(rproc);
422 
423  MPI_Exchange(grph, sbuf, rnod, _comm);
424 
425  binary_sort_copy(rnod, rproc);
426  unique_accumulate(rnod, acc_cnt);
427  acc_dsp.resize(acc_cnt.size()+1); dsp_from_cnt(acc_cnt, acc_dsp);
428 
429  // compute the global number of entities
430  unsigned long int num_unique = acc_cnt.size(), gnum_unique;
431  MPI_Allreduce(&num_unique, &gnum_unique, 1, MPI_UNSIGNED_LONG, MPI_SUM, _comm);
432  _glob_num_idx = gnum_unique;
433 
434  // now for each unique node rnod[i] we know the multiplicity acc_cnt[i],
435  // and the process ranks associated to it in rproc[acc_dsp[i]] till rproc[acc_dsp[i+1]]
436 
437  // compute commgraph for those entities with acc_cnt[i] > 1
438  grph.scnt.zero();
439  for(size_t i=0; i<acc_cnt.size(); i++) {
440  if(acc_cnt[i] > 1) {
441  for(T j=acc_dsp[i]; j<acc_dsp[i+1]; j++)
442  grph.scnt[rproc[j]]++;
443  }
444  }
445  dsp_from_cnt(grph.scnt, grph.sdsp);
446  MPI_Alltoall(grph.scnt.data(), sizeof(size_t), MPI_BYTE, grph.rcnt.data(), sizeof(size_t), MPI_BYTE, _comm);
447  dsp_from_cnt(grph.rcnt, grph.rdsp);
448 
449  // fill sbuf with those entities with acc_cnt[i] > 1
450  size_t numsend = sum(grph.scnt);
451  sbuf.resize(numsend);
452 
453  for(size_t i=0; i<acc_cnt.size(); i++) {
454  if(acc_cnt[i] > 1) {
455  for(T j=acc_dsp[i]; j<acc_dsp[i+1]; j++) {
456  T sproc = rproc[j];
457  sbuf[grph.sdsp[sproc]] = rnod[i];
458  grph.sdsp[sproc]++;
459  }
460  }
461  }
462 
463  dsp_from_cnt(grph.scnt, grph.sdsp);
464  numrecv = sum(grph.rcnt);
465  _inod.resize(numrecv);
466  MPI_Exchange(grph, sbuf, _inod, _comm);
467  binary_sort(_inod);
468  }
469 
482  inline void find_algebraic_layout()
483  {
484  // whether we want to be verbose about suboptimal algebraic node distributions. from
485  // a high-level view this is unimportant, so by default this is false.
486  const bool dist_warnings = false;
487 
488  int size, rank;
489  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
490 
491  // first compute a unique ownership of a subset of the nodes in the local domain
492  {
493  // compute inner nodes via a set difference of all nodes and the interface nodes
494  vector<T> inner_nodes(parallel_layout<T>::_l2g.size());
495  {
496  vector<T> nodes(parallel_layout<T>::_l2g), intf_nodes(_inod);
497  binary_sort(nodes);
498  T* end = std::set_difference(nodes.begin(), nodes.end(),
499  intf_nodes.begin(), intf_nodes.end(), inner_nodes.begin());
500  inner_nodes.resize(end - inner_nodes.begin());
501  }
502  _alg_nod.assign(inner_nodes.begin(), inner_nodes.end());
503 
504  // compute a destination for each interface index in the local domain
505  vector<T> sbuf(_inod.size()), dest(_inod.size());
506  for(size_t i=0; i < sbuf.size(); i++) {
507  T nod = _inod[i];
508  dest[i] = nod % size;
509  sbuf[i] = nod;
510  }
511  binary_sort_copy(dest, sbuf);
512 
513  // set up a commgraph w.r.t. the destination
514  commgraph<size_t> grph;
515  grph.configure(dest, _comm);
516 
517  // allocate receiving datastructs
518  size_t numrecv = sum(grph.rcnt);
519  vector<T> rnod(numrecv); // holds the received node indices
520  vector<T> rproc(numrecv); // holds the process rank each index was received from
521 
522  grph.source_ranks(rproc);
523  MPI_Exchange(grph, sbuf, rnod, _comm);
524 
525  vector<T> acc_cnt(numrecv, 1), acc_dsp;
526  binary_sort_copy(rnod, rproc);
527  unique_accumulate(rnod, acc_cnt);
528  acc_dsp.resize(acc_cnt.size()+1); dsp_from_cnt(acc_cnt, acc_dsp);
529 
530  const MPI_Datatype mpi_t = opencarp::mpi_datatype<T>();
531 
532  // initialize the target distribution to approx. _glob_num_idx / size
533  vector<T> target;
534  divide(_glob_num_idx, size, target);
535 
536  target[rank] -= inner_nodes.size();
537  MPI_Allreduce(MPI_IN_PLACE, target.data(), target.size(), mpi_t, MPI_MIN, _comm);
538 
539  if(dist_warnings) {
540  // treat negative values in target with warning.
541  bool warn = false;
542  for(int i=0; i<size; i++)
543  if(target[i] < 0) {
544  warn = true;
545  break;
546  }
547  if(warn)
548  if(!rank) std::cerr << "Warning: Domains too unbalanced for balanced re-indexing!" << std::endl;
549  }
550 
551  vector<T> owners, counts;
552 
553  #if 1
554  // use optimization algorithm to find a parallel distribution that fits the specified
555  // target distribution
556  parallel_distribution(target, acc_cnt, acc_dsp, rproc, rank, owners, counts);
557  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), mpi_t, MPI_SUM, _comm);
558  for(int i=0; i<size; i++) target[i] -= counts[i];
559 
560  unsigned int k = 0, numref = 10;
561  while( !isEmpty(target) && k++ < numref)
562  {
563  refine_distribution(target, acc_cnt, acc_dsp, rproc, rank, owners, counts);
564  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), mpi_t, MPI_SUM, _comm);
565  for(int i=0; i<size; i++) target[i] -= counts[i];
566  }
567  #else
568  parallel_distribution_minrank(target, acc_cnt, acc_dsp, rproc, owners, counts);
569  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), mpi_t, MPI_SUM, _comm);
570  for(int i=0; i<size; i++) target[i] -= counts[i];
571  #endif
572 
573  if( dist_warnings && rank == 0 ) {
574  if(!isEmpty(target)) {
575  std::cerr << "Warning: Balanced re-indexing could not be computed." << std::endl;
576  std::cerr << "Final differences to even distribution: " << std::endl;
577  for(int i=0; i<size; i++) std::cerr << target[i] << " ";
578  std::cerr << std::endl;
579  }
580  }
581 
582  // send the assigned nodes back to the respective processes
583  binary_sort_copy(owners, rnod);
584  grph.configure(owners, _comm);
585  numrecv = sum(grph.rcnt);
586  sbuf.resize(numrecv);
587  MPI_Exchange(grph, rnod, sbuf, _comm);
588 
589  _alg_nod.append(sbuf.begin(), sbuf.end());
590  binary_sort(_alg_nod);
591  }
592 
593  // Generate the algebraic layout
594  {
595  commgraph<size_t> owned_layout;
596  owned_layout.resize(size);
597  size_t owned_idx_size = _alg_nod.size();
598  MPI_Allgather(&owned_idx_size, sizeof(size_t), MPI_BYTE,
599  owned_layout.scnt.data(), sizeof(size_t), MPI_BYTE, _comm);
600  dsp_from_cnt(owned_layout.scnt, owned_layout.sdsp);
601 
602  _alg_layout.resize(owned_layout.sdsp.size());
603  vec_assign(_alg_layout.data(), owned_layout.sdsp.data(), owned_layout.sdsp.size());
604  }
605  }
606 
607  inline void compute_layout()
608  {
609  T size = this->algebraic_layout().size()-1;
610  T nlidx = this->num_local_idx();
611 
612  _layout.resize(size + 1);
613  vector<T> cnt(size);
614 
615  MPI_Allgather(&nlidx, sizeof(T), MPI_BYTE, cnt.data(), sizeof(T), MPI_BYTE, _comm);
616  dsp_from_cnt(cnt, _layout);
617  }
618 
619  public:
621  overlapping_layout() : _glob_num_idx(0), _comm(SF_COMM) {}
622 
633  inline void assign(const vector<T> & idx, MPI_Comm comm)
634  {
635  _comm = comm;
636 
638 
639  this->find_domain_interfaces();
640  this->find_algebraic_layout();
641  this->compute_layout();
642 
643  this->localize(_inod);
644  binary_sort(_inod);
645  this->localize(_alg_nod);
646  binary_sort(_alg_nod);
647  }
648 
649 
658  inline const vector<T>& interface() const
659  {
660  return _inod;
661  }
662 
668  inline const vector<T> & algebraic_nodes() const
669  {
670  return _alg_nod;
671  }
672 
678  inline const vector<T> & algebraic_layout() const
679  {
680  return _alg_layout;
681  }
687  inline const vector<T> & layout() const
688  {
689  return _layout;
690  }
691 
702  inline T localize_algebraic(const T global_idx, const vector<T> & global_alg_nbr, const int rank)
703  {
704  T loc_nodal_idx = parallel_layout<T>::localize(global_idx);
705  if(loc_nodal_idx == -1)
706  return -1;
707 
708  T local_offset = _alg_layout[rank], local_size = _alg_layout[rank+1] - local_offset;
709  T local_alg_idx = global_alg_nbr[loc_nodal_idx] - local_offset;
710 
711  if(local_alg_idx > -1 && local_alg_idx < local_size)
712  return local_alg_idx;
713  else
714  return -1;
715  }
716 
718  inline size_t num_global_idx() const
719  {
720  return _glob_num_idx;
721  }
723  inline size_t num_local_idx() const
724  {
725  return parallel_layout<T>::_l2g.size();
726  }
728  inline size_t num_algebraic_idx() const
729  {
730  return _alg_nod.size();
731  }
732 
741  template<class V>
742  inline void reduce(vector<V> & ndat, const char* op) const
743  {
744  int size, rank;
745  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
746 
747  // parallel layout of nodal data must match the one of the
748  // overlapping_layout class
749  assert(ndat.size() == parallel_layout<T>::_l2g.size());
750 
751  size_t isize = _inod.size();
752  vector<T> nod_sbuf(isize), dest(isize), perm;
753  vector<V> dat_sbuf(isize);
754 
755  for(size_t i=0; i<isize; i++) {
756  T lidx = _inod[i];
757  dest[i] = parallel_layout<T>::_l2g[lidx] % size;
758  }
759  interval(perm, 0, isize);
760  binary_sort_copy(dest, perm);
761 
762  for(size_t i=0; i<perm.size(); i++)
763  {
764  T lidx = _inod[perm[i]];
765  nod_sbuf[i] = parallel_layout<T>::_l2g[lidx];
766  dat_sbuf[i] = ndat[lidx];
767  }
768 
769  // set up a commgraph w.r.t. the destination
770  commgraph<size_t> grph;
771  grph.configure(dest, _comm);
772 
773  size_t numrecv = sum(grph.rcnt);
774  vector<T> nod_rbuf(numrecv);
775  vector<V> dat_rbuf(numrecv);
776 
777  MPI_Exchange(grph, nod_sbuf, nod_rbuf, _comm);
778  MPI_Exchange(grph, dat_sbuf, dat_rbuf, _comm);
779 
780  vector<T> acc_cnt(numrecv, 1), acc_dsp, acc_col;
781  interval(acc_col, 0, numrecv);
782 
783  binary_sort_copy(nod_rbuf, acc_col);
784  unique_accumulate(nod_rbuf, acc_cnt);
785 
786  acc_dsp.resize(acc_cnt.size() + 1);
787  dsp_from_cnt(acc_cnt, acc_dsp);
788 
789  short op_sw = 0;
790  if(! strcmp(op, "max"))
791  op_sw = 0;
792  else if(! strcmp(op, "min"))
793  op_sw = 1;
794  else if(! strcmp(op, "sum"))
795  op_sw = 2;
796  else
797  assert(0);
798 
799  for(size_t i=0; i<acc_cnt.size(); i++)
800  {
801  V val = 0.0; // value we will set for all interface nodes
802  // compute value based on selected operation
803  switch(op_sw)
804  {
805  // max
806  case 0:
807  {
808  V max = dat_rbuf[acc_col[acc_dsp[i]]];
809  T idx=1;
810  while(idx < acc_cnt[i])
811  {
812  T p = acc_col[acc_dsp[i]+idx];
813  if(max < dat_rbuf[p]) max = dat_rbuf[p];
814  idx++;
815  }
816  val = max;
817  break;
818  }
819 
820  // min
821  case 1:
822  {
823  V min = dat_rbuf[acc_col[acc_dsp[i]]];
824  T idx=1;
825  while(idx < acc_cnt[i])
826  {
827  T p = acc_col[acc_dsp[i]+idx];
828  if(min > dat_rbuf[p]) min = dat_rbuf[p];
829  idx++;
830  }
831  val = min;
832  break;
833  }
834 
835  // sum
836  case 2:
837  {
838  T idx=0;
839  while(idx < acc_cnt[i])
840  {
841  T p = acc_col[acc_dsp[i]+idx];
842  val += dat_rbuf[p];
843  idx++;
844  }
845  break;
846  }
847  }
848  for(T j = acc_dsp[i]; j < acc_dsp[i] + acc_cnt[i]; j++)
849  dat_rbuf[acc_col[j]] = val;
850  }
851 
852  grph.transpose();
853  MPI_Exchange(grph, dat_rbuf, dat_sbuf, _comm);
854 
855  for(size_t i=0; i<perm.size(); i++)
856  {
857  T lidx = _inod[perm[i]];
858  ndat[lidx] = dat_sbuf[i];
859  }
860  }
861 };
862 
868 template<class T>
870 {
871  private:
872  vector<T> _elem_layout;
873  MPI_Comm _comm;
874 
875  public:
878  {}
879 
888  inline void assign(const vector<T> & ref_eidx, MPI_Comm comm)
889  {
890  _comm = comm;
891 
892  parallel_layout<T>::assign(ref_eidx);
893 
894  int size; MPI_Comm_size(_comm, &size);
895 
896  T lsize = ref_eidx.size();
897  vector<T> layout_cnt(size);
898 
899  MPI_Allgather(&lsize, sizeof(T), MPI_BYTE, layout_cnt.data(), sizeof(T), MPI_BYTE, _comm);
900  dsp_from_cnt(layout_cnt, _elem_layout);
901  }
903  const vector<T> & algebraic_layout() const
904  {
905  return _elem_layout;
906  }
907 };
908 
909 
910 
914 template<class T, class S>
915 T local_nodal_to_local_petsc(const meshdata<T,S> & mesh, int rank, T local_nodal)
916 {
917  const vector<T> & alg_layout = mesh.pl.algebraic_layout();
918  const vector<T> & petsc_nbr = mesh.get_numbering(NBR_PETSC);
919 
920  const T my_offset = alg_layout[rank];
921 
922  return petsc_nbr[local_nodal] - my_offset;
923 }
924 
927 template<class T, class S>
928 T local_petsc_to_local_nodal(const meshdata<T,S> & mesh, int rank, T local_petsc)
929 {
930  const vector<T> & alg_layout = mesh.pl.algebraic_layout();
931  const vector<T> & alg_nod = mesh.pl.algebraic_nodes();
932  const vector<T> & petsc_nbr = mesh.get_numbering(NBR_PETSC);
933 
934  const T my_offset = alg_layout[rank];
935  const size_t num_alg = alg_nod.size();
936 
937  size_t idx = 0;
938  while(idx < num_alg && petsc_nbr[alg_nod[idx]] != local_petsc + my_offset) idx++;
939 
940  if(idx == num_alg) return -1;
941  else return alg_nod[idx];
942 }
943 
944 template<class T, class S>
945 void local_petsc_to_nodal_mapping(const meshdata<T,S> & mesh, index_mapping<T> & petsc_to_nodal)
946 {
947  int rank; MPI_Comm_rank(mesh.comm, &rank);
948  const vector<T> & alg_nod = mesh.pl.algebraic_nodes();
949  vector<T> petsc_idx(alg_nod.size());
950 
951  size_t idx = 0;
952  for(const T & n : alg_nod)
953  petsc_idx[idx++] = local_nodal_to_local_petsc(mesh, rank, n);
954 
955  petsc_to_nodal.assign(petsc_idx, alg_nod);
956 }
957 
958 
959 }
960 
961 #endif
Basic containers.
#define SF_COMM
the default SlimFem MPI communicator
Definition: SF_globals.h:28
The vector class and related algorithms.
The class holds the communication graph for a MPI_Exchange() call.
Definition: SF_container.h:638
vector< T > rcnt
Number of elements received from each rank.
Definition: SF_container.h:642
vector< T > scnt
Number of elements sent to each rank.
Definition: SF_container.h:640
void resize(size_t size)
Resize all vectors to size.
Definition: SF_container.h:646
void configure(const vector< V > &dest, MPI_Comm comm)
Set up the communication graph.
Definition: SF_container.h:679
vector< T > sdsp
Displacements w.r.t. scnt.
Definition: SF_container.h:641
vector< T > rdsp
Displacements w.r.t. rcnt.
Definition: SF_container.h:643
void source_ranks(vector< V > &source)
For every received data element, get the rank indices it was receive from.
Definition: SF_container.h:726
void transpose()
transpose comm graph (receive becomes send, and vice versa)
Definition: SF_container.h:663
Index mapping class. This is a bijective mapping.
Definition: SF_container.h:220
void assign(const vector< T > &a, const vector< T > &b)
Set up the index mapping between a and b.
Definition: SF_container.h:247
The mesh storage class. It contains both element and vertex data.
Definition: SF_container.h:396
overlapping_layout< T > pl
nodal parallel layout
Definition: SF_container.h:429
MPI_Comm comm
the parallel mesh is defined on a MPI world
Definition: SF_container.h:404
vector< T > & get_numbering(SF_nbr nbr_type)
Get the vector defining a certain numbering.
Definition: SF_container.h:464
The parallel layout of non overlapping indices.
const vector< T > & algebraic_layout() const
Getter function for the algebraic layout of the elements.
non_overlapping_layout()
Empty constructor. Use assign() to set up the layout.
void assign(const vector< T > &ref_eidx, MPI_Comm comm)
Generate the layout.
The overlapping_layout class contains the algorithms related to managing overlapping parallel index s...
T localize_algebraic(const T global_idx, const vector< T > &global_alg_nbr, const int rank)
map a global (REF_NBR, reference numbering) index to local algebraic (non-overlapping) indexing w....
const vector< T > & algebraic_layout() const
Getter function for the global algebraic node layout.
size_t num_algebraic_idx() const
Retrieve the number of local algebraic indices.
size_t num_local_idx() const
Retrieve the local number of indices.
const vector< T > & layout() const
Return the the overlapping layout.
const vector< T > & algebraic_nodes() const
Getter function for the local indices forming the local algebraic node set.
size_t num_global_idx() const
Retrieve the global number of indices.
void assign(const vector< T > &idx, MPI_Comm comm)
Initialization function.
const vector< T > & interface() const
Retrieve the local indices of the subdomain interfaces.
void reduce(vector< V > &ndat, const char *op) const
Compute a reduction on overlapping data.
overlapping_layout()
Non-parameterized constructor. Use assign() to initialize later.
The base class for parallel layouts.
T localize(T gidx) const
void assign(const vector< T > &idx)
Assign a parallel distributed index set that defines the parallel layout.
hashmap::unordered_map< T, T > _g2l
The global-to-local map for the DD domain.
void localize(vector< T > &gidx, vector< V > &gdat) const
Localize global indices and associated data.
vector< T > _l2g
The global indices of the local DD domain. Also serves as the local-to-global map.
void globalize(vector< T > &lvec) const
Globalize local indices.
T globalize(const T lidx) const
Globalize local indices.
void localize(vector< T > &gvec) const
Localize global indices.
A vector storing arbitrary data.
Definition: SF_vector.h:43
size_t size() const
The current size of the vector.
Definition: SF_vector.h:104
void resize(size_t n)
Resize a vector.
Definition: SF_vector.h:209
const T * end() const
Pointer to the vector's end.
Definition: SF_vector.h:128
void append(InputIterator s, InputIterator e)
Append data to the current data chunk.
Definition: SF_vector.h:268
void assign(InputIterator s, InputIterator e)
Assign a memory range.
Definition: SF_vector.h:161
void zero()
Definition: SF_vector.h:248
const T * begin() const
Pointer to the vector's start.
Definition: SF_vector.h:116
T * data()
Pointer to the vector's start.
Definition: SF_vector.h:91
iterator find(const K &key)
Search for key. Return iterator.
Definition: hashmap.hpp:641
Classes similar to unordered_set and unordered_map, but with better performance.
Definition: dense_mat.hpp:34
void dsp_from_cnt(const vector< T > &cnt, vector< T > &dsp)
Compute displacements from counts.
Definition: SF_vector.h:310
T local_petsc_to_local_nodal(const meshdata< T, S > &mesh, int rank, T local_petsc)
void interval(vector< T > &vec, size_t start, size_t end)
Create an integer interval between start and end.
Definition: SF_vector.h:350
void unique_accumulate(vector< T > &_P, vector< S > &_A)
Definition: SF_sort.h:409
void binary_sort_copy(vector< T > &_V, vector< S > &_W)
Definition: SF_sort.h:296
T sum(const vector< T > &vec)
Compute sum of a vector's entries.
Definition: SF_vector.h:340
void divide(const size_t gsize, const size_t num_parts, vector< T > &loc_sizes)
divide gsize into num_parts local parts with even distribution of the remainder
Definition: SF_vector.h:358
void local_petsc_to_nodal_mapping(const meshdata< T, S > &mesh, index_mapping< T > &petsc_to_nodal)
bool isEmpty(vector< T > &v)
Return whether an vector is empty (all values are 0).
Definition: SF_vector.h:378
void parallel_distribution_minrank(const vector< T > &gtarget, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, vector< T > &owner, vector< T > &counts)
T local_nodal_to_local_petsc(const meshdata< T, S > &mesh, int rank, T local_nodal)
void vec_assign(S *lhs, const V *rhs, size_t size)
Assign the values in rhs to lhs. The data-type of rhs is cast to the type of lhs.
Definition: SF_vector.h:371
void MPI_Exchange(commgraph< T > &grph, vector< S > &send, vector< S > &recv, MPI_Comm comm)
Exchange data in parallel over MPI.
Definition: SF_network.h:47
void refine_distribution(const vector< T > &gtarget, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
Further refine a distribution generated by parallel_distribution().
void binary_sort(vector< T > &_V)
Definition: SF_sort.h:284
void parallel_distribution(const vector< T > &gtarget, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
The distribution distributes entities between all ranks.
@ NBR_PETSC
PETSc numbering of nodes.
Definition: SF_container.h:203
constexpr T min(T a, T b)
Definition: ion_type.h:33
constexpr T max(T a, T b)
Definition: ion_type.h:31