openCARP
Doxygen code documentation for the open cardiac electrophysiology simulator openCARP
SF_parallel_layout.h
// ----------------------------------------------------------------------------
// openCARP is an open cardiac electrophysiology simulator.
//
// Copyright (C) 2020 openCARP project
//
// This program is licensed under the openCARP Academic Public License (APL)
// v1.0: You can use and redistribute it and/or modify it in non-commercial
// academic environments under the terms of APL as published by the openCARP
// project v1.0, or (at your option) any later version. Commercial use requires
// a commercial license (info@opencarp.org).
//
// This program is distributed without any warranty; see the openCARP APL for
// more details.
//
// You should have received a copy of the openCARP APL along with this program
// and can find it online: http://www.opencarp.org/license
// ----------------------------------------------------------------------------

#ifndef _SF_PARALLEL_LAYOUT_H
#define _SF_PARALLEL_LAYOUT_H


#include <iostream>

// standard and MPI headers used directly in this file
#include <algorithm>
#include <cassert>
#include <cstring>
#include <mpi.h>

#include "hashmap.hpp"

#include "SF_vector.h"
#include "SF_container.h"

namespace SF {

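/**
 * @brief Distribute entities between all ranks such that the resulting per-rank
 *        counts approximate a given target distribution.
 *
 * Each entity i may be owned by one of the candidate ranks
 * ranks[dsp[i]] .. ranks[dsp[i]+cnt[i]-1].
 *
 * @param gtarget  Target number of entities per rank.
 * @param cnt      Number of candidate ranks per entity.
 * @param dsp      Displacements into \p ranks, derived from \p cnt.
 * @param ranks    Flattened candidate rank lists of all entities.
 * @param myrank   Rank of the calling process.
 * @param owner    (Output) Chosen owner rank for each entity.
 * @param counts   (Output) Number of entities assigned to each rank.
 *
 * A minimal calling sketch with hypothetical values, for illustration only:
 * @code
 * // two ranks, three entities; entity 0 may live on rank 0 or 1,
 * // entity 1 only on rank 0, entity 2 only on rank 1
 * const int myrank = 0;  // rank of the calling process (hypothetical)
 * SF::vector<int> gtarget(2);  gtarget[0] = 2; gtarget[1] = 1;
 * SF::vector<int> cnt(3);      cnt[0] = 2; cnt[1] = 1; cnt[2] = 1;
 * SF::vector<int> dsp(4);      dsp[0] = 0; dsp[1] = 2; dsp[2] = 3; dsp[3] = 4;
 * SF::vector<int> ranks(4);    ranks[0] = 0; ranks[1] = 1; ranks[2] = 0; ranks[3] = 1;
 * SF::vector<int> owner, counts;
 * SF::parallel_distribution(gtarget, cnt, dsp, ranks, myrank, owner, counts);
 * @endcode
 */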
template<class T>
inline void parallel_distribution(const vector<T> & gtarget,
                                  const vector<T> & cnt,
                                  const vector<T> & dsp,
                                  const vector<T> & ranks,
                                  const int myrank,
                                  vector<T> & owner,
                                  vector<T> & counts)
{
  int size = gtarget.size();
  size_t csize = cnt.size();
  vector<T> ltarget(size);

  // this small algorithm computes ltarget[i] = gtarget[i] / size,
  // but with an even distribution of the division's remainder
  // among the processes
  for(int i=0; i<size; i++) {
    int idx = (i + myrank) % size;
    T gt = gtarget[idx];
    ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
  }

  // set initial distribution
  vector<T> act_idx(csize);
  counts.resize(size); counts.zero();
  owner.resize(csize);
  for(size_t nidx=0; nidx < csize; nidx++)
  {
    act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];

    T j = dsp[nidx] + act_idx[nidx];
    T p = ranks[j];
    owner[nidx] = p;
    counts[p]++;
  }

  // compute initial functional value
  unsigned int J = 0;
  for(int i = 0; i < size; i++) J += (counts[i] - ltarget[i])*(counts[i] - ltarget[i]);

  unsigned int k = 0, update = 0, osteps = 32;

  // optimize
  while(J > 0 && k++ < osteps)
  {
    for(size_t nidx = 0; nidx < csize; nidx++)
    {
      // round-robin incrementation
      act_idx[nidx]++;
      if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;

      T j = dsp[nidx] + act_idx[nidx];
      T op = owner[nidx];
      T p = ranks[j];
      T n0 = counts[op], n1 = counts[p];
      T n0_ = ltarget[op], n1_ = ltarget[p];
      if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
      {
        owner[nidx] = p;
        counts[op]--; counts[p]++;
        update++;
        J += 2*(1 - (n0 - n0_) + (n1 - n1_));

        if (J == 0) break;
      }
    }
  }
}


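/**
 * @brief Further refine a distribution generated by parallel_distribution().
 *
 * The parameters match those of parallel_distribution(); here the \p owner
 * vector additionally serves as input holding the current assignment.
 */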
template<class T>
inline void refine_distribution(const vector<T> & gtarget,
                                const vector<T> & cnt,
                                const vector<T> & dsp,
                                const vector<T> & ranks,
                                const int myrank,
                                vector<T> & owner,
                                vector<T> & counts)
{
  int size = gtarget.size();
  size_t csize = cnt.size();
  vector<T> ltarget(size);

  // this small algorithm computes ltarget[i] = gtarget[i] / size,
  // but with an even distribution of the division's remainder
  // among the processes
  for(int i=0; i<size; i++) {
    int idx = (i + myrank) % size;
    T gt = gtarget[idx];
    ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
  }

  vector<T> act_idx(csize);
  counts.resize(size); counts.zero();

  // compute initial functional value
  unsigned int J = 0;
  for(int i = 0; i < size; i++) J += (counts[i] - ltarget[i])*(counts[i] - ltarget[i]);

  unsigned int k = 0, update = 0, osteps = 64;

  // choose random starting index
  for(size_t nidx=0; nidx < csize; nidx++)
    act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];

  // optimize
  while(J > 0 && k++ < osteps)
  {
    for(size_t nidx = 0; nidx < csize; nidx++)
    {
      // round-robin incrementation
      act_idx[nidx]++;
      if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;

      T j = dsp[nidx] + act_idx[nidx];
      T op = owner[nidx];
      T p = ranks[j];
      T n0 = counts[op], n1 = counts[p];
      T n0_ = ltarget[op], n1_ = ltarget[p];
      if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
      {
        owner[nidx] = p;
        counts[op]--; counts[p]++;
        update++;
        J += 2*(1 - (n0 - n0_) + (n1 - n1_));

        if (J == 0) break;
      }
    }
  }
}

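/**
 * @brief Assign each entity to the smallest of its candidate ranks.
 *
 * Simple alternative to parallel_distribution() that does not optimize
 * towards a target distribution.
 */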
template<class T>
inline void parallel_distribution_minrank(const vector<T> & gtarget,
                                          const vector<T> & cnt,
                                          const vector<T> & dsp,
                                          const vector<T> & ranks,
                                          vector<T> & owner,
                                          vector<T> & counts)
{
  owner.resize(cnt.size());
  counts.assign(gtarget.size(), 0);

  for(size_t i = 0; i < owner.size(); i++)
  {
    // get the smallest rank index that holds current node
    int minrank = ranks[dsp[i]];
    for(T j = dsp[i]; j < dsp[i+1]; j++)
      if(minrank > ranks[j]) minrank = ranks[j];

    // this rank gets assigned to this node
    owner[i] = minrank;
    counts[minrank]++;
  }
}


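/**
 * @brief The base class for parallel layouts.
 *
 * It stores the local-to-global and global-to-local index mappings of a
 * domain-decomposed index set.
 */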
template<class T>
class parallel_layout
{
  protected:
  /// The global indices of the local DD domain. Also serves as the local-to-global map.
  vector<T> _l2g;
  /// The global-to-local map for the DD domain.
  hashmap::unordered_map<T,T> _g2l;

  public:
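  /**
   * @brief Globalize local indices.
   *
   * Entries that are not valid local indices are removed from \p lvec.
   */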
  inline void globalize(vector<T> & lvec) const
  {
    size_t lsize = _l2g.size(), widx = 0;

    for(size_t ridx=0; ridx<lvec.size(); ridx++)
    {
      T loc = lvec[ridx];
      if(loc < (T)lsize)
        lvec[widx++] = _l2g[loc];
    }
    lvec.resize(widx);
  }
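  /**
   * @brief Globalize a single local index.
   *
   * @return The global index, or T(-1) if \p lidx is not a valid local index.
   */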
  inline T globalize(const T lidx) const
  {
    size_t lsize = _l2g.size();

    if(lidx < (T)lsize)
      return _l2g[lidx];
    else
      return T(-1);
  }


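  /**
   * @brief Localize global indices.
   *
   * Entries that are not present in the local domain are removed from \p gvec.
   */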
  inline void localize(vector<T> & gvec) const
  {
    size_t widx = 0;
    typename hashmap::unordered_map<T,T>::const_iterator it;

    for(size_t ridx=0; ridx<gvec.size(); ridx++)
    {
      T glob = gvec[ridx];
      it = _g2l.find(glob);
      if(it != _g2l.end())
        gvec[widx++] = it->second;
    }
    gvec.resize(widx);
  }

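  /**
   * @brief Localize global indices together with associated data.
   *
   * Index-data pairs that are not present in the local domain are removed.
   */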
  template<class V>
  inline void localize(vector<T> & gidx, vector<V> & gdat) const
  {
    size_t widx = 0;
    typename hashmap::unordered_map<T,T>::const_iterator it;

    for(size_t ridx=0; ridx<gidx.size(); ridx++)
    {
      T glob = gidx[ridx];
      it = _g2l.find(glob);
      if(it != _g2l.end()) {
        gidx[widx] = it->second;
        gdat[widx] = gdat[ridx];
        widx++;
      }
    }
    gidx.resize(widx);
    gdat.resize(widx);
  }

  /// Localize a single global index. Returns T(-1) if \p gidx is not local.
  inline T localize(T gidx) const
  {
    auto it = _g2l.find(gidx);
    if(it != _g2l.end())
      return it->second;
    else
      return T(-1);
  }

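  /**
   * @brief Assign a parallel distributed index set that defines the parallel layout.
   */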
  inline void assign(const vector<T> & idx)
  {
    _l2g.assign(idx.begin(), idx.end());
    _g2l.clear();

    for(size_t i=0; i<_l2g.size(); i++)
      _g2l[_l2g[i]] = i;
  }

};


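/**
 * @brief The overlapping_layout class contains the algorithms related to
 *        managing overlapping parallel index sets.
 */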
template<class T>
class overlapping_layout : public parallel_layout<T>
{
  private:
  /// The local indices of the subdomain interfaces.
  vector<T> _inod;
  /// The global number of indices.
  size_t _glob_num_idx;

  /// The local indices forming the local algebraic (non-overlapping) node set.
  vector<T> _alg_nod;
  /// The global layout of the algebraic nodes.
  vector<T> _alg_layout;
  /// The layout of the overlapping (nodal) indices.
  vector<T> _layout;
  /// The MPI communicator the layout is defined on.
  MPI_Comm _comm;

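  /**
   * @brief Determine the indices shared between multiple process domains
   *        (the interface nodes) and the global number of indices.
   */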
  inline void find_domain_interfaces()
  {
    int size, rank;
    MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);

    // compute a destination for each index in local domain
    vector<T> sbuf(parallel_layout<T>::_l2g), dest(parallel_layout<T>::_l2g.size());
    for(size_t i=0; i<parallel_layout<T>::_l2g.size(); i++)
      dest[i] = parallel_layout<T>::_l2g[i] % size;
    binary_sort_copy(dest, sbuf);

    // set up a commgraph w.r.t. the destination
    commgraph<size_t> grph;
    grph.configure(dest, _comm);
    size_t numrecv = sum(grph.rcnt);

    // allocate receiving datastructs
    vector<T> rnod(numrecv);   // holds the received node indices
    vector<T> rproc(numrecv);  // holds the process rank each index was received from
    vector<T> acc_cnt(numrecv, 1), acc_dsp;
    grph.source_ranks(rproc);

    MPI_Exchange(grph, sbuf, rnod, _comm);

    binary_sort_copy(rnod, rproc);
    unique_accumulate(rnod, acc_cnt);
    acc_dsp.resize(acc_cnt.size()+1); dsp_from_cnt(acc_cnt, acc_dsp);

    // compute the global number of entities
    unsigned long int num_unique = acc_cnt.size(), gnum_unique;
    MPI_Allreduce(&num_unique, &gnum_unique, 1, MPI_UNSIGNED_LONG, MPI_SUM, _comm);
    _glob_num_idx = gnum_unique;

    // now for each unique node rnod[i] we know the multiplicity acc_cnt[i],
    // and the process ranks associated to it in rproc[acc_dsp[i]] till rproc[acc_dsp[i+1]]

    // compute commgraph for those entities with acc_cnt[i] > 1
    grph.scnt.zero();
    for(size_t i=0; i<acc_cnt.size(); i++) {
      if(acc_cnt[i] > 1) {
        for(T j=acc_dsp[i]; j<acc_dsp[i+1]; j++)
          grph.scnt[rproc[j]]++;
      }
    }
    dsp_from_cnt(grph.scnt, grph.sdsp);
    MPI_Alltoall(grph.scnt.data(), sizeof(size_t), MPI_BYTE, grph.rcnt.data(), sizeof(size_t), MPI_BYTE, _comm);
    dsp_from_cnt(grph.rcnt, grph.rdsp);

    // fill sbuf with those entities with acc_cnt[i] > 1
    size_t numsend = sum(grph.scnt);
    sbuf.resize(numsend);

    for(size_t i=0; i<acc_cnt.size(); i++) {
      if(acc_cnt[i] > 1) {
        for(T j=acc_dsp[i]; j<acc_dsp[i+1]; j++) {
          T sproc = rproc[j];
          sbuf[grph.sdsp[sproc]] = rnod[i];
          grph.sdsp[sproc]++;
        }
      }
    }

    dsp_from_cnt(grph.scnt, grph.sdsp);
    numrecv = sum(grph.rcnt);
    _inod.resize(numrecv);
    MPI_Exchange(grph, sbuf, _inod, _comm);
    binary_sort(_inod);
  }

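  /**
   * @brief Assign each interface node to a unique owner rank, yielding the local
   *        algebraic (non-overlapping) node set and its global layout.
   */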
  inline void find_algebraic_layout()
  {
    // Whether we want to be verbose about suboptimal algebraic node distributions.
    // From a high-level view this is unimportant, so by default this is false.
    const bool dist_warnings = false;

    int size, rank;
    MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);

    // first compute a unique ownership of a subset of the nodes in the local domain
    {
      // compute inner nodes via a set difference of all nodes and the interface nodes
      vector<T> inner_nodes(parallel_layout<T>::_l2g.size());
      {
        vector<T> nodes(parallel_layout<T>::_l2g), intf_nodes(_inod);
        binary_sort(nodes);
        T* end = std::set_difference(nodes.begin(), nodes.end(),
                                     intf_nodes.begin(), intf_nodes.end(), inner_nodes.begin());
        inner_nodes.resize(end - inner_nodes.begin());
      }
      _alg_nod.assign(inner_nodes.begin(), inner_nodes.end());

      // compute a destination for each interface index in the local domain
      vector<T> sbuf(_inod.size()), dest(_inod.size());
      for(size_t i=0; i < sbuf.size(); i++) {
        T nod = _inod[i];
        dest[i] = nod % size;
        sbuf[i] = nod;
      }
      binary_sort_copy(dest, sbuf);

      // set up a commgraph w.r.t. the destination
      commgraph<size_t> grph;
      grph.configure(dest, _comm);

      // allocate receiving datastructs
      size_t numrecv = sum(grph.rcnt);
      vector<T> rnod(numrecv);   // holds the received node indices
      vector<T> rproc(numrecv);  // holds the process rank each index was received from

      grph.source_ranks(rproc);
      MPI_Exchange(grph, sbuf, rnod, _comm);

      vector<T> acc_cnt(numrecv, 1), acc_dsp;
      binary_sort_copy(rnod, rproc);
      unique_accumulate(rnod, acc_cnt);
      acc_dsp.resize(acc_cnt.size()+1); dsp_from_cnt(acc_cnt, acc_dsp);

      // initialize the target distribution to approx. _glob_num_idx / size
      vector<int> target;
      divide(_glob_num_idx, size, target);

      target[rank] -= inner_nodes.size();
      MPI_Allreduce(MPI_IN_PLACE, target.data(), target.size(), MPI_INT, MPI_MIN, _comm);

      if(dist_warnings) {
        // treat negative values in target with warning.
        bool warn = false;
        for(int i=0; i<size; i++)
          if(target[i] < 0) {
            warn = true;
            break;
          }
        if(warn)
          if(!rank) std::cerr << "Warning: Domains too unbalanced for balanced re-indexing!" << std::endl;
      }

      vector<T> owners, counts;

      #if 1
      // use optimization algorithm to find a parallel distribution that fits the specified
      // target distribution
      parallel_distribution(target, acc_cnt, acc_dsp, rproc, rank, owners, counts);
      MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
      for(int i=0; i<size; i++) target[i] -= counts[i];

      unsigned int k = 0, numref = 10;
      while( !isEmpty(target) && k++ < numref)
      {
        refine_distribution(target, acc_cnt, acc_dsp, rproc, rank, owners, counts);
        MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
        for(int i=0; i<size; i++) target[i] -= counts[i];
      }
      #else
      parallel_distribution_minrank(target, acc_cnt, acc_dsp, rproc, owners, counts);
      MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
      for(int i=0; i<size; i++) target[i] -= counts[i];
      #endif

      if( dist_warnings && rank == 0 ) {
        if(!isEmpty(target)) {
          std::cerr << "Warning: Balanced re-indexing could not be computed." << std::endl;
          std::cerr << "Final differences to even distribution: " << std::endl;
          for(int i=0; i<size; i++) std::cerr << target[i] << " ";
          std::cerr << std::endl;
        }
      }

      // send the assigned nodes back to the respective processes
      binary_sort_copy(owners, rnod);
      grph.configure(owners, _comm);
      numrecv = sum(grph.rcnt);
      sbuf.resize(numrecv);
      MPI_Exchange(grph, rnod, sbuf, _comm);

      _alg_nod.append(sbuf.begin(), sbuf.end());
      binary_sort(_alg_nod);
    }

    // Generate the algebraic layout
    {
      commgraph<size_t> owned_layout;
      owned_layout.resize(size);
      size_t owned_idx_size = _alg_nod.size();
      MPI_Allgather(&owned_idx_size, sizeof(size_t), MPI_BYTE,
                    owned_layout.scnt.data(), sizeof(size_t), MPI_BYTE, _comm);
      dsp_from_cnt(owned_layout.scnt, owned_layout.sdsp);

      _alg_layout.resize(owned_layout.sdsp.size());
      vec_assign(_alg_layout.data(), owned_layout.sdsp.data(), owned_layout.sdsp.size());
    }
  }

  /// Compute the layout of the overlapping (nodal) indices across all ranks.
  inline void compute_layout()
  {
    T size = this->algebraic_layout().size()-1;
    T nlidx = this->num_local_idx();

    _layout.resize(size + 1);
    vector<T> cnt(size);

    MPI_Allgather(&nlidx, sizeof(T), MPI_BYTE, cnt.data(), sizeof(T), MPI_BYTE, _comm);
    dsp_from_cnt(cnt, _layout);
  }

  public:
  /// Non-parameterized constructor. Use assign() to initialize later.
  overlapping_layout() : _glob_num_idx(0), _comm(SF_COMM) {}

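  /**
   * @brief Initialization function.
   *
   * @param idx   The global indices of the local (overlapping) domain.
   * @param comm  The MPI communicator to use.
   */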
  inline void assign(const vector<T> & idx, MPI_Comm comm)
  {
    _comm = comm;

    parallel_layout<T>::assign(idx);

    this->find_domain_interfaces();
    this->find_algebraic_layout();
    this->compute_layout();

    this->localize(_inod);
    binary_sort(_inod);
    this->localize(_alg_nod);
    binary_sort(_alg_nod);
  }


  /// Retrieve the local indices of the subdomain interfaces.
  inline const vector<T>& interface() const
  {
    return _inod;
  }

  /// Getter function for the local indices forming the local algebraic node set.
  inline const vector<T> & algebraic_nodes() const
  {
    return _alg_nod;
  }

  /// Getter function for the global algebraic node layout.
  inline const vector<T> & algebraic_layout() const
  {
    return _alg_layout;
  }

  /// Return the overlapping layout.
  inline const vector<T> & layout() const
  {
    return _layout;
  }

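  /**
   * @brief Map a global (reference numbering) index to the local algebraic
   *        (non-overlapping) indexing.
   *
   * @param global_idx      The global index to map.
   * @param global_alg_nbr  The global algebraic numbering of the local nodes
   *                        (indexed by local nodal index).
   * @param rank            The rank of the calling process.
   *
   * @return The local algebraic index, or -1 if the index is not owned locally.
   */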
  inline T localize_algebraic(const T global_idx, const vector<T> & global_alg_nbr, const int rank)
  {
    T loc_nodal_idx = parallel_layout<T>::localize(global_idx);
    if(loc_nodal_idx == -1)
      return -1;

    T local_offset = _alg_layout[rank], local_size = _alg_layout[rank+1] - local_offset;
    T local_alg_idx = global_alg_nbr[loc_nodal_idx] - local_offset;

    if(local_alg_idx > -1 && local_alg_idx < local_size)
      return local_alg_idx;
    else
      return -1;
  }

  /// Retrieve the global number of indices.
  inline size_t num_global_idx() const
  {
    return _glob_num_idx;
  }
  /// Retrieve the local number of indices.
  inline size_t num_local_idx() const
  {
    return parallel_layout<T>::_l2g.size();
  }
  /// Retrieve the number of local algebraic indices.
  inline size_t num_algebraic_idx() const
  {
    return _alg_nod.size();
  }

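  /**
   * @brief Compute a reduction on overlapping data.
   *
   * The data on the interface nodes is combined across all ranks sharing them,
   * using the operation \p op ("max", "min" or "sum"), and written back, so that
   * all ranks hold the same value on shared nodes afterwards.
   *
   * A minimal usage sketch, assuming the layout was set up with assign() and
   * that my_global_indices is a hypothetical input vector:
   * @code
   * SF::overlapping_layout<int> layout;
   * layout.assign(my_global_indices, MPI_COMM_WORLD);
   *
   * SF::vector<double> ndat(layout.num_local_idx(), 1.0);
   * layout.reduce(ndat, "sum");  // shared nodes now hold their multiplicity
   * @endcode
   */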
  template<class V>
  inline void reduce(vector<V> & ndat, const char* op) const
  {
    int size, rank;
    MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);

    // parallel layout of nodal data must match the one of the
    // overlapping_layout class
    assert(ndat.size() == parallel_layout<T>::_l2g.size());

    size_t isize = _inod.size();
    vector<T> nod_sbuf(isize), dest(isize), perm;
    vector<V> dat_sbuf(isize);

    for(size_t i=0; i<isize; i++) {
      T lidx = _inod[i];
      dest[i] = parallel_layout<T>::_l2g[lidx] % size;
    }
    interval(perm, 0, isize);
    binary_sort_copy(dest, perm);

    for(size_t i=0; i<perm.size(); i++)
    {
      T lidx = _inod[perm[i]];
      nod_sbuf[i] = parallel_layout<T>::_l2g[lidx];
      dat_sbuf[i] = ndat[lidx];
    }

    // set up a commgraph w.r.t. the destination
    commgraph<size_t> grph;
    grph.configure(dest, _comm);

    size_t numrecv = sum(grph.rcnt);
    vector<T> nod_rbuf(numrecv);
    vector<V> dat_rbuf(numrecv);

    MPI_Exchange(grph, nod_sbuf, nod_rbuf, _comm);
    MPI_Exchange(grph, dat_sbuf, dat_rbuf, _comm);

    vector<T> acc_cnt(numrecv, 1), acc_dsp, acc_col;
    interval(acc_col, 0, numrecv);

    binary_sort_copy(nod_rbuf, acc_col);
    unique_accumulate(nod_rbuf, acc_cnt);

    acc_dsp.resize(acc_cnt.size() + 1);
    dsp_from_cnt(acc_cnt, acc_dsp);

    short op_sw = 0;
    if(! strcmp(op, "max"))
      op_sw = 0;
    else if(! strcmp(op, "min"))
      op_sw = 1;
    else if(! strcmp(op, "sum"))
      op_sw = 2;
    else
      assert(0);

    for(size_t i=0; i<acc_cnt.size(); i++)
    {
      V val = 0.0;  // value we will set for all interface nodes
      // compute value based on selected operation
      switch(op_sw)
      {
        // max
        case 0:
        {
          V max = dat_rbuf[acc_col[acc_dsp[i]]];
          T idx=1;
          while(idx < acc_cnt[i])
          {
            T p = acc_col[acc_dsp[i]+idx];
            if(max < dat_rbuf[p]) max = dat_rbuf[p];
            idx++;
          }
          val = max;
          break;
        }

        // min
        case 1:
        {
          V min = dat_rbuf[acc_col[acc_dsp[i]]];
          T idx=1;
          while(idx < acc_cnt[i])
          {
            T p = acc_col[acc_dsp[i]+idx];
            if(min > dat_rbuf[p]) min = dat_rbuf[p];
            idx++;
          }
          val = min;
          break;
        }

        // sum
        case 2:
        {
          T idx=0;
          while(idx < acc_cnt[i])
          {
            T p = acc_col[acc_dsp[i]+idx];
            val += dat_rbuf[p];
            idx++;
          }
          break;
        }
      }
      for(T j = acc_dsp[i]; j < acc_dsp[i] + acc_cnt[i]; j++)
        dat_rbuf[acc_col[j]] = val;
    }

    grph.transpose();
    MPI_Exchange(grph, dat_rbuf, dat_sbuf, _comm);

    for(size_t i=0; i<perm.size(); i++)
    {
      T lidx = _inod[perm[i]];
      ndat[lidx] = dat_sbuf[i];
    }
  }
};

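/**
 * @brief The parallel layout of non-overlapping indices, e.g. the element
 *        indices of a distributed mesh.
 */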
template<class T>
class non_overlapping_layout : public parallel_layout<T>
{
  private:
  vector<T> _elem_layout;
  MPI_Comm _comm;

  public:
  /// Empty constructor. Use assign() to set up the layout.
  non_overlapping_layout() : _comm(SF_COMM)
  {}

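  /**
   * @brief Generate the layout from the reference element indices of the local domain.
   *
   * @param ref_eidx  The global (reference) indices of the local elements.
   * @param comm      The MPI communicator to use.
   */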
  inline void assign(const vector<T> & ref_eidx, MPI_Comm comm)
  {
    _comm = comm;

    parallel_layout<T>::assign(ref_eidx);

    int size; MPI_Comm_size(_comm, &size);

    T lsize = ref_eidx.size();
    vector<T> layout_cnt(size);

    MPI_Allgather(&lsize, sizeof(T), MPI_BYTE, layout_cnt.data(), sizeof(T), MPI_BYTE, _comm);
    dsp_from_cnt(layout_cnt, _elem_layout);
  }

  /// Getter function for the algebraic layout of the elements.
  const vector<T> & algebraic_layout() const
  {
    return _elem_layout;
  }
};


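/**
 * @brief Convert a local nodal index into the corresponding local PETSc
 *        (algebraic) index.
 */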
template<class T, class S>
T local_nodal_to_local_petsc(const meshdata<T,S> & mesh, int rank, T local_nodal)
{
  const vector<T> & alg_layout = mesh.pl.algebraic_layout();
  const vector<T> & petsc_nbr  = mesh.get_numbering(NBR_PETSC);

  const T my_offset = alg_layout[rank];

  return petsc_nbr[local_nodal] - my_offset;
}

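/**
 * @brief Convert a local PETSc (algebraic) index back into the corresponding
 *        local nodal index. Returns -1 if the index is not found.
 */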
template<class T, class S>
T local_petsc_to_local_nodal(const meshdata<T,S> & mesh, int rank, T local_petsc)
{
  const vector<T> & alg_layout = mesh.pl.algebraic_layout();
  const vector<T> & alg_nod    = mesh.pl.algebraic_nodes();
  const vector<T> & petsc_nbr  = mesh.get_numbering(NBR_PETSC);

  const T my_offset = alg_layout[rank];
  const size_t num_alg = alg_nod.size();

  size_t idx = 0;
  while(idx < num_alg && petsc_nbr[alg_nod[idx]] != local_petsc + my_offset) idx++;

  if(idx == num_alg) return -1;
  else return alg_nod[idx];
}

/// Set up the bijective mapping between local PETSc indices and local nodal indices.
template<class T, class S>
void local_petsc_to_nodal_mapping(const meshdata<T,S> & mesh, index_mapping<T> & petsc_to_nodal)
{
  int rank; MPI_Comm_rank(mesh.comm, &rank);
  const vector<T> & alg_nod = mesh.pl.algebraic_nodes();
  vector<T> petsc_idx(alg_nod.size());

  size_t idx = 0;
  for(const T & n : alg_nod)
    petsc_idx[idx++] = local_nodal_to_local_petsc(mesh, rank, n);

  petsc_to_nodal.assign(petsc_idx, alg_nod);
}


}  // namespace SF

#endif