#ifndef _SF_PARALLEL_LAYOUT_H
#define _SF_PARALLEL_LAYOUT_H

// parallel_distribution(): compute per-rank targets, then greedily
// re-assign entities until the counts match the targets.
// (Excerpt; elided lines are marked with "// ...".)
int    size  = gtarget.size();
size_t csize = cnt.size();
// ...
for(int i = 0; i < size; i++) {
  int idx = (i + myrank) % size;
  // even split of gt entities; block i gets its share of the remainder
  ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
}
// ...
// pseudo-random start index per entity for the optimization sweep
for(size_t nidx = 0; nidx < csize; nidx++) {
  act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];
  T j = dsp[nidx] + act_idx[nidx];
  // ...
}
// J: quadratic cost, the squared deviation of the counts from the targets
for(int i = 0; i < size; i++)
  J += (counts[i] - ltarget[i]) * (counts[i] - ltarget[i]);

unsigned int k = 0, update = 0, osteps = 32;
// ...
// greedy sweeps: move one entity from rank `op` to rank `p` whenever that
// decreases the cost, for at most osteps iterations
while(J > 0 && k++ < osteps)
{
  for(size_t nidx = 0; nidx < csize; nidx++)
  {
    // ...
    if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
    T j = dsp[nidx] + act_idx[nidx];
    // ...
    T n0  = counts[op],  n1  = counts[p];
    T n0_ = ltarget[op], n1_ = ltarget[p];
    if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
    {
      counts[op]--; counts[p]++;
      // incremental cost update after moving one entity
      J += 2*(1 - (n0 - n0_) + (n1 - n1_));
    }
  }
}
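The target formula above is the standard even-split trick: rank i is assigned floor((i+1)*gt/size) - floor(i*gt/size) entities, so the targets sum to gt and differ by at most one. A minimal standalone sketch with hypothetical values, not part of the header:

#include <cstdio>
#include <vector>

int main()
{
  const long gt = 10, size = 4;            // 10 entities over 4 ranks
  std::vector<long> ltarget(size);
  long total = 0;

  for(long i = 0; i < size; i++) {
    // identical to ltarget[idx] = (i*gt + gt)/size - (i*gt)/size above
    ltarget[i] = ((i + 1) * gt) / size - (i * gt) / size;
    total += ltarget[i];
    std::printf("rank %ld: target %ld\n", i, ltarget[i]);  // 2 3 2 3
  }
  std::printf("sum = %ld (== gt)\n", total);               // 10
  return 0;
}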
// refine_distribution(): same target computation and greedy sweep as
// parallel_distribution(), but with more optimization steps (excerpt).
int    size  = gtarget.size();
size_t csize = cnt.size();
// ...
for(int i = 0; i < size; i++) {
  int idx = (i + myrank) % size;
  ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
}
// ...
for(int i = 0; i < size; i++)
  J += (counts[i] - ltarget[i]) * (counts[i] - ltarget[i]);

unsigned int k = 0, update = 0, osteps = 64;
// ...
for(size_t nidx = 0; nidx < csize; nidx++)
  act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];
// ...
while(J > 0 && k++ < osteps)
{
  for(size_t nidx = 0; nidx < csize; nidx++)
  {
    // ...
    if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
    T j = dsp[nidx] + act_idx[nidx];
    // ...
    T n0  = counts[op],  n1  = counts[p];
    T n0_ = ltarget[op], n1_ = ltarget[p];
    if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
    {
      counts[op]--; counts[p]++;
      J += 2*(1 - (n0 - n0_) + (n1 - n1_));
    }
  }
}
// parallel_distribution_minrank(): each entity is owned by the smallest
// rank among those sharing it (excerpt).
for(size_t i = 0; i < owner.size(); i++)
{
  // ...
  int minrank = ranks[dsp[i]];
  for(T j = dsp[i]; j < dsp[i+1]; j++)
    if(minrank > ranks[j]) minrank = ranks[j];
  // ...
}
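With counts/displacements storage, the ranks sharing entity i occupy ranks[dsp[i]] .. ranks[dsp[i+1]-1], and the loop above selects the smallest of them as the owner. A self-contained sketch of that pattern, with made-up data:

#include <cstdio>
#include <vector>

int main()
{
  // two entities: entity 0 shared by ranks {2,0,1}, entity 1 by ranks {3,1}
  std::vector<int> ranks = {2, 0, 1, 3, 1};
  std::vector<int> dsp   = {0, 3, 5};     // displacements into `ranks`
  std::vector<int> owner(2);

  for(size_t i = 0; i < owner.size(); i++) {
    int minrank = ranks[dsp[i]];
    for(int j = dsp[i]; j < dsp[i+1]; j++)
      if(minrank > ranks[j]) minrank = ranks[j];
    owner[i] = minrank;
    std::printf("entity %zu owned by rank %d\n", i, owner[i]);  // 0, then 1
  }
  return 0;
}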
// globalize(vector<T> & lvec): map local indices to global ones via _l2g (excerpt)
size_t lsize = _l2g.size(), widx = 0;
// ...
for(size_t ridx = 0; ridx < lvec.size(); ridx++) {
  // ...
  lvec[widx++] = _l2g[loc];
}

// globalize(const T lidx) (excerpt)
size_t lsize = _l2g.size();
// ...

// localize(vector<T> & gvec): map global indices to local ones via _g2l (excerpt)
for(size_t ridx = 0; ridx < gvec.size(); ridx++) {
  // ...
  it = _g2l.find(glob);
  // ...
  gvec[widx++] = it->second;
}

// localize(vector<T> & gidx, vector<V> & gdat): keep only the indices present
// in the local domain, compacting the associated data (excerpt)
for(size_t ridx = 0; ridx < gidx.size(); ridx++) {
  // ...
  it = _g2l.find(glob);
  if(it != _g2l.end()) {
    gidx[widx] = it->second;
    gdat[widx] = gdat[ridx];
    // ...
  }
}

// single-index lookup in the global-to-local map (excerpt)
auto it = _g2l.find(gidx);
// ...

// loop over all local indices (excerpt)
for(size_t i = 0; i < _l2g.size(); i++)
  // ...
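The two maps are inverses of each other: _l2g is a plain array (local to global), _g2l a hash map (global to local). A toy round trip, using std::unordered_map in place of the library's hashmap class:

#include <cstdio>
#include <unordered_map>
#include <vector>

int main()
{
  // local-to-global map: local indices 0..3 hold global indices 7,2,9,4
  std::vector<long> l2g = {7, 2, 9, 4};
  std::unordered_map<long, long> g2l;
  for(size_t i = 0; i < l2g.size(); i++)
    g2l[l2g[i]] = (long)i;               // build the inverse map

  std::vector<long> idx = {9, 7};
  for(auto & g : idx) g = g2l.at(g);     // localize:  {9,7} -> {2,0}
  for(auto & l : idx) l = l2g[l];        // globalize: back to {9,7}
  std::printf("%ld %ld\n", idx[0], idx[1]);
  return 0;
}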
size_t _glob_num_idx;   // the global number of indices
// Determine the subdomain interface nodes: distribute the local node sets
// across ranks, count unique global indices, and set up the communication
// graph (excerpt).
inline void find_domain_interfaces()
{
  // ...
  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
  // ...
  for(size_t i = 0; i < parallel_layout<T>::_l2g.size(); i++)
    // ...
  size_t numrecv = sum(grph.rcnt);
  // ...
  // the global number of indices is the sum of the per-rank unique counts
  unsigned long int num_unique = acc_cnt.size(), gnum_unique;
  MPI_Allreduce(&num_unique, &gnum_unique, 1, MPI_UNSIGNED_LONG, MPI_SUM, _comm);
  _glob_num_idx = gnum_unique;
  // ...
  // count how many entries go to each rank
  for(size_t i = 0; i < acc_cnt.size(); i++) {
    // ...
    for(T j = acc_dsp[i]; j < acc_dsp[i+1]; j++)
      grph.scnt[rproc[j]]++;
  }
  // ...
  // exchange send counts for receive counts
  MPI_Alltoall(grph.scnt.data(), sizeof(size_t), MPI_BYTE,
               grph.rcnt.data(), sizeof(size_t), MPI_BYTE, _comm);
  // ...
  size_t numsend = sum(grph.scnt);
  sbuf.resize(numsend);
  // ...
  // fill the send buffer
  for(size_t i = 0; i < acc_cnt.size(); i++) {
    // ...
    for(T j = acc_dsp[i]; j < acc_dsp[i+1]; j++) {
      // ...
      sbuf[grph.sdsp[sproc]] = rnod[i];
    }
  }
  // ...
}
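The MPI_Alltoall above is the usual counts handshake: each rank announces how much it will send to every peer (scnt), learns how much it will receive (rcnt), and displacements then follow as prefix sums, which is what dsp_from_cnt() computes. A minimal standalone sketch of this handshake, outside the commgraph class:

#include <mpi.h>
#include <vector>

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int size, rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // example: send (rank+1) elements to every peer
  std::vector<size_t> scnt(size, rank + 1), rcnt(size);
  MPI_Alltoall(scnt.data(), sizeof(size_t), MPI_BYTE,
               rcnt.data(), sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);

  // displacements from counts (what dsp_from_cnt() does): exclusive prefix sum
  std::vector<size_t> sdsp(size + 1, 0), rdsp(size + 1, 0);
  for(int i = 0; i < size; i++) {
    sdsp[i+1] = sdsp[i] + scnt[i];
    rdsp[i+1] = rdsp[i] + rcnt[i];
  }
  // rdsp[size] now equals sum(rcnt), the total receive-buffer size

  MPI_Finalize();
  return 0;
}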
// Determine the algebraic (non-overlapping) node layout: assign interface
// nodes to ranks so that the resulting distribution is as balanced as
// possible (excerpt).
inline void find_algebraic_layout()
{
  // ...
  const bool dist_warnings = false;
  // ...
  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
  // ...
  // inner nodes: local nodes that are not on a domain interface
  T* end = std::set_difference(nodes.begin(), nodes.end(), /* ... */);
  // ...
  for(size_t i = 0; i < sbuf.size(); i++) {
    // ...
    dest[i] = nod % size;
  }
  // ...
  size_t numrecv = sum(grph.rcnt);
  // ...
  // ideal even distribution, minus the nodes each rank already owns
  divide(_glob_num_idx, size, target);
  // ...
  target[rank] -= inner_nodes.size();
  MPI_Allreduce(MPI_IN_PLACE, target.data(), target.size(), MPI_INT, MPI_MIN, _comm);
  // ...
  for(int i = 0; i < size; i++)
    // ...
  if(!rank) std::cerr << "Warning: Domains too unbalanced for balanced re-indexing!" << std::endl;
  // ...
  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
  for(int i = 0; i < size; i++) target[i] -= counts[i];
  // ...
  // iteratively refine until the targets are met or numref rounds passed
  unsigned int k = 0, numref = 10;
  while( !isEmpty(target) && k++ < numref )
  {
    // ...
    MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
    for(int i = 0; i < size; i++) target[i] -= counts[i];
  }
  // ...
  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
  for(int i = 0; i < size; i++) target[i] -= counts[i];
  // ...
  if( dist_warnings && rank == 0 ) {
    // ...
    std::cerr << "Warning: Balanced re-indexing could not be computed." << std::endl;
    std::cerr << "Final differences to even distribution: " << std::endl;
    for(int i = 0; i < size; i++) std::cerr << target[i] << " ";
    std::cerr << std::endl;
  }
  // ...
  sbuf.resize(numrecv);
  // ...
  _alg_nod.append(sbuf.begin(), sbuf.end());
  // ...
  // gather each rank's owned index count into the owned layout
  owned_layout.resize(size);
  size_t owned_idx_size = _alg_nod.size();
  MPI_Allgather(&owned_idx_size, sizeof(size_t), MPI_BYTE,
                owned_layout.scnt.data(), sizeof(size_t), MPI_BYTE, _comm);
  // ...
}
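The divide() call above produces the ideal even distribution that the refinement loop then drives towards. A plausible implementation of such an even split, assuming only the documented semantics (hypothetical helper, not the library source):

#include <vector>

// Divide gsize into num_parts parts, spreading the remainder evenly
// (same floor-difference trick as in parallel_distribution()).
template<class T>
void divide_even(size_t gsize, size_t num_parts, std::vector<T> & loc_sizes)
{
  loc_sizes.resize(num_parts);
  for(size_t i = 0; i < num_parts; i++)
    loc_sizes[i] = T(((i + 1) * gsize) / num_parts - (i * gsize) / num_parts);
}
// e.g. divide_even(10, 4, v) yields v = {2, 3, 2, 3}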
// Build the final index layout from the per-rank local index counts (excerpt).
inline void compute_layout()
{
  // ...
  T size  = this->algebraic_layout().size() - 1;
  T nlidx = this->num_local_idx();
  // ...
  MPI_Allgather(&nlidx, sizeof(T), MPI_BYTE,
                cnt.data(), sizeof(T), MPI_BYTE, _comm);
  // ...
}
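From the gathered counts, the global layout follows as an exclusive prefix sum: layout[r] is the first global index owned by rank r, and layout[r+1] - layout[r] is its count. A sketch of that step with a hypothetical helper:

#include <vector>

// Build a layout vector from per-rank counts: layout has cnt.size()+1
// entries, with layout[0] = 0 and layout[r+1] = layout[r] + cnt[r].
template<class T>
void layout_from_counts(const std::vector<T> & cnt, std::vector<T> & layout)
{
  layout.resize(cnt.size() + 1);
  layout[0] = T(0);
  for(size_t r = 0; r < cnt.size(); r++)
    layout[r+1] = layout[r] + cnt[r];
}
// e.g. cnt = {3, 5, 4}  ->  layout = {0, 3, 8, 12}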
// layout setup: interfaces first, then the algebraic layout, then the
// final index layout
this->find_domain_interfaces();
this->find_algebraic_layout();
this->compute_layout();
// localize_algebraic(): map a global reference index to the local
// algebraic (non-overlapping) indexing (excerpt).
if(loc_nodal_idx == -1)
  // ...
T local_offset  = _alg_layout[rank], local_size = _alg_layout[rank+1] - local_offset;
T local_alg_idx = global_alg_nbr[loc_nodal_idx] - local_offset;
// ...
if(local_alg_idx > -1 && local_alg_idx < local_size)
  return local_alg_idx;
// ...
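The index check above is plain offset arithmetic: subtract the rank's first global algebraic index and test that the result lies inside the rank's slice. A worked sketch with made-up numbers:

#include <cstdio>
#include <vector>

int main()
{
  // hypothetical layout: rank 1 owns global algebraic indices [4, 9)
  std::vector<long> alg_layout = {0, 4, 9, 12};
  const int  rank         = 1;
  const long local_offset = alg_layout[rank];                    // 4
  const long local_size   = alg_layout[rank+1] - local_offset;   // 5

  long global_alg_idx = 6;
  long local_alg_idx  = global_alg_idx - local_offset;           // 2
  if(local_alg_idx > -1 && local_alg_idx < local_size)
    std::printf("local algebraic index: %ld\n", local_alg_idx);
  else
    std::printf("index not owned by this rank\n");
  return 0;
}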
// num_global_idx() (excerpt)
return _glob_num_idx;

// num_algebraic_idx() (excerpt)
return _alg_nod.size();
// reduce(): gather the overlapping copies of the interface data, combine
// them with the requested operator, and scatter the result back (excerpt).
MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
// ...
size_t isize = _inod.size();
vector<T> nod_sbuf(isize), dest(isize), perm;
// ...
for(size_t i = 0; i < isize; i++) {
  // ...
}
// ...
// gather the data of the interface nodes into the send buffer
for(size_t i = 0; i < perm.size(); i++) {
  // ...
  T lidx = _inod[perm[i]];
  dat_sbuf[i] = ndat[lidx];
}
// ...
size_t numrecv = sum(grph.rcnt);
// ...
vector<T> acc_cnt(numrecv, 1), acc_dsp, acc_col;
// ...
acc_dsp.resize(acc_cnt.size() + 1);
// ...
// dispatch on the requested reduction operator
if(! strcmp(op, "max"))
  // ...
else if(! strcmp(op, "min"))
  // ...
else if(! strcmp(op, "sum"))
  // ...
for(size_t i = 0; i < acc_cnt.size(); i++)
{
  // "max": maximum over all received copies of one index
  V max = dat_rbuf[acc_col[acc_dsp[i]]];
  // ...
  while(idx < acc_cnt[i]) {
    T p = acc_col[acc_dsp[i]+idx];
    if(max < dat_rbuf[p]) max = dat_rbuf[p];
    // ...
  }
  // "min": analogous, with the comparison flipped
  V min = dat_rbuf[acc_col[acc_dsp[i]]];
  // ...
  while(idx < acc_cnt[i]) {
    T p = acc_col[acc_dsp[i]+idx];
    if(min > dat_rbuf[p]) min = dat_rbuf[p];
    // ...
  }
  // "sum": accumulate over the copies
  while(idx < acc_cnt[i]) {
    T p = acc_col[acc_dsp[i]+idx];
    // ...
  }
  // write the reduced value back to every copy
  for(T j = acc_dsp[i]; j < acc_dsp[i] + acc_cnt[i]; j++)
    dat_rbuf[acc_col[j]] = val;
}
// ...
// scatter the reduced data back into the local array
for(size_t i = 0; i < perm.size(); i++) {
  T lidx = _inod[perm[i]];
  ndat[lidx] = dat_sbuf[i];
}
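Stripped of the MPI transport, the accumulation kernel is a per-group reduction: acc_dsp/acc_cnt delimit the copies of one index, the operator combines them, and the result is written back to every copy. A serial sketch of the same kernel, with illustrative names:

#include <cstdio>
#include <cstring>
#include <vector>

// Reduce duplicated entries in-place: group i occupies positions
// col[dsp[i]] .. col[dsp[i]+cnt[i]-1] of dat.
void reduce_groups(std::vector<double> & dat, const std::vector<int> & col,
                   const std::vector<int> & cnt, const std::vector<int> & dsp,
                   const char* op)
{
  for(size_t i = 0; i < cnt.size(); i++) {
    double val = dat[col[dsp[i]]];
    for(int idx = 1; idx < cnt[i]; idx++) {
      double v = dat[col[dsp[i] + idx]];
      if     (!strcmp(op, "max")) { if(val < v) val = v; }
      else if(!strcmp(op, "min")) { if(val > v) val = v; }
      else if(!strcmp(op, "sum")) val += v;
    }
    // write the reduced value back to every copy of the group
    for(int j = dsp[i]; j < dsp[i] + cnt[i]; j++)
      dat[col[j]] = val;
  }
}

int main()
{
  std::vector<double> dat = {1.0, 5.0, 2.0};
  std::vector<int>    col = {0, 1, 2};
  std::vector<int>    cnt = {3};            // one group holding all 3 copies
  std::vector<int>    dsp = {0};
  reduce_groups(dat, col, cnt, dsp, "sum");
  std::printf("%g %g %g\n", dat[0], dat[1], dat[2]);  // 8 8 8
  return 0;
}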
// non_overlapping_layout::assign(): gather the per-rank index counts (excerpt).
int size; MPI_Comm_size(_comm, &size);
// ...
T lsize = ref_eidx.size();
// ...
MPI_Allgather(&lsize, sizeof(T), MPI_BYTE,
              layout_cnt.data(), sizeof(T), MPI_BYTE, _comm);
// local_nodal_to_local_petsc(): nodal-to-PETSc index conversion (excerpt).
template<class T, class S>
// ...
const vector<T> & alg_layout = mesh.pl.algebraic_layout();
// ...
const T my_offset = alg_layout[rank];
// ...
return petsc_nbr[local_nodal] - my_offset;
// local_petsc_to_local_nodal(): the inverse conversion, via linear search (excerpt).
template<class T, class S>
// ...
const vector<T> & alg_layout = mesh.pl.algebraic_layout();
const vector<T> & alg_nod    = mesh.pl.algebraic_nodes();
// ...
const T      my_offset = alg_layout[rank];
const size_t num_alg   = alg_nod.size();
// ...
// scan the algebraic nodes for the matching PETSc index
while(idx < num_alg && petsc_nbr[alg_nod[idx]] != local_petsc + my_offset) idx++;
// ...
if(idx == num_alg) return -1;
else               return alg_nod[idx];
// local_petsc_to_nodal_mapping(): precompute the full PETSc-to-nodal map (excerpt).
template<class T, class S>
// ...
int rank; MPI_Comm_rank(mesh.comm, &rank);
const vector<T> & alg_nod = mesh.pl.algebraic_nodes();
// ...
for(const T & n : alg_nod)
  // ...
petsc_to_nodal.assign(petsc_idx, alg_nod);
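Since local_petsc_to_local_nodal() performs a linear scan per query, the loop above instead precomputes the whole table once and stores it in an index_mapping. The same idea with a plain std::unordered_map (a sketch of the pattern, not the library's index_mapping):

#include <unordered_map>
#include <vector>

// Build a reusable PETSc-to-nodal lookup table instead of scanning
// alg_nod for every single query.
template<class T>
std::unordered_map<T, T>
build_petsc_to_nodal(const std::vector<T> & alg_nod,
                     const std::vector<T> & petsc_nbr, T my_offset)
{
  std::unordered_map<T, T> map;
  for(const T & n : alg_nod)
    map[petsc_nbr[n] - my_offset] = n;   // local PETSc index -> nodal index
  return map;
}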
constexpr T min(T a, T b)
const vector< T > & algebraic_layout() const
Getter function for the global algebraic node layout.
The mesh storage class. It contains both element and vertex data.
The vector class and related algorithms.
vector< T > & get_numbering(SF_nbr nbr_type)
Get the vector defining a certain numbering.
size_t num_local_idx() const
Retrieve the local number of indices.
void local_petsc_to_nodal_mapping(const meshdata< T, S > &mesh, index_mapping< T > &petsc_to_nodal)
void parallel_distribution(const vector< T > &target, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
Distributes entities between all ranks.
MPI_Comm comm
the MPI world the parallel mesh is defined on
void MPI_Exchange(commgraph< T > &grph, vector< S > &send, vector< S > &recv, MPI_Comm comm)
Exchange data in parallel over MPI.
constexpr T max(T a, T b)
PETSc numbering of nodes.
void interval(vector< T > &vec, size_t start, size_t end)
Create an integer interval between start and end.
overlapping_layout< T > pl
nodal parallel layout
void resize(size_t size)
Resize all vectors to size.
void globalize(vector< T > &lvec) const
Globalize local indices.
const vector< T > & algebraic_layout() const
Getter function for the algebraic layout of the elements.
void divide(const size_t gsize, const size_t num_parts, vector< T > &loc_sizes)
divide gsize into num_parts local parts with even distribution of the remainder
void refine_distribution(const vector< T > &target, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
Further refine a distribution generated by parallel_distribution().
vector< T > rdsp
Displacements w.r.t. rcnt.
const vector< T > & layout() const
Return the overlapping layout.
T * data()
Pointer to the vector's start.
The base class for parallel layouts.
const T * end() const
Pointer to the vector's end.
overlapping_layout()
Non-parameterized constructor. Use assign() to initialize later.
const vector< T > & interface() const
Retrieve the local indices of the subdomain interfaces.
#define SF_COMM
the default SlimFem MPI communicator
T local_petsc_to_local_nodal(const meshdata< T, S > &mesh, int rank, T local_petsc)
void localize(vector< T > &gidx, vector< V > &gdat) const
Localize global indices and associated data.
void localize(vector< T > &gvec) const
Localize global indices.
bool isEmpty(vector< T > &v)
Return whether a vector is empty (all values are 0).
void configure(const vector< V > &dest, MPI_Comm comm)
Set up the communication graph.
T sum(const vector< T > &vec)
Compute sum of a vector's entries.
void binary_sort_copy(vector< T > &_V, vector< S > &_W)
Index mapping class. This is a bijective mapping.
vector< T > _l2g
The global indices of the local DD domain. Also serves as the local-to-global map.
size_t size() const
The current size of the vector.
vector< T > scnt
Number of elements sent to each rank.
The parallel layout of non-overlapping indices.
T globalize(const T lidx) const
Globalize a single local index.
vector< T > sdsp
Displacements w.r.t. scnt.
size_t num_global_idx() const
Retrieve the global number of indices.
void assign(const vector< T > &a, const vector< T > &b)
Set up the index mapping between a and b.
void reduce(vector< V > &ndat, const char *op) const
Compute a reduction on overlapping data.
void unique_accumulate(vector< T > &_P, vector< S > &_A)
A vector storing arbitrary data.
void assign(const vector< T > &idx, MPI_Comm comm)
Initialization function.
const T * begin() const
Pointer to the vector's start.
void assign(const vector< T > &idx)
Assign a parallel distributed index set that defines the parallel layout.
non_overlapping_layout()
Empty constructor. Use assign() to set up the layout.
void assign(InputIterator s, InputIterator e)
Assign a memory range.
void source_ranks(vector< V > &source)
For every received data element, get the rank index it was received from.
iterator find(const K &key)
Search for key. Return iterator.
void vec_assign(S *lhs, const V *rhs, size_t size)
Assign the values in rhs to lhs. The data-type of rhs is cast to the type of lhs. ...
const vector< T > & algebraic_nodes() const
Getter function for the local indices forming the local algebraic node set.
T localize_algebraic(const T global_idx, const vector< T > &global_alg_nbr, const int rank)
map a global (REF_NBR, reference numbering) index to local algebraic (non-overlapping) indexing w...
void parallel_distribution_minrank(const vector< T > &target, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, vector< T > &owner, vector< T > &counts)
size_t num_algebraic_idx() const
Retrieve the number of local algebraic indices.
void transpose()
transpose comm graph (receive becomes send, and vice versa)
vector< T > rcnt
Number of elements received from each rank.
void binary_sort(vector< T > &_V)
void assign(const vector< T > &ref_eidx, MPI_Comm comm)
Generate the layout.
hashmap::unordered_map< T, T > _g2l
The global-to-local map for the DD domain.
void resize(size_t n)
Resize a vector.
void append(InputIterator s, InputIterator e)
Append data to the current data chunk.
Classes similar to unordered_set and unordered_map, but with better performance.
The overlapping_layout class contains the algorithms related to managing overlapping parallel index s...
void dsp_from_cnt(const vector< T > &cnt, vector< T > &dsp)
Compute displacements from counts.
T local_nodal_to_local_petsc(const meshdata< T, S > &mesh, int rank, T local_nodal)