#ifndef _SF_PARALLEL_LAYOUT_H
#define _SF_PARALLEL_LAYOUT_H

// parallel_distribution(): compute per-rank targets, then greedily
// re-assign entities until the counts match the targets.
// (Excerpt; elided lines are marked with "// ...".)
int    size  = gtarget.size();
size_t csize = cnt.size();
// ...
for(int i = 0; i < size; i++) {
  int idx = (i + myrank) % size;
  // even split of gt entities; block i gets its share of the remainder
  ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
}
// ...
// pseudo-random start index per entity for the optimization sweep
for(size_t nidx = 0; nidx < csize; nidx++) {
  act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];
  T j = dsp[nidx] + act_idx[nidx];
  // ...
}
// J: quadratic cost, the squared deviation of the counts from the targets
for(int i = 0; i < size; i++)
  J += (counts[i] - ltarget[i]) * (counts[i] - ltarget[i]);

unsigned int k = 0, update = 0, osteps = 32;
// ...
// greedy sweeps: move one entity from rank `op` to rank `p` whenever that
// decreases the cost, for at most osteps iterations
while(J > 0 && k++ < osteps)
{
  for(size_t nidx = 0; nidx < csize; nidx++)
  {
    // ...
    if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
    T j = dsp[nidx] + act_idx[nidx];
    // ...
    T n0  = counts[op],  n1  = counts[p];
    T n0_ = ltarget[op], n1_ = ltarget[p];
    if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
    {
      counts[op]--; counts[p]++;
      // incremental cost update after moving one entity
      J += 2*(1 - (n0 - n0_) + (n1 - n1_));
    }
  }
}
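The target formula above is the standard even-split trick: rank i is assigned floor((i+1)*gt/size) - floor(i*gt/size) entities, so the targets sum to gt and differ by at most one. A minimal standalone sketch with hypothetical values, not part of the header:

#include <cstdio>
#include <vector>

int main()
{
  const long gt = 10, size = 4;            // 10 entities over 4 ranks
  std::vector<long> ltarget(size);
  long total = 0;

  for(long i = 0; i < size; i++) {
    // identical to ltarget[idx] = (i*gt + gt)/size - (i*gt)/size above
    ltarget[i] = ((i + 1) * gt) / size - (i * gt) / size;
    total += ltarget[i];
    std::printf("rank %ld: target %ld\n", i, ltarget[i]);  // 2 3 2 3
  }
  std::printf("sum = %ld (== gt)\n", total);               // 10
  return 0;
}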
// refine_distribution(): same target computation and greedy sweep as
// parallel_distribution(), but with more optimization steps (excerpt).
int    size  = gtarget.size();
size_t csize = cnt.size();
// ...
for(int i = 0; i < size; i++) {
  int idx = (i + myrank) % size;
  ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
}
// ...
for(int i = 0; i < size; i++)
  J += (counts[i] - ltarget[i]) * (counts[i] - ltarget[i]);

unsigned int k = 0, update = 0, osteps = 64;
// ...
for(size_t nidx = 0; nidx < csize; nidx++)
  act_idx[nidx] = (unsigned int)(2147483647 * nidx) % cnt[nidx];
// ...
while(J > 0 && k++ < osteps)
{
  for(size_t nidx = 0; nidx < csize; nidx++)
  {
    // ...
    if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
    T j = dsp[nidx] + act_idx[nidx];
    // ...
    T n0  = counts[op],  n1  = counts[p];
    T n0_ = ltarget[op], n1_ = ltarget[p];
    if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
    {
      counts[op]--; counts[p]++;
      J += 2*(1 - (n0 - n0_) + (n1 - n1_));
    }
  }
}
// parallel_distribution_minrank(): each entity is owned by the smallest
// rank among those sharing it (excerpt).
for(size_t i = 0; i < owner.size(); i++)
{
  // ...
  int minrank = ranks[dsp[i]];
  for(T j = dsp[i]; j < dsp[i+1]; j++)
    if(minrank > ranks[j]) minrank = ranks[j];
  // ...
}
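With counts/displacements storage, the ranks sharing entity i occupy ranks[dsp[i]] .. ranks[dsp[i+1]-1], and the loop above selects the smallest of them as the owner. A self-contained sketch of that pattern, with made-up data:

#include <cstdio>
#include <vector>

int main()
{
  // two entities: entity 0 shared by ranks {2,0,1}, entity 1 by ranks {3,1}
  std::vector<int> ranks = {2, 0, 1, 3, 1};
  std::vector<int> dsp   = {0, 3, 5};     // displacements into `ranks`
  std::vector<int> owner(2);

  for(size_t i = 0; i < owner.size(); i++) {
    int minrank = ranks[dsp[i]];
    for(int j = dsp[i]; j < dsp[i+1]; j++)
      if(minrank > ranks[j]) minrank = ranks[j];
    owner[i] = minrank;
    std::printf("entity %zu owned by rank %d\n", i, owner[i]);  // 0, then 1
  }
  return 0;
}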
// globalize(vector<T> & lvec): map local indices to global ones via _l2g (excerpt)
size_t lsize = _l2g.size(), widx = 0;
// ...
for(size_t ridx = 0; ridx < lvec.size(); ridx++) {
  // ...
  lvec[widx++] = _l2g[loc];
}

// globalize(const T lidx) (excerpt)
size_t lsize = _l2g.size();
// ...

// localize(vector<T> & gvec): map global indices to local ones via _g2l (excerpt)
for(size_t ridx = 0; ridx < gvec.size(); ridx++) {
  // ...
  it = _g2l.find(glob);
  // ...
  gvec[widx++] = it->second;
}

// localize(vector<T> & gidx, vector<V> & gdat): keep only the indices present
// in the local domain, compacting the associated data (excerpt)
for(size_t ridx = 0; ridx < gidx.size(); ridx++) {
  // ...
  it = _g2l.find(glob);
  if(it != _g2l.end()) {
    gidx[widx] = it->second;
    gdat[widx] = gdat[ridx];
    // ...
  }
}

// single-index lookup in the global-to-local map (excerpt)
auto it = _g2l.find(gidx);
// ...

// loop over all local indices (excerpt)
for(size_t i = 0; i < _l2g.size(); i++)
  // ...
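The two maps are inverses of each other: _l2g is a plain array (local to global), _g2l a hash map (global to local). A toy round trip, using std::unordered_map in place of the library's hashmap class:

#include <cstdio>
#include <unordered_map>
#include <vector>

int main()
{
  // local-to-global map: local indices 0..3 hold global indices 7,2,9,4
  std::vector<long> l2g = {7, 2, 9, 4};
  std::unordered_map<long, long> g2l;
  for(size_t i = 0; i < l2g.size(); i++)
    g2l[l2g[i]] = (long)i;               // build the inverse map

  std::vector<long> idx = {9, 7};
  for(auto & g : idx) g = g2l.at(g);     // localize:  {9,7} -> {2,0}
  for(auto & l : idx) l = l2g[l];        // globalize: back to {9,7}
  std::printf("%ld %ld\n", idx[0], idx[1]);
  return 0;
}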
size_t _glob_num_idx;   // the global number of indices
// Determine the subdomain interface nodes: distribute the local node sets
// across ranks, count unique global indices, and set up the communication
// graph (excerpt).
inline void find_domain_interfaces()
{
  // ...
  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
  // ...
  for(size_t i = 0; i < parallel_layout<T>::_l2g.size(); i++)
    // ...
  size_t numrecv = sum(grph.rcnt);
  // ...
  // the global number of indices is the sum of the per-rank unique counts
  unsigned long int num_unique = acc_cnt.size(), gnum_unique;
  MPI_Allreduce(&num_unique, &gnum_unique, 1, MPI_UNSIGNED_LONG, MPI_SUM, _comm);
  _glob_num_idx = gnum_unique;
  // ...
  // count how many entries go to each rank
  for(size_t i = 0; i < acc_cnt.size(); i++) {
    // ...
    for(T j = acc_dsp[i]; j < acc_dsp[i+1]; j++)
      grph.scnt[rproc[j]]++;
  }
  // ...
  // exchange send counts for receive counts
  MPI_Alltoall(grph.scnt.data(), sizeof(size_t), MPI_BYTE,
               grph.rcnt.data(), sizeof(size_t), MPI_BYTE, _comm);
  // ...
  size_t numsend = sum(grph.scnt);
  sbuf.resize(numsend);
  // ...
  // fill the send buffer
  for(size_t i = 0; i < acc_cnt.size(); i++) {
    // ...
    for(T j = acc_dsp[i]; j < acc_dsp[i+1]; j++) {
      // ...
      sbuf[grph.sdsp[sproc]] = rnod[i];
    }
  }
  // ...
}
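The MPI_Alltoall above is the usual counts handshake: each rank announces how much it will send to every peer (scnt), learns how much it will receive (rcnt), and displacements then follow as prefix sums, which is what dsp_from_cnt() computes. A minimal standalone sketch of this handshake, outside the commgraph class:

#include <mpi.h>
#include <vector>

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int size, rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // example: send (rank+1) elements to every peer
  std::vector<size_t> scnt(size, rank + 1), rcnt(size);
  MPI_Alltoall(scnt.data(), sizeof(size_t), MPI_BYTE,
               rcnt.data(), sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);

  // displacements from counts (what dsp_from_cnt() does): exclusive prefix sum
  std::vector<size_t> sdsp(size + 1, 0), rdsp(size + 1, 0);
  for(int i = 0; i < size; i++) {
    sdsp[i+1] = sdsp[i] + scnt[i];
    rdsp[i+1] = rdsp[i] + rcnt[i];
  }
  // rdsp[size] now equals sum(rcnt), the total receive-buffer size

  MPI_Finalize();
  return 0;
}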
// Determine the algebraic (non-overlapping) node layout: assign interface
// nodes to ranks so that the resulting distribution is as balanced as
// possible (excerpt).
inline void find_algebraic_layout()
{
  // ...
  const bool dist_warnings = false;
  // ...
  MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
  // ...
  // inner nodes: local nodes that are not on a domain interface
  T* end = std::set_difference(nodes.begin(), nodes.end(), /* ... */);
  // ...
  for(size_t i = 0; i < sbuf.size(); i++) {
    // ...
    dest[i] = nod % size;
  }
  // ...
  size_t numrecv = sum(grph.rcnt);
  // ...
  // ideal even distribution, minus the nodes each rank already owns
  divide(_glob_num_idx, size, target);
  // ...
  target[rank] -= inner_nodes.size();
  MPI_Allreduce(MPI_IN_PLACE, target.data(), target.size(), MPI_INT, MPI_MIN, _comm);
  // ...
  for(int i = 0; i < size; i++)
    // ...
  if(!rank) std::cerr << "Warning: Domains too unbalanced for balanced re-indexing!" << std::endl;
  // ...
  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
  for(int i = 0; i < size; i++) target[i] -= counts[i];
  // ...
  // iteratively refine until the targets are met or numref rounds passed
  unsigned int k = 0, numref = 10;
  while( !isEmpty(target) && k++ < numref )
  {
    // ...
    MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
    for(int i = 0; i < size; i++) target[i] -= counts[i];
  }
  // ...
  MPI_Allreduce(MPI_IN_PLACE, counts.data(), counts.size(), MPI_INT, MPI_SUM, _comm);
  for(int i = 0; i < size; i++) target[i] -= counts[i];
  // ...
  if( dist_warnings && rank == 0 ) {
    // ...
    std::cerr << "Warning: Balanced re-indexing could not be computed." << std::endl;
    std::cerr << "Final differences to even distribution: " << std::endl;
    for(int i = 0; i < size; i++) std::cerr << target[i] << " ";
    std::cerr << std::endl;
  }
  // ...
  sbuf.resize(numrecv);
  // ...
  _alg_nod.append(sbuf.begin(), sbuf.end());
  // ...
  // gather each rank's owned index count into the owned layout
  owned_layout.resize(size);
  size_t owned_idx_size = _alg_nod.size();
  MPI_Allgather(&owned_idx_size, sizeof(size_t), MPI_BYTE,
                owned_layout.scnt.data(), sizeof(size_t), MPI_BYTE, _comm);
  // ...
}
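The divide() call above produces the ideal even distribution that the refinement loop then drives towards. A plausible implementation of such an even split, assuming only the documented semantics (hypothetical helper, not the library source):

#include <vector>

// Divide gsize into num_parts parts, spreading the remainder evenly
// (same floor-difference trick as in parallel_distribution()).
template<class T>
void divide_even(size_t gsize, size_t num_parts, std::vector<T> & loc_sizes)
{
  loc_sizes.resize(num_parts);
  for(size_t i = 0; i < num_parts; i++)
    loc_sizes[i] = T(((i + 1) * gsize) / num_parts - (i * gsize) / num_parts);
}
// e.g. divide_even(10, 4, v) yields v = {2, 3, 2, 3}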
// Build the final index layout from the per-rank local index counts (excerpt).
inline void compute_layout()
{
  // ...
  T size  = this->algebraic_layout().size() - 1;
  T nlidx = this->num_local_idx();
  // ...
  MPI_Allgather(&nlidx, sizeof(T), MPI_BYTE,
                cnt.data(), sizeof(T), MPI_BYTE, _comm);
  // ...
}
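From the gathered counts, the global layout follows as an exclusive prefix sum: layout[r] is the first global index owned by rank r, and layout[r+1] - layout[r] is its count. A sketch of that step with a hypothetical helper:

#include <vector>

// Build a layout vector from per-rank counts: layout has cnt.size()+1
// entries, with layout[0] = 0 and layout[r+1] = layout[r] + cnt[r].
template<class T>
void layout_from_counts(const std::vector<T> & cnt, std::vector<T> & layout)
{
  layout.resize(cnt.size() + 1);
  layout[0] = T(0);
  for(size_t r = 0; r < cnt.size(); r++)
    layout[r+1] = layout[r] + cnt[r];
}
// e.g. cnt = {3, 5, 4}  ->  layout = {0, 3, 8, 12}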
// layout setup: interfaces first, then the algebraic layout, then the
// final index layout
this->find_domain_interfaces();
this->find_algebraic_layout();
this->compute_layout();
// localize_algebraic(): map a global reference index to the local
// algebraic (non-overlapping) indexing (excerpt).
if(loc_nodal_idx == -1)
  // ...
T local_offset  = _alg_layout[rank], local_size = _alg_layout[rank+1] - local_offset;
T local_alg_idx = global_alg_nbr[loc_nodal_idx] - local_offset;
// ...
if(local_alg_idx > -1 && local_alg_idx < local_size)
  return local_alg_idx;
// ...
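The index check above is plain offset arithmetic: subtract the rank's first global algebraic index and test that the result lies inside the rank's slice. A worked sketch with made-up numbers:

#include <cstdio>
#include <vector>

int main()
{
  // hypothetical layout: rank 1 owns global algebraic indices [4, 9)
  std::vector<long> alg_layout = {0, 4, 9, 12};
  const int  rank         = 1;
  const long local_offset = alg_layout[rank];                    // 4
  const long local_size   = alg_layout[rank+1] - local_offset;   // 5

  long global_alg_idx = 6;
  long local_alg_idx  = global_alg_idx - local_offset;           // 2
  if(local_alg_idx > -1 && local_alg_idx < local_size)
    std::printf("local algebraic index: %ld\n", local_alg_idx);
  else
    std::printf("index not owned by this rank\n");
  return 0;
}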
// num_global_idx() (excerpt)
return _glob_num_idx;

// num_algebraic_idx() (excerpt)
return _alg_nod.size();
// reduce(): gather the overlapping copies of the interface data, combine
// them with the requested operator, and scatter the result back (excerpt).
MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
// ...
size_t isize = _inod.size();
vector<T> nod_sbuf(isize), dest(isize), perm;
// ...
for(size_t i = 0; i < isize; i++) {
  // ...
}
// ...
// gather the data of the interface nodes into the send buffer
for(size_t i = 0; i < perm.size(); i++) {
  // ...
  T lidx = _inod[perm[i]];
  dat_sbuf[i] = ndat[lidx];
}
// ...
size_t numrecv = sum(grph.rcnt);
// ...
vector<T> acc_cnt(numrecv, 1), acc_dsp, acc_col;
// ...
acc_dsp.resize(acc_cnt.size() + 1);
// ...
// dispatch on the requested reduction operator
if(! strcmp(op, "max"))
  // ...
else if(! strcmp(op, "min"))
  // ...
else if(! strcmp(op, "sum"))
  // ...
for(size_t i = 0; i < acc_cnt.size(); i++)
{
  // "max": maximum over all received copies of one index
  V max = dat_rbuf[acc_col[acc_dsp[i]]];
  // ...
  while(idx < acc_cnt[i]) {
    T p = acc_col[acc_dsp[i]+idx];
    if(max < dat_rbuf[p]) max = dat_rbuf[p];
    // ...
  }
  // "min": analogous, with the comparison flipped
  V min = dat_rbuf[acc_col[acc_dsp[i]]];
  // ...
  while(idx < acc_cnt[i]) {
    T p = acc_col[acc_dsp[i]+idx];
    if(min > dat_rbuf[p]) min = dat_rbuf[p];
    // ...
  }
  // "sum": accumulate over the copies
  while(idx < acc_cnt[i]) {
    T p = acc_col[acc_dsp[i]+idx];
    // ...
  }
  // write the reduced value back to every copy
  for(T j = acc_dsp[i]; j < acc_dsp[i] + acc_cnt[i]; j++)
    dat_rbuf[acc_col[j]] = val;
}
// ...
// scatter the reduced data back into the local array
for(size_t i = 0; i < perm.size(); i++) {
  T lidx = _inod[perm[i]];
  ndat[lidx] = dat_sbuf[i];
}
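Stripped of the MPI transport, the accumulation kernel is a per-group reduction: acc_dsp/acc_cnt delimit the copies of one index, the operator combines them, and the result is written back to every copy. A serial sketch of the same kernel, with illustrative names:

#include <cstdio>
#include <cstring>
#include <vector>

// Reduce duplicated entries in-place: group i occupies positions
// col[dsp[i]] .. col[dsp[i]+cnt[i]-1] of dat.
void reduce_groups(std::vector<double> & dat, const std::vector<int> & col,
                   const std::vector<int> & cnt, const std::vector<int> & dsp,
                   const char* op)
{
  for(size_t i = 0; i < cnt.size(); i++) {
    double val = dat[col[dsp[i]]];
    for(int idx = 1; idx < cnt[i]; idx++) {
      double v = dat[col[dsp[i] + idx]];
      if     (!strcmp(op, "max")) { if(val < v) val = v; }
      else if(!strcmp(op, "min")) { if(val > v) val = v; }
      else if(!strcmp(op, "sum")) val += v;
    }
    // write the reduced value back to every copy of the group
    for(int j = dsp[i]; j < dsp[i] + cnt[i]; j++)
      dat[col[j]] = val;
  }
}

int main()
{
  std::vector<double> dat = {1.0, 5.0, 2.0};
  std::vector<int>    col = {0, 1, 2};
  std::vector<int>    cnt = {3};            // one group holding all 3 copies
  std::vector<int>    dsp = {0};
  reduce_groups(dat, col, cnt, dsp, "sum");
  std::printf("%g %g %g\n", dat[0], dat[1], dat[2]);  // 8 8 8
  return 0;
}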
// non_overlapping_layout::assign(): gather the per-rank index counts (excerpt).
int size; MPI_Comm_size(_comm, &size);
// ...
T lsize = ref_eidx.size();
// ...
MPI_Allgather(&lsize, sizeof(T), MPI_BYTE,
              layout_cnt.data(), sizeof(T), MPI_BYTE, _comm);
// local_nodal_to_local_petsc(): nodal-to-PETSc index conversion (excerpt).
template<class T, class S>
// ...
const vector<T> & alg_layout = mesh.pl.algebraic_layout();
// ...
const T my_offset = alg_layout[rank];
// ...
return petsc_nbr[local_nodal] - my_offset;
// local_petsc_to_local_nodal(): the inverse conversion, via linear search (excerpt).
template<class T, class S>
// ...
const vector<T> & alg_layout = mesh.pl.algebraic_layout();
const vector<T> & alg_nod    = mesh.pl.algebraic_nodes();
// ...
const T      my_offset = alg_layout[rank];
const size_t num_alg   = alg_nod.size();
// ...
// scan the algebraic nodes for the matching PETSc index
while(idx < num_alg && petsc_nbr[alg_nod[idx]] != local_petsc + my_offset) idx++;
// ...
if(idx == num_alg) return -1;
else               return alg_nod[idx];
// local_petsc_to_nodal_mapping(): precompute the full PETSc-to-nodal map (excerpt).
template<class T, class S>
// ...
int rank; MPI_Comm_rank(mesh.comm, &rank);
const vector<T> & alg_nod = mesh.pl.algebraic_nodes();
// ...
for(const T & n : alg_nod)
  // ...
petsc_to_nodal.assign(petsc_idx, alg_nod);
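Since local_petsc_to_local_nodal() performs a linear scan per query, the loop above instead precomputes the whole table once and stores it in an index_mapping. The same idea with a plain std::unordered_map (a sketch of the pattern, not the library's index_mapping):

#include <unordered_map>
#include <vector>

// Build a reusable PETSc-to-nodal lookup table instead of scanning
// alg_nod for every single query.
template<class T>
std::unordered_map<T, T>
build_petsc_to_nodal(const std::vector<T> & alg_nod,
                     const std::vector<T> & petsc_nbr, T my_offset)
{
  std::unordered_map<T, T> map;
  for(const T & n : alg_nod)
    map[petsc_nbr[n] - my_offset] = n;   // local PETSc index -> nodal index
  return map;
}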
constexpr T min(T a, T b)
const vector< T > & algebraic_layout() const
Getter function for the global algebraic node layout.
The mesh storage class. It contains both element and vertex data.
The vector class and related algorithms.
vector< T > & get_numbering(SF_nbr nbr_type)
Get the vector defining a certain numbering.
size_t num_local_idx() const
Retrieve the local number of indices.
void local_petsc_to_nodal_mapping(const meshdata< T, S > &mesh, index_mapping< T > &petsc_to_nodal)
void parallel_distribution(const vector< T > &target, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
Distributes entities between all ranks.
MPI_Comm comm
the MPI world the parallel mesh is defined on
void MPI_Exchange(commgraph< T > &grph, vector< S > &send, vector< S > &recv, MPI_Comm comm)
Exchange data in parallel over MPI.
constexpr T max(T a, T b)
PETSc numbering of nodes.
void interval(vector< T > &vec, size_t start, size_t end)
Create an integer interval between start and end.
overlapping_layout< T > pl
nodal parallel layout
void resize(size_t size)
Resize all vectors to size.
void globalize(vector< T > &lvec) const
Globalize local indices.
const vector< T > & algebraic_layout() const
Getter function for the algebraic layout of the elements.
void divide(const size_t gsize, const size_t num_parts, vector< T > &loc_sizes)
divide gsize into num_parts local parts with even distribution of the remainder
void refine_distribution(const vector< T > &target, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
Further refine a distribution generated by parallel_distribution().
vector< T > rdsp
Displacements w.r.t. rcnt.
const vector< T > & layout() const
Return the overlapping layout.
T * data()
Pointer to the vector's start.
The base class for parallel layouts.
const T * end() const
Pointer to the vector's end.
overlapping_layout()
Non-parameterized constructor. Use assign() to initialize later.
const vector< T > & interface() const
Retrieve the local indices of the subdomain interfaces.
#define SF_COMM
the default SlimFem MPI communicator
T local_petsc_to_local_nodal(const meshdata< T, S > &mesh, int rank, T local_petsc)
void localize(vector< T > &gidx, vector< V > &gdat) const
Localize global indices and associated data.
void localize(vector< T > &gvec) const
Localize global indices.
bool isEmpty(vector< T > &v)
Return whether a vector is empty (all values are 0).
void configure(const vector< V > &dest, MPI_Comm comm)
Set up the communication graph.
T sum(const vector< T > &vec)
Compute sum of a vector's entries.
void binary_sort_copy(vector< T > &_V, vector< S > &_W)
Index mapping class. This is a bijective mapping.
vector< T > _l2g
The global indices of the local DD domain. Also serves as the local-to-global map.
size_t size() const
The current size of the vector.
vector< T > scnt
Number of elements sent to each rank.
The parallel layout of non-overlapping indices.
T globalize(const T lidx) const
Globalize a single local index.
vector< T > sdsp
Displacements w.r.t. scnt.
size_t num_global_idx() const
Retrieve the global number of indices.
void assign(const vector< T > &a, const vector< T > &b)
Set up the index mapping between a and b.
void reduce(vector< V > &ndat, const char *op) const
Compute a reduction on overlapping data.
void unique_accumulate(vector< T > &_P, vector< S > &_A)
A vector storing arbitrary data.
void assign(const vector< T > &idx, MPI_Comm comm)
Initialization function.
const T * begin() const
Pointer to the vector's start.
void assign(const vector< T > &idx)
Assign a parallel distributed index set that defines the parallel layout.
non_overlapping_layout()
Empty constructor. Use assign() to set up the layout.
void assign(InputIterator s, InputIterator e)
Assign a memory range.
void source_ranks(vector< V > &source)
For every received data element, get the rank index it was received from.
iterator find(const K &key)
Search for key. Return iterator.
void vec_assign(S *lhs, const V *rhs, size_t size)
Assign the values in rhs to lhs. The data-type of rhs is cast to the type of lhs. ...
const vector< T > & algebraic_nodes() const
Getter function for the local indices forming the local algebraic node set.
T localize_algebraic(const T global_idx, const vector< T > &global_alg_nbr, const int rank)
map a global (REF_NBR, reference numbering) index to local algebraic (non-overlapping) indexing w...
void parallel_distribution_minrank(const vector< T > &target, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, vector< T > &owner, vector< T > &counts)
size_t num_algebraic_idx() const
Retrieve the number of local algebraic indices.
void transpose()
transpose comm graph (receive becomes send, and vice versa)
vector< T > rcnt
Number of elements received from each rank.
void binary_sort(vector< T > &_V)
void assign(const vector< T > &ref_eidx, MPI_Comm comm)
Generate the layout.
hashmap::unordered_map< T, T > _g2l
The global-to-local map for the DD domain.
void resize(size_t n)
Resize a vector.
void append(InputIterator s, InputIterator e)
Append data to the current data chunk.
Classes similar to unordered_set and unordered_map, but with better performance.
The overlapping_layout class contains the algorithms related to managing overlapping parallel index s...
void dsp_from_cnt(const vector< T > &cnt, vector< T > &dsp)
Compute displacements from counts.
T local_nodal_to_local_petsc(const meshdata< T, S > &mesh, int rank, T local_nodal)