27 #ifndef _SF_PARALLEL_LAYOUT_H
28 #define _SF_PARALLEL_LAYOUT_H
66 int size = gtarget.
size();
67 size_t csize = cnt.
size();
73 for(
int i=0; i<size; i++) {
74 int idx = (i + myrank) % size;
76 ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
83 for(
size_t nidx=0; nidx < csize; nidx++)
85 act_idx[nidx] = (
unsigned int)(2147483647 * nidx) % cnt[nidx];
87 T j = dsp[nidx] + act_idx[nidx];
95 for(
int i = 0; i < size; i++) J += (counts[i] - ltarget[i])*(counts[i] - ltarget[i]);
97 unsigned int k = 0, update = 0, osteps = 32;
100 while(J > 0 && k++ < osteps)
102 for(
size_t nidx = 0; nidx < csize; nidx++)
106 if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
108 T j = dsp[nidx] + act_idx[nidx];
111 T n0 = counts[op], n1 = counts[p];
112 T n0_ = ltarget[op], n1_ = ltarget[p];
113 if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
116 counts[op]--; counts[p]++;
118 J += 2*(1 - (n0 - n0_) + (n1 - n1_));
148 int size = gtarget.
size();
149 size_t csize = cnt.
size();
155 for(
int i=0; i<size; i++) {
156 int idx = (i + myrank) % size;
158 ltarget[idx] = (i * gt + gt) / size - (i * gt) / size;
166 for(
int i = 0; i < size; i++) J += (counts[i] - ltarget[i])*(counts[i] - ltarget[i]);
168 unsigned int k = 0, update = 0, osteps = 64;
171 for(
size_t nidx=0; nidx < csize; nidx++)
172 act_idx[nidx] = (
unsigned int)(2147483647 * nidx) % cnt[nidx];
175 while(J > 0 && k++ < osteps)
177 for(
size_t nidx = 0; nidx < csize; nidx++)
181 if(act_idx[nidx] == cnt[nidx]) act_idx[nidx] = 0;
183 T j = dsp[nidx] + act_idx[nidx];
186 T n0 = counts[op], n1 = counts[p];
187 T n0_ = ltarget[op], n1_ = ltarget[p];
188 if( ((n0 - n0_) - (n1 - n1_)) >= 1 )
191 counts[op]--; counts[p]++;
193 J += 2*(1 - (n0 - n0_) + (n1 - n1_));
212 for(
size_t i = 0; i < owner.
size(); i++)
215 int minrank = ranks[dsp[i]];
216 for(T j = dsp[i]; j < dsp[i+1]; j++)
217 if(minrank > ranks[j]) minrank = ranks[j];
257 size_t lsize =
_l2g.size(), widx = 0;
259 for(
size_t ridx=0; ridx<lvec.
size(); ridx++)
263 lvec[widx++] =
_l2g[loc];
279 size_t lsize =
_l2g.size();
303 for(
size_t ridx=0; ridx<gvec.
size(); ridx++)
308 gvec[widx++] = it->second;
332 for(
size_t ridx=0; ridx<gidx.
size(); ridx++)
337 gidx[widx] = it->second;
338 gdat[widx] = gdat[ridx];
365 for(
size_t i=0; i<
_l2g.size(); i++)
383 size_t _glob_num_idx;
401 inline void find_domain_interfaces()
404 MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
408 for(
size_t i=0; i<parallel_layout<T>::_l2g.size(); i++)
415 size_t numrecv =
sum(grph.
rcnt);
430 unsigned long int num_unique = acc_cnt.size(), gnum_unique;
431 MPI_Allreduce(&num_unique, &gnum_unique, 1, MPI_UNSIGNED_LONG, MPI_SUM, _comm);
432 _glob_num_idx = gnum_unique;
439 for(
size_t i=0; i<acc_cnt.size(); i++) {
441 for(T j=acc_dsp[i]; j<acc_dsp[i+1]; j++)
442 grph.
scnt[rproc[j]]++;
446 MPI_Alltoall(grph.
scnt.data(),
sizeof(
size_t), MPI_BYTE, grph.
rcnt.data(),
sizeof(
size_t), MPI_BYTE, _comm);
450 size_t numsend =
sum(grph.
scnt);
451 sbuf.resize(numsend);
453 for(
size_t i=0; i<acc_cnt.size(); i++) {
455 for(T j=acc_dsp[i]; j<acc_dsp[i+1]; j++) {
457 sbuf[grph.
sdsp[sproc]] = rnod[i];
482 inline void find_algebraic_layout()
486 const bool dist_warnings =
false;
489 MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
498 T* end = std::set_difference(nodes.begin(), nodes.end(),
506 for(
size_t i=0; i < sbuf.size(); i++) {
508 dest[i] = nod % size;
518 size_t numrecv =
sum(grph.
rcnt);
530 const MPI_Datatype mpi_t = opencarp::mpi_datatype<T>();
534 divide(_glob_num_idx, size, target);
536 target[rank] -= inner_nodes.
size();
537 MPI_Allreduce(MPI_IN_PLACE, target.
data(), target.
size(), mpi_t, MPI_MIN, _comm);
542 for(
int i=0; i<size; i++)
548 if(!rank) std::cerr <<
"Warning: Domains too unbalanced for balanced re-indexing!" << std::endl;
557 MPI_Allreduce(MPI_IN_PLACE, counts.
data(), counts.
size(), mpi_t, MPI_SUM, _comm);
558 for(
int i=0; i<size; i++) target[i] -= counts[i];
560 unsigned int k = 0, numref = 10;
561 while( !
isEmpty(target) && k++ < numref)
564 MPI_Allreduce(MPI_IN_PLACE, counts.
data(), counts.
size(), mpi_t, MPI_SUM, _comm);
565 for(
int i=0; i<size; i++) target[i] -= counts[i];
569 MPI_Allreduce(MPI_IN_PLACE, counts.
data(), counts.
size(), mpi_t, MPI_SUM, _comm);
570 for(
int i=0; i<size; i++) target[i] -= counts[i];
573 if( dist_warnings && rank == 0 ) {
575 std::cerr <<
"Warning: Balanced re-indexing could not be computed." << std::endl;
576 std::cerr <<
"Final differences to even distribution: " << std::endl;
577 for(
int i=0; i<size; i++) std::cerr << target[i] <<
" ";
578 std::cerr << std::endl;
586 sbuf.resize(numrecv);
589 _alg_nod.
append(sbuf.begin(), sbuf.end());
596 owned_layout.
resize(size);
597 size_t owned_idx_size = _alg_nod.
size();
598 MPI_Allgather(&owned_idx_size,
sizeof(
size_t), MPI_BYTE,
599 owned_layout.
scnt.data(),
sizeof(
size_t), MPI_BYTE, _comm);
607 inline void compute_layout()
615 MPI_Allgather(&nlidx,
sizeof(T), MPI_BYTE, cnt.
data(),
sizeof(T), MPI_BYTE, _comm);
639 this->find_domain_interfaces();
640 this->find_algebraic_layout();
641 this->compute_layout();
705 if(loc_nodal_idx == -1)
708 T local_offset = _alg_layout[rank], local_size = _alg_layout[rank+1] - local_offset;
709 T local_alg_idx = global_alg_nbr[loc_nodal_idx] - local_offset;
711 if(local_alg_idx > -1 && local_alg_idx < local_size)
712 return local_alg_idx;
720 return _glob_num_idx;
730 return _alg_nod.
size();
745 MPI_Comm_size(_comm, &size); MPI_Comm_rank(_comm, &rank);
751 size_t isize = _inod.
size();
752 vector<T> nod_sbuf(isize), dest(isize), perm;
755 for(
size_t i=0; i<isize; i++) {
762 for(
size_t i=0; i<perm.
size(); i++)
764 T lidx = _inod[perm[i]];
766 dat_sbuf[i] = ndat[lidx];
773 size_t numrecv =
sum(grph.
rcnt);
780 vector<T> acc_cnt(numrecv, 1), acc_dsp, acc_col;
786 acc_dsp.
resize(acc_cnt.size() + 1);
790 if(! strcmp(op,
"max"))
792 else if(! strcmp(op,
"min"))
794 else if(! strcmp(op,
"sum"))
799 for(
size_t i=0; i<acc_cnt.size(); i++)
808 V
max = dat_rbuf[acc_col[acc_dsp[i]]];
810 while(idx < acc_cnt[i])
812 T p = acc_col[acc_dsp[i]+idx];
813 if(
max < dat_rbuf[p])
max = dat_rbuf[p];
823 V
min = dat_rbuf[acc_col[acc_dsp[i]]];
825 while(idx < acc_cnt[i])
827 T p = acc_col[acc_dsp[i]+idx];
828 if(
min > dat_rbuf[p])
min = dat_rbuf[p];
839 while(idx < acc_cnt[i])
841 T p = acc_col[acc_dsp[i]+idx];
848 for(T j = acc_dsp[i]; j < acc_dsp[i] + acc_cnt[i]; j++)
849 dat_rbuf[acc_col[j]] = val;
855 for(
size_t i=0; i<perm.
size(); i++)
857 T lidx = _inod[perm[i]];
858 ndat[lidx] = dat_sbuf[i];
894 int size; MPI_Comm_size(_comm, &size);
896 T lsize = ref_eidx.
size();
899 MPI_Allgather(&lsize,
sizeof(T), MPI_BYTE, layout_cnt.
data(),
sizeof(T), MPI_BYTE, _comm);
914 template<
class T,
class S>
917 const vector<T> & alg_layout = mesh.
pl.algebraic_layout();
920 const T my_offset = alg_layout[rank];
922 return petsc_nbr[local_nodal] - my_offset;
927 template<
class T,
class S>
930 const vector<T> & alg_layout = mesh.
pl.algebraic_layout();
931 const vector<T> & alg_nod = mesh.
pl.algebraic_nodes();
934 const T my_offset = alg_layout[rank];
935 const size_t num_alg = alg_nod.
size();
938 while(idx < num_alg && petsc_nbr[alg_nod[idx]] != local_petsc + my_offset) idx++;
940 if(idx == num_alg)
return -1;
941 else return alg_nod[idx];
944 template<
class T,
class S>
947 int rank; MPI_Comm_rank(mesh.
comm, &rank);
948 const vector<T> & alg_nod = mesh.
pl.algebraic_nodes();
952 for(
const T & n : alg_nod)
955 petsc_to_nodal.
assign(petsc_idx, alg_nod);
#define SF_COMM
the default SlimFem MPI communicator
The vector class and related algorithms.
The class holds the communication graph for a MPI_Exchange() call.
vector< T > rcnt
Number of elements received from each rank.
vector< T > scnt
Number of elements sent to each rank.
void resize(size_t size)
Resize all vectors to size.
void configure(const vector< V > &dest, MPI_Comm comm)
Set up the communication graph.
vector< T > sdsp
Displacements w.r.t. scnt.
vector< T > rdsp
Displacements w.r.t. rcnt.
void source_ranks(vector< V > &source)
For every received data element, get the rank indices it was receive from.
void transpose()
transpose comm graph (receive becomes send, and vice versa)
Index mapping class. This is a bijective mapping.
void assign(const vector< T > &a, const vector< T > &b)
Set up the index mapping between a and b.
The mesh storage class. It contains both element and vertex data.
overlapping_layout< T > pl
nodal parallel layout
MPI_Comm comm
the parallel mesh is defined on a MPI world
vector< T > & get_numbering(SF_nbr nbr_type)
Get the vector defining a certain numbering.
The parallel layout of non overlapping indices.
const vector< T > & algebraic_layout() const
Getter function for the algebraic layout of the elements.
non_overlapping_layout()
Empty constructor. Use assign() to set up the layout.
void assign(const vector< T > &ref_eidx, MPI_Comm comm)
Generate the layout.
The overlapping_layout class contains the algorithms related to managing overlapping parallel index s...
T localize_algebraic(const T global_idx, const vector< T > &global_alg_nbr, const int rank)
map a global (REF_NBR, reference numbering) index to local algebraic (non-overlapping) indexing w....
const vector< T > & algebraic_layout() const
Getter function for the global algebraic node layout.
size_t num_algebraic_idx() const
Retrieve the number of local algebraic indices.
size_t num_local_idx() const
Retrieve the local number of indices.
const vector< T > & layout() const
Return the the overlapping layout.
const vector< T > & algebraic_nodes() const
Getter function for the local indices forming the local algebraic node set.
size_t num_global_idx() const
Retrieve the global number of indices.
void assign(const vector< T > &idx, MPI_Comm comm)
Initialization function.
const vector< T > & interface() const
Retrieve the local indices of the subdomain interfaces.
void reduce(vector< V > &ndat, const char *op) const
Compute a reduction on overlapping data.
overlapping_layout()
Non-parameterized constructor. Use assign() to initialize later.
The base class for parallel layouts.
void assign(const vector< T > &idx)
Assign a parallel distributed index set that defines the parallel layout.
hashmap::unordered_map< T, T > _g2l
The global-to-local map for the DD domain.
void localize(vector< T > &gidx, vector< V > &gdat) const
Localize global indices and associated data.
vector< T > _l2g
The global indices of the local DD domain. Also serves as the local-to-global map.
void globalize(vector< T > &lvec) const
Globalize local indices.
T globalize(const T lidx) const
Globalize local indices.
void localize(vector< T > &gvec) const
Localize global indices.
A vector storing arbitrary data.
size_t size() const
The current size of the vector.
void resize(size_t n)
Resize a vector.
const T * end() const
Pointer to the vector's end.
void append(InputIterator s, InputIterator e)
Append data to the current data chunk.
void assign(InputIterator s, InputIterator e)
Assign a memory range.
const T * begin() const
Pointer to the vector's start.
T * data()
Pointer to the vector's start.
iterator find(const K &key)
Search for key. Return iterator.
Classes similar to unordered_set and unordered_map, but with better performance.
void dsp_from_cnt(const vector< T > &cnt, vector< T > &dsp)
Compute displacements from counts.
T local_petsc_to_local_nodal(const meshdata< T, S > &mesh, int rank, T local_petsc)
void interval(vector< T > &vec, size_t start, size_t end)
Create an integer interval between start and end.
void unique_accumulate(vector< T > &_P, vector< S > &_A)
void binary_sort_copy(vector< T > &_V, vector< S > &_W)
T sum(const vector< T > &vec)
Compute sum of a vector's entries.
void divide(const size_t gsize, const size_t num_parts, vector< T > &loc_sizes)
divide gsize into num_parts local parts with even distribution of the remainder
void local_petsc_to_nodal_mapping(const meshdata< T, S > &mesh, index_mapping< T > &petsc_to_nodal)
bool isEmpty(vector< T > &v)
Return whether an vector is empty (all values are 0).
void parallel_distribution_minrank(const vector< T > >arget, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, vector< T > &owner, vector< T > &counts)
T local_nodal_to_local_petsc(const meshdata< T, S > &mesh, int rank, T local_nodal)
void vec_assign(S *lhs, const V *rhs, size_t size)
Assign the values in rhs to lhs. The data-type of rhs is cast to the type of lhs.
void MPI_Exchange(commgraph< T > &grph, vector< S > &send, vector< S > &recv, MPI_Comm comm)
Exchange data in parallel over MPI.
void refine_distribution(const vector< T > >arget, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
Further refine a distribution generated by parallel_distribution().
void binary_sort(vector< T > &_V)
void parallel_distribution(const vector< T > >arget, const vector< T > &cnt, const vector< T > &dsp, const vector< T > &ranks, const int myrank, vector< T > &owner, vector< T > &counts)
The distribution distributes entities between all ranks.
@ NBR_PETSC
PETSc numbering of nodes.
constexpr T min(T a, T b)
constexpr T max(T a, T b)