openCARP
Doxygen code documentation for the open cardiac electrophysiology simulator openCARP
SF_parallel_utils.h
// ----------------------------------------------------------------------------
// openCARP is an open cardiac electrophysiology simulator.
//
// Copyright (C) 2020 openCARP project
//
// This program is licensed under the openCARP Academic Public License (APL)
// v1.0: You can use and redistribute it and/or modify it in non-commercial
// academic environments under the terms of APL as published by the openCARP
// project v1.0, or (at your option) any later version. Commercial use requires
// a commercial license (info@opencarp.org).
//
// This program is distributed without any warranty; see the openCARP APL for
// more details.
//
// You should have received a copy of the openCARP APL along with this program
// and can find it online: http://www.opencarp.org/license
// ----------------------------------------------------------------------------

#ifndef _SF_PARALLEL_UTILS_H
#define _SF_PARALLEL_UTILS_H

#include <mpi.h>

#include "SF_container.h"
#include "SF_globals.h"
#include "SF_mesh_io.h"
#include "SF_vector.h"

namespace SF {
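
// Sort index values in parallel, ascending across the ranks: on return,
// out_idx holds this rank's block of the globally sorted index set.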
template<class T>
void sort_parallel(MPI_Comm comm, const vector<T> & idx, vector<T> & out_idx)
{
  int size, rank;
  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);

  // determine global min and max indices
  T gmax = global_max(idx, comm);
  T gmin = global_min(idx, comm);

  // block size
  T bsize = (gmax - gmin) / size + 1;

  // distribute tuples uniquely and linearly ascending across the ranks ----------------
  vector<T> dest(idx.size()), perm;
  interval(perm, 0, idx.size());

  // find a destination for every tuple
  for(size_t i=0; i<dest.size(); i++)
    dest[i] = (idx[i] - gmin) / bsize;

  // find permutation to sort tuples in the send buffer
  binary_sort_copy(dest, perm);

  // fill send buffer
  vector<T> snd_idx(idx.size());
  for(size_t i=0; i<perm.size(); i++)
    snd_idx[i] = idx[perm[i]];

  // communicate
  commgraph<size_t> grph;
  grph.configure(dest, comm);

  size_t rsize = sum(grph.rcnt);
  out_idx.resize(rsize);

  MPI_Exchange(grph, snd_idx, out_idx, comm);

  // sort the received values locally
  binary_sort(out_idx);
}

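// Illustrative usage sketch (not part of the original header): obtain a
// globally sorted, block-distributed copy of locally held global indices.
// The index type `long` and the function name are assumptions of the example.
inline void example_sort_indices(MPI_Comm comm, const vector<long> & loc_idx)
{
  vector<long> sorted_idx;
  sort_parallel(comm, loc_idx, sorted_idx);
  // sorted_idx is now sorted locally, with blocks ascending across the ranks
}

// Sort (index, value) pairs in parallel: the entries of val are redistributed
// and permuted together with their indices.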
template<class T, class V>
void sort_parallel(MPI_Comm comm, const vector<T> & idx, const vector<V> & val,
                   vector<T> & out_idx, vector<V> & out_val)
{
  int size, rank;
  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);

  // determine global min and max indices
  T gmax = global_max(idx, comm);
  T gmin = global_min(idx, comm);

  // block size
  T bsize = (gmax - gmin) / size + 1;

  // distribute tuples uniquely and linearly ascending across the ranks ----------------
  vector<T> dest(idx.size()), perm;
  interval(perm, 0, idx.size());

  // find a destination for every tuple
  for(size_t i=0; i<dest.size(); i++)
    dest[i] = (idx[i] - gmin) / bsize;

  // find permutation to sort tuples in the send buffer
  binary_sort_copy(dest, perm);

  // fill send buffer
  vector<T> snd_idx(idx.size());
  vector<V> snd_val(idx.size());
  for(size_t i=0; i<perm.size(); i++) {
    snd_idx[i] = idx[perm[i]];
    snd_val[i] = val[perm[i]];
  }

  // communicate
  commgraph<size_t> grph;
  grph.configure(dest, comm);

  size_t rsize = sum(grph.rcnt);
  out_idx.resize(rsize);
  out_val.resize(rsize);

  MPI_Exchange(grph, snd_idx, out_idx, comm);
  MPI_Exchange(grph, snd_val, out_val, comm);

  // sort the received values locally
  binary_sort_copy(out_idx, out_val);
}

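// Sort variant for entries of varying size: index idx[i] travels together with
// its cnt[i] values from val (counts/displacements convention).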
template<class T, class V>
void sort_parallel(MPI_Comm comm, const vector<T> & idx, const vector<T> & cnt, const vector<V> & val,
                   vector<T> & out_idx, vector<T> & out_cnt, vector<V> & out_val)
{
  int size, rank;
  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);

  // determine global min and max indices
  T gmax = global_max(idx, comm);
  T gmin = global_min(idx, comm);

  // block size
  T bsize = (gmax - gmin) / size + 1;

  // distribute tuples uniquely and linearly ascending across the ranks ----------------
  vector<T> dest(idx.size()), perm, dsp;
  interval(perm, 0, idx.size());
  dsp_from_cnt(cnt, dsp);

  // find a destination for every tuple
  for(size_t i=0; i<dest.size(); i++)
    dest[i] = (idx[i] - gmin) / bsize;

  // find permutation to sort tuples in the send buffer
  binary_sort_copy(dest, perm);

  // fill send buffer. this has to happen in two steps since
  // each element has a different size
  vector<T> snd_idx(idx.size()), snd_cnt(idx.size()), snd_dsp;
  vector<V> snd_val(val.size());

  for(size_t i=0; i<perm.size(); i++) {
    snd_idx[i] = idx[perm[i]];
    snd_cnt[i] = cnt[perm[i]];
  }
  dsp_from_cnt(snd_cnt, snd_dsp);

  for(size_t i=0; i<perm.size(); i++) {
    const V* read = val.data() + dsp[perm[i]];
    V* write = snd_val.data() + snd_dsp[i];

    for(T j=0; j<snd_cnt[i]; j++)
      write[j] = read[j];
  }

  // set up two communication graphs, one for one entry per element and one
  // for multiple entries
  commgraph<T> grph, grph_entr;
  grph.configure(dest, comm);
  grph_entr.configure(dest, snd_cnt, comm);

  size_t rsize = sum(grph.rcnt), rsize_entr = sum(grph_entr.rcnt);
  vector<T> rec_cnt(rsize), rec_dsp, out_dsp;
  vector<V> rec_val(rsize_entr);
  out_idx.resize(rsize); out_cnt.resize(rsize);
  out_val.resize(rsize_entr);

  // communicate
  MPI_Exchange(grph, snd_idx, out_idx, comm);
  MPI_Exchange(grph, snd_cnt, rec_cnt, comm);
  MPI_Exchange(grph_entr, snd_val, rec_val, comm);

  dsp_from_cnt(rec_cnt, rec_dsp);

  // sort the received values locally, again in two steps
  interval(perm, 0, rsize);
  binary_sort_copy(out_idx, perm);

  for(size_t i=0; i<perm.size(); i++)
    out_cnt[i] = rec_cnt[perm[i]];

  dsp_from_cnt(out_cnt, out_dsp);

  for(size_t i=0; i<perm.size(); i++) {
    const V* read = rec_val.data() + rec_dsp[perm[i]];
    V* write = out_val.data() + out_dsp[i];

    for(T j=0; j<out_cnt[i]; j++)
      write[j] = read[j];
  }
}

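// Write vector data in binary to disk: rank 0 writes its own chunk first, then
// receives and writes the chunk of every other rank in rank order. Returns the
// total number of values written (broadcast to all ranks).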
template<class V>
size_t root_write(FILE* fd, const vector<V> & vec, MPI_Comm comm)
{
  int size, rank;
  MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size);
  long int lsize = vec.size();
  long int nwr = 0;

  if(rank == 0) {
    // file descriptor on root must be valid
    assert(fd != NULL);
    // write own chunk
    nwr += fwrite(vec.data(), sizeof(V), vec.size(), fd);
  }

  vector<V> wbuff;

  // iterate over other ranks and write their chunks
  for(int pid=1; pid < size; pid++)
  {
    if(rank == pid) {
      MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);
      MPI_Send(vec.data(), lsize*sizeof(V), MPI_BYTE, 0, SF_MPITAG, comm);
    }
    else if (rank == 0) {
      long int rsize;
      MPI_Status stat;

      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);
      wbuff.resize(rsize);

      MPI_Recv(wbuff.data(), rsize*sizeof(V), MPI_BYTE, pid, SF_MPITAG, comm, &stat);

      nwr += fwrite(wbuff.data(), sizeof(V), rsize, fd);
    }

    MPI_Barrier(comm);
  }

  MPI_Bcast(&nwr, 1, MPI_LONG, 0, comm);
  return nwr;
}

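// Illustrative usage sketch (not part of the original header): dump a
// distributed vector to one binary file. Only rank 0 needs a valid file
// descriptor; the file name, value type and function name are assumptions
// of the example.
inline size_t example_dump_binary(MPI_Comm comm, const vector<double> & loc_data)
{
  int rank;
  MPI_Comm_rank(comm, &rank);

  FILE* fd = rank == 0 ? fopen("data.bin", "wb") : NULL;
  size_t nwr = root_write(fd, loc_data, comm);  // collective call on all ranks
  if(fd) fclose(fd);

  return nwr;
}
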
template<class V>
size_t root_write(FILE* fd, V* vec, const size_t vec_size, MPI_Comm comm)
{
  vector<V> vecbuff;
  vecbuff.assign(vec_size, vec, false);

  size_t nwr = root_write(fd, vecbuff, comm);

  vecbuff.assign(0, NULL, false);

  return nwr;
}

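// Read binary data into a vector: rank 0 reads vec.size() values for itself and
// then, in rank order, a chunk for every other rank and sends it over. Each
// rank must have sized vec to its expected chunk size beforehand.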
template<class V>
size_t root_read(FILE* fd, vector<V> & vec, MPI_Comm comm)
{
  int size, rank;
  MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size);
  long int lsize = vec.size();
  long int nrd = 0;

  if(rank == 0) {
    // file descriptor on root must be valid
    assert(fd != NULL);

    // read own chunk
    nrd += fread(vec.data(), sizeof(V), vec.size(), fd);
    vector<V> rbuff;

    // iterate over other ranks, read their chunks and send them over
    for(int pid=1; pid < size; pid++)
    {
      long int rsize;
      MPI_Status stat;
      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);

      rbuff.resize(rsize);
      nrd += fread(rbuff.data(), sizeof(V), rsize, fd);

      MPI_Send(rbuff.data(), rsize*sizeof(V), MPI_BYTE, pid, SF_MPITAG, comm);
    }
  }
  else {
    MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);

    MPI_Status stat;
    MPI_Recv(vec.data(), lsize*sizeof(V), MPI_BYTE, 0, SF_MPITAG, comm, &stat);
  }

  MPI_Bcast(&nrd, 1, MPI_LONG, 0, comm);
  return nrd;
}

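// Read ASCII data into a vector: rank 0 parses the values with fscanf (as long
// int if int_data is set, as double otherwise) and distributes them to the
// other ranks as in root_read.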
template<class V>
size_t root_read_ascii(FILE* fd, vector<V> & vec, MPI_Comm comm, bool int_data)
{
  int size, rank;
  MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size);
  long int lsize = vec.size();
  long int nrd = 0;

  double fbuff;
  long int ibuff;

  if(rank == 0) {
    // file descriptor on root must be valid
    assert(fd != NULL);

    // read own chunk
    if(int_data) {
      for(size_t i=0; i<vec.size(); i++) {
        nrd += fscanf(fd, "%ld", &ibuff);
        vec[i] = V(ibuff);
      }
    }
    else {
      for(size_t i=0; i<vec.size(); i++) {
        nrd += fscanf(fd, "%lf", &fbuff);
        vec[i] = V(fbuff);
      }
    }

    vector<V> rbuff;

    // iterate over other ranks, read their chunks and send them over
    for(int pid=1; pid < size; pid++)
    {
      long int rsize;
      MPI_Status stat;
      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);

      rbuff.resize(rsize);
      for(long int i=0; i<rsize; i++) {
        if(int_data) {
          nrd += fscanf(fd, "%ld", &ibuff);
          rbuff[i] = V(ibuff);
        }
        else {
          nrd += fscanf(fd, "%lf", &fbuff);
          rbuff[i] = V(fbuff);
        }
      }

      MPI_Send(rbuff.data(), rsize*sizeof(V), MPI_BYTE, pid, SF_MPITAG, comm);
    }
  }
  else {
    MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);

    MPI_Status stat;
    MPI_Recv(vec.data(), lsize*sizeof(V), MPI_BYTE, 0, SF_MPITAG, comm, &stat);
  }

  MPI_Bcast(&nrd, 1, MPI_LONG, 0, comm);
  return nrd;
}

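// Count the lines of an ASCII file: the counting is done on rank 0 in chunks
// and the result is broadcast to all ranks.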
inline size_t root_count_ascii_lines(std::string file, MPI_Comm comm)
{
  int size, rank;
  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);

  size_t line_count = 0;
  if(rank == 0) {
    FILE* fd = fopen(file.c_str(), "r");

    if(fd) {
      size_t fsize = file_size(fd);

      vector<int> chunk_sizes;
      divide(fsize, size, chunk_sizes);

      vector<unsigned char> chunk;
      for(int chunk_size : chunk_sizes) {
        chunk.resize(chunk_size);
        fread(chunk.data(), chunk_size, 1, fd);

        for(unsigned char c : chunk)
          if(c == '\n') line_count++;
      }

      fclose(fd);
    } else {
      fprintf(stderr, "%s error: Cannot open file %s!\n", __func__, file.c_str());
    }
  }

  MPI_Bcast(&line_count, sizeof(size_t), MPI_BYTE, 0, comm);
  return line_count;
}

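// Illustrative usage sketch (not part of the original header): read one
// floating-point value per line of an ASCII file into a distributed vector.
// The file layout, value type and function name are assumptions of the example.
inline void example_read_ascii_column(MPI_Comm comm, const std::string & fname,
                                      vector<double> & loc_data)
{
  int size, rank;
  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);

  // every rank learns the global number of lines
  size_t num_lines = root_count_ascii_lines(fname, comm);

  // split the lines evenly across the ranks and size the local buffer
  vector<size_t> loc_sizes;
  divide(num_lines, size, loc_sizes);
  loc_data.resize(loc_sizes[rank]);

  // rank 0 opens the file and parses the values for all ranks
  FILE* fd = rank == 0 ? fopen(fname.c_str(), "r") : NULL;
  root_read_ascii(fd, loc_data, comm, false);

  if(fd) fclose(fd);
}
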
template<class V>
size_t root_read(FILE* fd, V* vec, const size_t vec_size, MPI_Comm comm)
{
  vector<V> vecbuff;
  vecbuff.assign(vec_size, vec, false);

  size_t nrd = root_read(fd, vecbuff, comm);

  vecbuff.assign(0, NULL, false);

  return nrd;
}

template<class V>
size_t root_read_ascii(FILE* fd, V* vec, const size_t vec_size, MPI_Comm comm, bool int_type)
{
  vector<V> vecbuff;
  vecbuff.assign(vec_size, vec, false);

  size_t nrd = root_read_ascii(fd, vecbuff, comm, int_type);

  vecbuff.assign(0, NULL, false);

  return nrd;
}

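// Write index-value pairs to disk in ordered permutation: the data is first
// sorted in parallel by its global indices, then written by rank 0.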
template<class T, class V>
size_t root_write_ordered(FILE* fd, const vector<T> & idx, const vector<V> & vec, MPI_Comm comm)
{
  vector<T> srt_idx;
  vector<V> srt_vec;

  sort_parallel(comm, idx, vec, srt_idx, srt_vec);
  return root_write(fd, srt_vec, comm);
}

template<class T, class V>
size_t root_write_ordered(FILE* fd, const vector<T> & idx, const vector<T> & cnt,
                          const vector<V> & vec, MPI_Comm comm)
{
  vector<T> srt_idx, srt_cnt;
  vector<V> srt_vec;

  sort_parallel(comm, idx, cnt, vec, srt_idx, srt_cnt, srt_vec);
  return root_write(fd, srt_vec, comm);
}

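// Illustrative usage sketch (not part of the original header): write nodal data
// to disk in ascending global-index order, so the file layout is independent of
// the parallel partitioning. Names and types are assumptions of the example.
inline size_t example_dump_ordered(FILE* fd, const vector<long> & global_idx,
                                   const vector<double> & nodal_data, MPI_Comm comm)
{
  // entries are sorted in parallel by global index, then funneled through rank 0
  return root_write_ordered(fd, global_idx, nodal_data, comm);
}
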
template<class T, class V>
size_t root_write_ordered(FILE* fd, T* idx, V* vec, const size_t vec_size, MPI_Comm comm)
{
  vector<T> idxbuff;
  vector<V> vecbuff;

  idxbuff.assign(vec_size, idx, false);
  vecbuff.assign(vec_size, vec, false);

  size_t nwr = root_write_ordered(fd, idxbuff, vecbuff, comm);

  idxbuff.assign(0, NULL, false);
  vecbuff.assign(0, NULL, false);
  return nwr;
}

template<class T, class V>
size_t root_write_ordered(FILE* fd, T* idx, T* cnt, V* vec,
                          const size_t idx_size, const size_t vec_size, MPI_Comm comm)
{
  vector<T> idxbuff, cntbuff;
  vector<V> vecbuff;

  idxbuff.assign(idx_size, idx, false);
  cntbuff.assign(idx_size, cnt, false);
  vecbuff.assign(vec_size, vec, false);

  size_t nwr = root_write_ordered(fd, idxbuff, cntbuff, vecbuff, comm);

  idxbuff.assign(0, NULL, false);
  cntbuff.assign(0, NULL, false);
  vecbuff.assign(0, NULL, false);

  return nwr;
}

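// Print a distributed vector to a file stream: rank 0 prints its own values and
// then those of every other rank, dpn values per line.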
template<class T>
void print_vector(MPI_Comm comm, const vector<T> & vec, const short dpn, FILE* fd)
{
  int size, rank;
  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);

  if(rank == 0) {
    for (size_t i=0; i<vec.size() / dpn; i++ ) {
      for(short j=0; j<dpn-1; j++)
        fprintf(fd, "%g ", double(vec[i*dpn + j]) );
      fprintf(fd, "%g\n", double(vec[i*dpn + (dpn-1)]) );
    }

    // iterate over other ranks and write their chunks
    vector<T> wbuff;
    for(int pid=1; pid < size; pid++)
    {
      long int rsize;
      MPI_Status stat;
      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);
      wbuff.resize(rsize);
      MPI_Recv(wbuff.data(), rsize*sizeof(T), MPI_BYTE, pid, SF_MPITAG, comm, &stat);

      for (size_t i=0; i<wbuff.size() / dpn; i++ ) {
        for(short j=0; j<dpn-1; j++)
          fprintf(fd, "%g ", double(wbuff[i*dpn + j]) );

        fprintf(fd, "%g\n", double(wbuff[i*dpn + (dpn-1)]) );
      }
    }
  } else {
    long int lsize = vec.size();
    MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);
    MPI_Send(vec.data(), lsize*sizeof(T), MPI_BYTE, 0, SF_MPITAG, comm);
  }
}

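// Write distributed data to an ASCII file, sorted by the global indices in idx,
// with dpn values per line.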
template<class T, class S>
void write_data_ascii(const MPI_Comm comm, const vector<T> & idx, const vector<S> & data,
                      std::string file, short dpn = 1)
{
  assert(idx.size() == data.size());

  int rank;
  MPI_Comm_rank(comm, &rank);

  vector<T> srt_idx;
  vector<S> srt_data;
  sort_parallel(comm, idx, data, srt_idx, srt_data);

  FILE* fd = NULL;
  if(rank == 0) {
    fd = fopen(file.c_str(), "w");
    if(fd == NULL) {
      fprintf(stderr, "%s error: Cannot open file %s for writing! Aborting!\n", __func__, file.c_str());
      exit(1);
    }
  }

  print_vector(comm, srt_data, dpn, fd);

  if(fd) fclose(fd);
}

}  // namespace SF

#endif