openCARP
Doxygen code documentation for the open cardiac electrophysiology simulator openCARP
SF_parallel_utils.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // openCARP is an open cardiac electrophysiology simulator.
3 //
4 // Copyright (C) 2020 openCARP project
5 //
6 // This program is licensed under the openCARP Academic Public License (APL)
7 // v1.0: You can use and redistribute it and/or modify it in non-commercial
8 // academic environments under the terms of APL as published by the openCARP
9 // project v1.0, or (at your option) any later version. Commercial use requires
10 // a commercial license (info@opencarp.org).
11 //
12 // This program is distributed without any warranty; see the openCARP APL for
13 // more details.
14 //
15 // You should have received a copy of the openCARP APL along with this program
16 // and can find it online: http://www.opencarp.org/license
17 // ----------------------------------------------------------------------------
18 
26 #ifndef _SF_PARALLEL_UTILS_H
27 #define _SF_PARALLEL_UTILS_H
28 
29 
30 #include <mpi.h>
31 
32 
33 #include "SF_container.h"
34 #include "SF_globals.h"
35 #include "SF_mesh_io.h"
36 #include "SF_vector.h"
37 #if WITH_EMI_MODEL
38  #include "SF_mesh_io_emi.h"
39 #endif
40 
41 namespace SF {
42 
53 template<class T>
54 void sort_parallel(MPI_Comm comm, const vector<T> & idx, vector<T> & out_idx)
55 {
56  int size, rank;
57  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);
58 
59  // determine global min and max indices
60  T gmax = global_max(idx, comm);
61  T gmin = global_min(idx, comm);
62 
63  // block size
64  T bsize = (gmax - gmin) / size + 1;
65 
66  // distribute tuples uniquely and linearly ascending across the ranks ----------------
67  vector<T> dest(idx.size()), perm;
68  interval(perm, 0, idx.size());
69 
70  // find a destination for every tuple
71  for(size_t i=0; i<dest.size(); i++)
72  dest[i] = (idx[i] - gmin) / bsize;
73 
74  // find permutation to sort tuples in the send buffer
75  binary_sort_copy(dest, perm);
76 
77  // fill send buffer
78  vector<T> snd_idx(idx.size());
79  for(size_t i=0; i<perm.size(); i++)
80  snd_idx[i] = idx[perm[i]];
81 
82  // communicate
83  commgraph<size_t> grph;
84  grph.configure(dest, comm);
85 
86  size_t rsize = sum(grph.rcnt);
87  out_idx.resize(rsize);
88 
89  MPI_Exchange(grph, snd_idx, out_idx, comm);
90 
91  // sort the received values locally
92  binary_sort(out_idx);
93 }
94 
107 template<class T, class V>
108 void sort_parallel(MPI_Comm comm, const vector<T> & idx, const vector<V> & val,
109  vector<T> & out_idx, vector<V> & out_val)
110 {
111  int size, rank;
112  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);
113 
114  // determine global min and max indices
115  T gmax = global_max(idx, comm);
116  T gmin = global_min(idx, comm);
117 
118  // block size
119  T bsize = (gmax - gmin) / size + 1;
120 
121  // distribute tuples uniquely and linearly ascending across the ranks ----------------
122  vector<T> dest(idx.size()), perm;
123  interval(perm, 0, idx.size());
124 
125  // find a destination for every tuple
126  for(size_t i=0; i<dest.size(); i++)
127  dest[i] = (idx[i] - gmin) / bsize;
128 
129  // find permutation to sort tuples in the send buffer
130  binary_sort_copy(dest, perm);
131 
132  // fill send buffer
133  vector<T> snd_idx(idx.size());
134  vector<V> snd_val(idx.size());
135  for(size_t i=0; i<perm.size(); i++) {
136  snd_idx[i] = idx[perm[i]];
137  snd_val[i] = val[perm[i]];
138  }
139 
140  // communicate
141  commgraph<size_t> grph;
142  grph.configure(dest, comm);
143 
144  size_t rsize = sum(grph.rcnt);
145  out_idx.resize(rsize);
146  out_val.resize(rsize);
147 
148  MPI_Exchange(grph, snd_idx, out_idx, comm);
149  MPI_Exchange(grph, snd_val, out_val, comm);
150 
151  // sort the received values locally
152  binary_sort_copy(out_idx, out_val);
153 }
154 
155 template<class T, class V>
156 void sort_parallel(MPI_Comm comm, const vector<T> & idx, const vector<T> & cnt, const vector<V> & val,
157  vector<T> & out_idx, vector<T> & out_cnt, vector<V> & out_val)
158 {
159  int size, rank;
160  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);
161 
162  // determine global min and max indices
163  T gmax = global_max(idx, comm);
164  T gmin = global_min(idx, comm);
165 
166  // block size
167  T bsize = (gmax - gmin) / size + 1;
168 
169  // distribute tuples uniquely and linearly ascending across the ranks ----------------
170  vector<T> dest(idx.size()), perm, dsp;
171  interval(perm, 0, idx.size());
172  dsp_from_cnt(cnt, dsp);
173 
174  // find a destination for every tuple
175  for(size_t i=0; i<dest.size(); i++)
176  dest[i] = (idx[i] - gmin) / bsize;
177 
178  // find permutation to sort tuples in the send buffer
179  binary_sort_copy(dest, perm);
180 
181  // fill send buffer. this has to happen in two steps since
182  // each element has a different size
183  vector<T> snd_idx(idx.size()), snd_cnt(idx.size()), snd_dsp;
184  vector<V> snd_val(val.size());
185 
186  for(size_t i=0; i<perm.size(); i++) {
187  snd_idx[i] = idx[perm[i]];
188  snd_cnt[i] = cnt[perm[i]];
189  }
190  dsp_from_cnt(snd_cnt, snd_dsp);
191 
192  for(size_t i=0; i<perm.size(); i++) {
193  const V* read = val.data() + dsp[perm[i]];
194  V* write = snd_val.data() + snd_dsp[i];
195 
196  for(T j=0; j<snd_cnt[i]; j++)
197  write[j] = read[j];
198  }
199 
200  // set up two communication graphs, one for one entry per element and one
201  // for multiple entries
202  commgraph<T> grph, grph_entr;
203  grph.configure(dest, comm);
204  grph_entr.configure(dest, snd_cnt, comm);
205 
206  size_t rsize = sum(grph.rcnt), rsize_entr = sum(grph_entr.rcnt);
207  vector<T> rec_cnt(rsize), rec_dsp, out_dsp;
208  vector<V> rec_val(rsize_entr);
209  out_idx.resize(rsize); out_cnt.resize(rsize);
210  out_val.resize(rsize_entr);
211 
212  // communicate
213  MPI_Exchange(grph, snd_idx, out_idx, comm);
214  MPI_Exchange(grph, snd_cnt, rec_cnt, comm);
215  MPI_Exchange(grph_entr, snd_val, rec_val, comm);
216 
217  dsp_from_cnt(rec_cnt, rec_dsp);
218 
219  // sort the received values locally, again in two steps
220  interval(perm, 0, rsize);
221  binary_sort_copy(out_idx, perm);
222 
223  for(size_t i=0; i<perm.size(); i++)
224  out_cnt[i] = rec_cnt[perm[i]];
225 
226  dsp_from_cnt(out_cnt, out_dsp);
227 
228  for(size_t i=0; i<perm.size(); i++) {
229  const V* read = rec_val.data() + rec_dsp[perm[i]];
230  V* write = out_val.data() + out_dsp[i];
231 
232  for(T j=0; j<out_cnt[i]; j++)
233  write[j] = read[j];
234  }
235 }
236 
237 
/**
 * @brief Write vector data binary to disk, serialized through the root rank.
 *
 * Rank 0 writes its own chunk first, then receives and writes the chunks of
 * the other ranks in ascending rank order. This is a collective call: every
 * rank of comm must participate.
 *
 * @param fd    File descriptor; must be valid on rank 0, unused elsewhere.
 * @param vec   The local data chunk to write.
 * @param comm  The MPI communicator.
 *
 * @return The total number of elements written, broadcast to all ranks.
 */
template<class V>
size_t root_write(FILE* fd, const vector<V> & vec, MPI_Comm comm)
{
  int size, rank;
  MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size);
  long int lsize = vec.size();
  long int nwr = 0;

  if(rank == 0) {
    // file descriptor on root must be valid
    assert(fd != NULL);
    // write own chunk
    nwr += fwrite(vec.data(), sizeof(V), vec.size(), fd);
  }

  vector<V> wbuff;

  // iterate over other ranks and write their chunks
  for(int pid=1; pid < size; pid++)
  {
    if(rank == pid) {
      // announce the local chunk size, then ship the raw bytes to root
      MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);
      MPI_Send(vec.data(), lsize*sizeof(V), MPI_BYTE, 0, SF_MPITAG, comm);
    }
    else if (rank == 0) {
      long int rsize;
      MPI_Status stat;

      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);
      wbuff.resize(rsize);

      MPI_Recv(wbuff.data(), rsize*sizeof(V), MPI_BYTE, pid, SF_MPITAG, comm, &stat);

      nwr += fwrite(wbuff.data(), sizeof(V), rsize, fd);
    }

    // keep ranks in lock-step so chunks are handled strictly in rank order
    MPI_Barrier(comm);
  }

  // make the write count available on all ranks
  MPI_Bcast(&nwr, 1, MPI_LONG, 0, comm);
  return nwr;
}
293 
297 template<class V>
298 size_t root_write(FILE* fd, V* vec, const size_t vec_size, MPI_Comm comm)
299 {
300  vector<V> vecbuff;
301  vecbuff.assign(vec_size, vec, false);
302 
303  size_t nwr = root_write(fd, vecbuff, comm);
304 
305  vecbuff.assign(0, NULL, false);
306 
307  return nwr;
308 }
309 
/**
 * @brief Read binary data into a vector, distributed through the root rank.
 *
 * Each rank receives vec.size() elements. Rank 0 reads all chunks from fd in
 * ascending rank order and sends each rank its part as raw bytes. This is a
 * collective call: every rank of comm must participate.
 *
 * @param fd    File descriptor; must be valid on rank 0, unused elsewhere.
 * @param vec   [out] Pre-sized vector receiving the local chunk.
 * @param comm  The MPI communicator.
 *
 * @return The total number of elements read, broadcast to all ranks.
 */
template<class V>
size_t root_read(FILE* fd, vector<V> & vec, MPI_Comm comm)
{
  int size, rank;
  MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size);
  long int lsize = vec.size();
  long int nrd = 0;

  if(rank == 0) {
    // file descriptor on root must be valid
    assert(fd != NULL);

    // read own chunk
    nrd += fread(vec.data(), sizeof(V), vec.size(), fd);
    vector<V> rbuff;
    // iterate over other ranks: read their chunks from file and ship them out
    for(int pid=1; pid < size; pid++)
    {
      long int rsize;
      MPI_Status stat;
      // each rank first announces how many elements it expects
      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);

      rbuff.resize(rsize);
      nrd += fread(rbuff.data(), sizeof(V), rsize, fd);

      MPI_Send(rbuff.data(), rsize*sizeof(V), MPI_BYTE, pid, SF_MPITAG, comm);
    }
  }
  else {
    // announce the expected chunk size, then wait for the data from root
    MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);

    MPI_Status stat;
    MPI_Recv(vec.data(), lsize*sizeof(V), MPI_BYTE, 0, SF_MPITAG, comm, &stat);
  }

  // make the read count available on all ranks
  MPI_Bcast(&nrd, 1, MPI_LONG, 0, comm);
  return nrd;
}
361 
/**
 * @brief Read ASCII data into a vector, distributed through the root rank.
 *
 * Each rank receives vec.size() elements. Rank 0 parses all chunks from fd in
 * ascending rank order and sends each rank its part as raw bytes. This is a
 * collective call: every rank of comm must participate.
 *
 * @param fd        File descriptor; must be valid on rank 0, unused elsewhere.
 * @param vec       [out] Pre-sized vector receiving the local chunk.
 * @param comm      The MPI communicator.
 * @param int_data  If true, tokens are parsed as long int, else as double.
 *
 * @return The number of successful fscanf conversions, broadcast to all ranks.
 */
template<class V>
size_t root_read_ascii(FILE* fd, vector<V> & vec, MPI_Comm comm, bool int_data)
{
  int size, rank;
  MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size);
  long int lsize = vec.size();
  long int nrd = 0;

  // scratch variables for fscanf; parsed values are converted to V afterwards
  double fbuff;
  long int ibuff;

  if(rank == 0) {
    // file descriptor on root must be valid
    assert(fd != NULL);

    // read own chunk
    if(int_data) {
      for(size_t i=0; i<vec.size(); i++) {
        nrd += fscanf(fd, "%ld", &ibuff);
        vec[i] = V(ibuff);
      }
    }
    else {
      for(size_t i=0; i<vec.size(); i++) {
        nrd += fscanf(fd, "%lf", &fbuff);
        vec[i] = V(fbuff);
      }
    }

    vector<V> rbuff;

    // iterate over other ranks: parse their chunks and ship them out
    for(int pid=1; pid < size; pid++)
    {
      long int rsize;
      MPI_Status stat;
      // each rank first announces how many elements it expects
      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);

      rbuff.resize(rsize);
      for(long int i=0; i<rsize; i++) {
        if(int_data) {
          nrd += fscanf(fd, "%ld", &ibuff);
          rbuff[i] = V(ibuff);
        }
        else {
          nrd += fscanf(fd, "%lf", &fbuff);
          rbuff[i] = V(fbuff);
        }
      }

      MPI_Send(rbuff.data(), rsize*sizeof(V), MPI_BYTE, pid, SF_MPITAG, comm);
    }
  }
  else {
    // announce the expected chunk size, then wait for the data from root
    MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);

    MPI_Status stat;
    MPI_Recv(vec.data(), lsize*sizeof(V), MPI_BYTE, 0, SF_MPITAG, comm, &stat);
  }

  // make the conversion count available on all ranks
  MPI_Bcast(&nrd, 1, MPI_LONG, 0, comm);
  return nrd;
}
439 
441 inline size_t root_count_ascii_lines(std::string file, MPI_Comm comm)
442 {
443  int size, rank;
444  MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank);
445 
446  size_t line_count = 0;
447  if(rank == 0) {
448  FILE* fd = fopen(file.c_str(), "r");
449 
450  if(fd) {
451  size_t fsize = file_size(fd);
452 
453  vector<int> chunk_sizes;
454  divide(fsize, size, chunk_sizes);
455 
456  vector<unsigned char> chunk;
457  for(int chunk_size : chunk_sizes) {
458  chunk.resize(chunk_size);
459  fread(chunk.data(), chunk_size, 1, fd);
460 
461  for(unsigned char c : chunk)
462  if(c == '\n') line_count++;
463  }
464 
465  fclose(fd);
466  } else {
467  fprintf(stderr, "%s error: Cannot open file %s!\n", __func__, file.c_str());
468  }
469  }
470 
471  MPI_Bcast(&line_count, sizeof(size_t), MPI_BYTE, 0, comm);
472  return line_count;
473 }
474 
478 template<class V>
479 size_t root_read(FILE* fd, V* vec, const size_t vec_size, MPI_Comm comm)
480 {
481  vector<V> vecbuff;
482  vecbuff.assign(vec_size, vec, false);
483 
484  size_t nrd = root_read(fd, vecbuff, comm);
485 
486  vecbuff.assign(0, NULL, false);
487 
488  return nrd;
489 }
490 
494 template<class V>
495 size_t root_read_ascii(FILE* fd, V* vec, const size_t vec_size, MPI_Comm comm, bool int_type)
496 {
497  vector<V> vecbuff;
498  vecbuff.assign(vec_size, vec, false);
499 
500  size_t nrd = root_read_ascii(fd, vecbuff, comm, int_type);
501 
502  vecbuff.assign(0, NULL, false);
503 
504  return nrd;
505 }
506 
522 template<class T, class V>
523 size_t root_write_ordered(FILE* fd, const vector<T> & idx, const vector<V> & vec, MPI_Comm comm)
524 {
525  vector<T> srt_idx;
526  vector<V> srt_vec;
527 
528  sort_parallel(comm, idx, vec, srt_idx, srt_vec);
529  return root_write(fd, srt_vec, comm);
530 }
531 
548 template<class T, class V>
549 size_t root_write_ordered(FILE* fd, const vector<T> & idx, const vector<T> & cnt,
550  const vector<V> & vec, MPI_Comm comm)
551 {
552  vector<T> srt_idx, srt_cnt;
553  vector<V> srt_vec;
554 
555  sort_parallel(comm, idx, cnt, vec, srt_idx, srt_cnt, srt_vec);
556  return root_write(fd, srt_vec, comm);
557 }
558 
559 
563 template<class T, class V>
564 size_t root_write_ordered(FILE* fd, T* idx, V* vec, const size_t vec_size, MPI_Comm comm)
565 {
566  vector<T> idxbuff;
567  vector<V> vecbuff;
568 
569  idxbuff.assign(vec_size, idx, false);
570  vecbuff.assign(vec_size, vec, false);
571 
572  size_t nwr = root_write_ordered(fd, idxbuff, vecbuff, comm);
573 
574  idxbuff.assign(0, NULL, false);
575  vecbuff.assign(0, NULL, false);
576  return nwr;
577 }
578 
582 template<class T, class V>
583 size_t root_write_ordered(FILE* fd, T* idx, T* cnt, V* vec,
584  const size_t idx_size, const size_t vec_size, MPI_Comm comm)
585 {
586  vector<T> idxbuff, cntbuff;
587  vector<V> vecbuff;
588 
589  idxbuff.assign(idx_size, idx, false);
590  cntbuff.assign(idx_size, cnt, false);
591  vecbuff.assign(vec_size, vec, false);
592 
593  size_t nwr = root_write_ordered(fd, idxbuff, cntbuff, vecbuff, comm);
594 
595  idxbuff.assign(0, NULL, false);
596  cntbuff.assign(0, NULL, false);
597  vecbuff.assign(0, NULL, false);
598 
599  return nwr;
600 }
601 
602 
/**
 * @brief Print a distributed vector to fd as ASCII, dpn values per line.
 *
 * Rank 0 prints its own chunk first, then receives and prints the chunks of
 * the other ranks in ascending rank order. Collective call; fd is only used
 * on rank 0. Values are printed via a double conversion with "%g" formatting.
 *
 * @param comm  The MPI communicator.
 * @param vec   The local data chunk; its size should be a multiple of dpn.
 * @param dpn   Number of data entries per output line (must be > 0).
 * @param fd    Output file descriptor, used on rank 0 only.
 */
template<class T>
void print_vector(MPI_Comm comm, const vector<T> & vec, const short dpn, FILE* fd)
{
  int size, rank;
  MPI_Comm_size(comm, &size), MPI_Comm_rank(comm, &rank);

  if(rank == 0) {
    // print own chunk, dpn values per line, last value followed by a newline
    for (size_t i=0; i<vec.size() / dpn; i++ ) {
      for(short j=0; j<dpn-1; j++)
        fprintf(fd, "%g ", double(vec[i*dpn + j]) );
      fprintf(fd, "%g\n", double(vec[i*dpn + (dpn-1)]) );
    }

    // iterate over other ranks and write their chunks
    vector<T> wbuff;
    for(int pid=1; pid < size; pid++)
    {
      long int rsize;
      MPI_Status stat;
      // each rank announces its chunk size, then sends the raw bytes
      MPI_Recv(&rsize, 1, MPI_LONG, pid, SF_MPITAG, comm, &stat);
      wbuff.resize(rsize);
      MPI_Recv(wbuff.data(), rsize*sizeof(T), MPI_BYTE, pid, SF_MPITAG, comm, &stat);

      for (size_t i=0; i<wbuff.size() / dpn; i++ ) {
        for(short j=0; j<dpn-1; j++)
          fprintf(fd, "%g ", double(wbuff[i*dpn + j]) );

        fprintf(fd, "%g\n", double(wbuff[i*dpn + (dpn-1)]) );
      }
    }
  } else {
    // non-root ranks only ship their data to rank 0
    long int lsize = vec.size();
    MPI_Send(&lsize, 1, MPI_LONG, 0, SF_MPITAG, comm);
    MPI_Send(vec.data(), lsize*sizeof(T), MPI_BYTE, 0, SF_MPITAG, comm);
  }
}
639 
640 
641 template<class T, class S>
642 void write_data_ascii(const MPI_Comm comm, const vector<T> & idx, const vector<S> & data,
643  std::string file, short dpn = 1)
644 {
645  assert(idx.size() == data.size());
646 
647  int rank;
648  MPI_Comm_rank(comm, &rank);
649 
650  vector<T> srt_idx;
651  vector<S> srt_data;
652  sort_parallel(comm, idx, data, srt_idx, srt_data);
653 
654  FILE* fd = NULL;
655  if(rank == 0) {
656  fd = fopen(file.c_str(), "w");
657  if(fd == NULL) {
658  fprintf(stderr, "%s error: Cannot open file %s for writing! Aborting!\n", __func__, file.c_str());
659  exit(1);
660  }
661  }
662 
663  print_vector(comm, srt_data, dpn, fd);
664 
665  if(fd) fclose(fd);
666 }
667 
668 }
669 #endif
Basic containers.
#define SF_MPITAG
the MPI tag when communicating
Definition: SF_globals.h:29
Functions related to mesh IO.
Functions related to EMI mesh IO.
The vector class and related algorithms.
The class holds the communication graph for a MPI_Exchange() call.
Definition: SF_container.h:638
vector< T > rcnt
Number of elements received from each rank.
Definition: SF_container.h:642
void configure(const vector< V > &dest, MPI_Comm comm)
Set up the communication graph.
Definition: SF_container.h:679
A vector storing arbitrary data.
Definition: SF_vector.h:43
size_t size() const
The current size of the vector.
Definition: SF_vector.h:104
void resize(size_t n)
Resize a vector.
Definition: SF_vector.h:209
void assign(InputIterator s, InputIterator e)
Assign a memory range.
Definition: SF_vector.h:161
T * data()
Pointer to the vector's start.
Definition: SF_vector.h:91
Definition: dense_mat.hpp:34
void write_data_ascii(const MPI_Comm comm, const vector< T > &idx, const vector< S > &data, std::string file, short dpn=1)
void dsp_from_cnt(const vector< T > &cnt, vector< T > &dsp)
Compute displacements from counts.
Definition: SF_vector.h:310
size_t file_size(FILE *fd)
return file size from a file descriptor
Definition: SF_io_base.h:72
void print_vector(MPI_Comm comm, const vector< T > &vec, const short dpn, FILE *fd)
void interval(vector< T > &vec, size_t start, size_t end)
Create an integer interval between start and end.
Definition: SF_vector.h:350
void sort_parallel(MPI_Comm comm, const vector< T > &idx, vector< T > &out_idx)
Sort index values parallel ascending across the ranks.
void binary_sort_copy(vector< T > &_V, vector< S > &_W)
Definition: SF_sort.h:296
T sum(const vector< T > &vec)
Compute sum of a vector's entries.
Definition: SF_vector.h:340
void divide(const size_t gsize, const size_t num_parts, vector< T > &loc_sizes)
divide gsize into num_parts local parts with even distribution of the remainder
Definition: SF_vector.h:358
size_t root_write_ordered(FILE *fd, const vector< T > &idx, const vector< V > &vec, MPI_Comm comm)
Write index value pairs to disk in ordered permutation.
T global_min(const vector< T > &vec, MPI_Comm comm)
Compute the global minimum of a distributed vector.
Definition: SF_network.h:126
size_t root_count_ascii_lines(std::string file, MPI_Comm comm)
count the lines in an ASCII file
void MPI_Exchange(commgraph< T > &grph, vector< S > &send, vector< S > &recv, MPI_Comm comm)
Exchange data in parallel over MPI.
Definition: SF_network.h:47
size_t root_read(FILE *fd, vector< V > &vec, MPI_Comm comm)
Read binary data into a vector.
void binary_sort(vector< T > &_V)
Definition: SF_sort.h:284
T global_max(const vector< T > &vec, MPI_Comm comm)
Compute the global maximum of a distributed vector.
Definition: SF_network.h:156
size_t root_read_ascii(FILE *fd, vector< V > &vec, MPI_Comm comm, bool int_data)
Read ASCII data into a vector.
size_t root_write(FILE *fd, const vector< V > &vec, MPI_Comm comm)
Write vector data binary to disk.