openCARP
Doxygen code documentation for the open cardiac electrophysiology simulator openCARP
Rosenbrock.cc
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // openCARP is an open cardiac electrophysiology simulator.
3 //
4 // Copyright (C) 2020 openCARP project
5 //
6 // This program is licensed under the openCARP Academic Public License (APL)
7 // v1.0: You can use and redistribute it and/or modify it in non-commercial
8 // academic environments under the terms of APL as published by the openCARP
9 // project v1.0, or (at your option) any later version. Commercial use requires
10 // a commercial license (info@opencarp.org).
11 //
12 // This program is distributed without any warranty; see the openCARP APL for
13 // more details.
14 //
15 // You should have received a copy of the openCARP APL along with this program
16 // and can find it online: http://www.opencarp.org/license
17 // ----------------------------------------------------------------------------
18 
19 #define NO_INCLUDE_ROSENBROCK_CU
20 
21 /* ----------------------------------------------------------------------------
22 Rosenbrock-Wolfbrandt Integration Coefficients, culled from:
23 http://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/19960015529_1996034599.pdf
24 ---------------------------------------------------------------------------- */
25 #include <string.h>
26 
27 #include "Rosenbrock.h"
28 
29 #if defined HAS_ROCM_MODEL
30 #include <hip/hip_runtime.h>
31 #endif
32 
33 namespace limpet {
34 
35 #ifdef __cplusplus
36 extern "C"
37 {
38 #endif // ifdef __cplusplus
39 
40 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
41 __device__
42 #endif
43 void rbCalcKi ( float **K, float **, float *X, int* ludI,
44  void (*calcDX)(float*, float*, void*),
45  void *params, float h, int N, int i );
46 
47 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
48 __device__
49 #endif
50 void rbSolver ( float **, float *, float *, int N );
51 
52 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
53 __device__
54 #endif
55 void fludcmp0 ( float **, int n, int *indx, float *);
56 
57 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
58 __device__
59 #endif
60 
61 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
62 __device__
63 #endif
64 void flubksb0 ( float **, int n, int *indx, float *);
65 
66 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
67 __device__
68 #endif
69 void fludcmp ( float **, int, int *, float *);
70 
71 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
72 __device__
73 #endif
74 void flubksb ( float **, int, int *, float * );
75 
76 
98 #if defined __CUDA__ || defined __HIP__
99 __device__
100 #endif
101 void rbStepX ( float *X, void (*calcDX)(float*, float*, void*),
102  void (*calcJ)(float**, float*, void*, int ),
103  void *params, float h, int N )
104 {
105  float kbuf[RS_MAX_N*RS_ORDER];
106  float *k[RS_ORDER];
107  float abuf[RS_MAX_N*RS_MAX_N];
108  float *a[RS_MAX_N];
109 
110  //const float A_rs[RS_ORDER] = { 0., 0.5, 0.5, 1. };
111  //const float B_rs[RS_ORDER] = { 0.25, 0.25, 0.25, 0.25 };
112  const float Chi[RS_ORDER] = { 14/3., 20/3., 4/3., 2/3. };
113 
114  if (N > RS_MAX_N) {
115 #if !defined __CUDA__ && !defined __HIP__
116  printf( "Increase limit for number of equations: currently %d\n", RS_MAX_N);
117 #elif defined __CUDA__
118  printf( "Error: Increase limit for number of equations (RS_MAX_N macro)\n");
119 #endif
120  return;
121  }
122 
123  memset(kbuf, 0, sizeof(kbuf));
124  memset(abuf, 0, sizeof(abuf));
125 
126  for( int i = 0; i < RS_ORDER || i < N; i++ ) {
127  if( i < RS_ORDER ) k[i] = &(kbuf[i*N]);
128  if( i < N ) a[i] = &(abuf[i*N]);
129  }
130  calcJ(a, X, params, N);
131 
132  // a = 4i - hj;
133  for( int i = 0; i < N; i++ )
134  for( int j = 0; j < N; j++ )
135  a[i][j] = ((i == j) ? 4 : 0) - h * a[i][j];
136 
137  int ludI[RS_MAX_N];
138  float ludD;
139  fludcmp0( a, N, ludI, &ludD );
140 
141  for( int ki = 0; ki < RS_ORDER; ki++)
142  rbCalcKi ( k, a, X, ludI, calcDX, params, h, N, ki );
143 
144  for( int Ki = 0; Ki < RS_ORDER; Ki++)
145  for( int row = 0; row < N; row++ )
146  X[row] += h * Chi[Ki] * k[Ki][row];
147 
148 }
149 
150 
167 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
168 __device__
169 #endif
170 void rbCalcKi ( float **K, float **A, float *X, int* ludI,
171  void (*calcDX)(float*, float*, void*),
172  void *params, float h, int N, int i )
173 {
174 
175  float xx[RS_MAX_N];
176  const float Alpha[RS_ORDER][RS_ORDER] = {
177  { 0, 0, 0, 0 },
178  { 2, 0, 0, 0 },
179  { 2, 2, 0, 0 },
180  { 6, 10, 4, 0 } };
181  const float Beta[RS_ORDER][RS_ORDER] = {
182  { 0, 0, 0, 0 },
183  { -4, 0, 0, 0 },
184  { -6, -10, 0, 0 },
185  { -4, -12, 0, 0 } };
186 
187  memcpy( xx, X, N * sizeof(float));
188 
189  for( int j = 0; j < i; j++ ) {
190  for( int row = 0; row < N; row++ ) {
191  xx[row] += h * Alpha[i][j] * K[j][row];
192  K[i][row] += Beta[i][j] * K[j][row];
193  }
194  }
195 
196  float DXi[RS_MAX_N];
197  calcDX( DXi, xx, params );
198 
199  for( int row = 0; row < N; row++ ) K[i][row] += DXi[row];
200 
201  // rbSolver( &A[0][0], &K[RM(N,i,0)], bb, N );
202  flubksb0( A, N, ludI, K[i] );
203  return;
204 }
205 
219 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
220 __device__
221 #endif
222 void rbSolver( float **A, float *x, float *b, int N ) {
223  // Purpose: solve the system Ax = b by Gaussian elimination
224 
225  // Step #1: convert the system to upper-diagonal form
226  for( int i = 1; i < N; i++ ) {
227  for( int j = 0; j < i; j++ ) {
228  // Set A[i][j] to zero.
229  float K = -A[i][j] / A[j][j];
230  for( int k = j; k < N; k++ ) A[i][k] += K * A[j][k];
231  b[i] += K * b[j];
232  }
233  }
234 
235  // Step #2: solve for x by back-substitution
236  for(int i = N - 1; i >= 0; i--) {
237  x[i] = b[i];
238  for(int j = i + 1; j < N; j++) x[i] -= A[i][j] * x[j];
239  x[i] /= A[i][i];
240  }
241 }
242 
243 
255 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
256 __device__
257 #endif
258 void fludcmp0( float **a, int n,int *indx, float *d)/* matrices start at 0 */
259 {
260  float *ptr[RS_MAX_N+1];
261  int i;
262 
263  for( i=0; i<n; i++ )
264  ptr[i+1] = a[i]-1;
265  fludcmp( ptr, n, indx-1, d );
266 
267  for(i=0; i<n; i++)
268  a[i] = ptr[i+1] + 1;
269 }
270 
282 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
283 __device__
284 #endif
285 void flubksb0( float **a, int n, int *indx, float *b)
286 {
287  float *ptr[RS_MAX_N+1];
288  int i;
289 
290  for( i=0; i<n; i++ )
291  ptr[i+1] = a[i]-1;
292 
293  flubksb( ptr, n, indx-1, b-1 );
294 }
295 
296 #define TINY 1.0e-20;
297 
298 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
299 __device__
300 #endif
301 void fludcmp( float **a, int n,int *indx, float *d)
302 {
303  int i,j;
304  int imax = 0,k;
305  float big,dum,temp;
306  float sum;
307  float *dumpoint, vv[RS_MAX_N+1];
308 
309 
310  *d=1.0;
311  i = 1;
312  do {
313  big=0.0;
314  for (j=1;j<=n;j++)
315  if ((temp=fabs(a[i][j])) > big) big=temp;
316  if (big == 0.0) {
317  #ifndef __HIP__
318  printf("Singular matrix in routine LUDCMP");
319  #endif
320  //exit(1);
321  //
322  }
323  vv[i]=1.0/big;
324  } while ( ++i <= n );
325 
326  j = 1;
327  do {
328  if( j > 1 ) {
329  i = 1;
330  do {
331  sum=a[i][j];
332  if ( i != 1 ) {
333  k = 1;
334  do {
335  sum -= a[i][k]*a[k][j];
336  } while ( ++k < i );
337  }
338  a[i][j]=sum;
339  } while ( ++i < j );
340  }
341  big=0.0;
342  i = j;
343  do {
344  sum=a[i][j];
345  if ( j > 1 ) {
346  k =1 ;
347  do {
348  sum -= a[i][k]*a[k][j];
349  } while( ++k < j );
350  }
351  a[i][j]=sum;
352  if ( (dum=vv[i]*fabs(sum)) >= big) {
353  big=dum;
354  imax=i;
355  }
356  } while( ++i <= n );
357  if (j != imax) { /* exchange rows */
358  dumpoint=a[imax];
359  a[imax]=a[j];
360  a[j]=dumpoint;
361  *d = -(*d);
362  vv[imax]=vv[j];
363  }
364  indx[j]=imax;
365  if (a[j][j] == 0.0) a[j][j]=TINY;
366  if (j != n) {
367  dum=1.0/(a[j][j]);
368  for (i=j+1;i<=n;i++) a[i][j] *= dum;
369  }
370  } while( ++j <= n );
371 }
372 
373 #undef TINY
374 
375 #if defined MLIR_CODEGEN && (defined __CUDA__ || defined __HIP__)
376 __device__
377 #endif
378 void flubksb( float **a, int n, int *indx, float *b )
379 {
380  int i,ii=0,ip;
381  int j;
382  float sum;
383 
384  for (i=1;i<=n;i++) {
385  ip=indx[i];
386  sum=b[ip];
387  b[ip]=b[i];
388  if (ii)
389  for (j=ii;j<=i-1;j++)
390  sum -= a[i][j]*b[j];
391  else if (sum)
392  ii=i;
393  b[i]=sum;
394  }
395  for (i=n;i>=1;i--) {
396  sum=b[i];
397  for (j=i+1;j<=n;j++)
398  sum -= a[i][j]*b[j];
399  b[i]=sum/a[i][i];
400  }
401 }
402 
403 #ifdef __cplusplus
404 }
405 #endif // ifdef __cplusplus
406 
407 #if defined MLIR_CODEGEN && !defined __CUDA__ && !defined __HIP__
408 
425 template<size_t vector_size>
426 void rbCalcKi(float **K, float **A, float *X, int* ludI,
427  void (*calcDX)(float*, float*, void*),
428  void *params, float h, int N, int i)
429 {
430 
431  float xx[RS_MAX_N * vector_size];
432  const float Alpha[RS_ORDER][RS_ORDER] = {
433  { 0, 0, 0, 0 },
434  { 2, 0, 0, 0 },
435  { 2, 2, 0, 0 },
436  { 6, 10, 4, 0 } };
437  const float Beta[RS_ORDER][RS_ORDER] = {
438  { 0, 0, 0, 0 },
439  { -4, 0, 0, 0 },
440  { -6, -10, 0, 0 },
441  { -4, -12, 0, 0 } };
442 
443  memcpy(xx, X, N*vector_size*sizeof(float));
444 
445  for (int v = 0; v < vector_size; ++v) {
446  for( int j = 0; j < i; j++ ) {
447  for( int row = 0; row < N; row++ ) {
448  xx[v*N + row] += h * Alpha[i][j] * K[v*RS_ORDER + j][row];
449  K[v*RS_ORDER + i][row] += Beta[i][j] * K[v*RS_ORDER + j][row];
450  }
451  }
452  }
453 
454  float DXi[RS_MAX_N * vector_size];
455  calcDX( DXi, xx, params );
456  for (int v = 0; v < vector_size; ++v)
457  for( int row = 0; row < N; row++ )
458  K[v*RS_ORDER + i][row] += DXi[v*N + row];
459 
460  for (int v = 0; v < vector_size; ++v)
461  flubksb0(&A[v*N], N, ludI+v*N, K[v*RS_ORDER + i]);
462 }
463 
485 template<size_t vector_size>
486 void rbStepX ( float *X, void (*calcDX)(float*, float*, void*),
487  void (*calcJ)(float*, float*, void*, int ),
488  void *params, float h, int N )
489 {
490  float kbuf[RS_MAX_N*RS_ORDER*vector_size];
491  float *k[RS_ORDER*vector_size];
492  float abuf[RS_MAX_N*RS_MAX_N*vector_size];
493  float *a[RS_MAX_N*vector_size];
494 
495  const float Chi[RS_ORDER] = { 14/3., 20/3., 4/3., 2/3. };
496 
497  if(N>RS_MAX_N) {
498  printf( "Increase limit for number of equations: currently %d\n", RS_MAX_N );
499  return;
500  }
501 
502  memset(kbuf, 0, sizeof(kbuf));
503  memset(abuf, 0, sizeof(abuf));
504 
505  int K_ORDER = N > RS_ORDER ? N : RS_ORDER;
506  for (int i = 0; i < vector_size; ++i) {
507  for( int j = 0; j < RS_ORDER || j < N; j++ ) {
508  if( j < RS_ORDER ) k[i*RS_ORDER + j] = &(kbuf[i*K_ORDER*K_ORDER + j*K_ORDER]);
509  if( j < N ) a[i*N + j] = &(abuf[i*N*N + j*N]);
510  }
511  }
512  calcJ(abuf, X, params, N);
513 
514  for (int i = 0; i < vector_size; ++i)
515  for( int j = 0; j < N; j++ )
516  for( int k = 0; k < N; k++ )
517  a[i*N + j][k] = ((j == k) ? 4 : 0) - h * a[i*N + j][k];
518 
519  int ludI[RS_MAX_N * vector_size];
520  for (int i = 0; i < vector_size; ++i) {
521  float ludD;
522  fludcmp0( &a[i*N], N, ludI + i * N, &ludD );
523  }
524 
525  for( int ki = 0; ki < RS_ORDER; ki++)
526  rbCalcKi<vector_size>( k, a, X, ludI, calcDX, params, h, N, ki);
527 
528  for (int i = 0; i < vector_size; ++i)
529  for( int Ki = 0; Ki < RS_ORDER; Ki++)
530  for( int row = 0; row < N; row++ )
531  X[i*N + row] += h * Chi[Ki] * k[i * RS_ORDER + Ki][row];
532 }
533 
534 #ifdef __cplusplus
535 extern "C"
536 {
537 #endif // ifdef __cplusplus
538 
539 void rbStepX_2 ( float *X, void (*calcDX)(float*, float*, void*),
540  void (*calcJ)(float*, float*, void*, int ),
541  void *params, float h, int N )
542 {
543  rbStepX<2>(X, calcDX, calcJ, params, h, N);
544 }
545 
546 void rbStepX_4 ( float *X, void (*calcDX)(float*, float*, void*),
547  void (*calcJ)(float*, float*, void*, int ),
548  void *params, float h, int N )
549 {
550  rbStepX<4>(X, calcDX, calcJ, params, h, N);
551 }
552 
553 void rbStepX_8 ( float *X, void (*calcDX)(float*, float*, void*),
554  void (*calcJ)(float*, float*, void*, int ),
555  void *params, float h, int N )
556 {
557  rbStepX<8>(X, calcDX, calcJ, params, h, N);
558 }
559 
560 void rbStepX_16 ( float *X, void (*calcDX)(float*, float*, void*),
561  void (*calcJ)(float*, float*, void*, int ),
562  void *params, float h, int N )
563 {
564  rbStepX<16>(X, calcDX, calcJ, params, h, N);
565 }
566 
567 void rbStepX_32 ( float *X, void (*calcDX)(float*, float*, void*),
568  void (*calcJ)(float*, float*, void*, int ),
569  void *params, float h, int N )
570 {
571  rbStepX<32>(X, calcDX, calcJ, params, h, N);
572 }
573 
574 #ifdef __cplusplus
575 }
576 #endif // ifdef __cplusplus
577 
578 #endif
579 
580 } // namespace limpet
void flubksb(float **, int, int *, float *)
Definition: Rosenbrock.cc:378
void fludcmp(float **, int, int *, float *)
Definition: Rosenbrock.cc:301
void flubksb0(float **, int n, int *indx, float *)
Definition: Rosenbrock.cc:285
void rbStepX(float *X, void(*calcDX)(float *, float *, void *), void(*calcJ)(float **, float *, void *, int), void *params, float h, int N)
Definition: Rosenbrock.cc:101
T sum(const vector< T > &vec)
Compute sum of a vector&#39;s entries.
Definition: SF_vector.h:340
#define RS_MAX_N
Definition: Rosenbrock.h:30
void rbSolver(float **, float *, float *, int N)
Definition: Rosenbrock.cc:222
void rbCalcKi(float **K, float **, float *X, int *ludI, void(*calcDX)(float *, float *, void *), void *params, float h, int N, int i)
Definition: Rosenbrock.cc:170
#define RS_ORDER
Definition: Rosenbrock.h:29
#define TINY
Definition: Rosenbrock.cc:296
void fludcmp0(float **, int n, int *indx, float *)
Definition: Rosenbrock.cc:258