ergo
template_lapack_ggbal.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_GGBAL_HEADER
38#define TEMPLATE_LAPACK_GGBAL_HEADER
39
40
41template<class Treal>
42int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *
43 lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi,
44 Treal *lscale, Treal *rscale, Treal *work, integer *
45 info)
46{
47/* -- LAPACK routine (version 3.0) --
48 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
49 Courant Institute, Argonne National Lab, and Rice University
50 September 30, 1994
51
52
53 Purpose
54 =======
55
56 DGGBAL balances a pair of general real matrices (A,B). This
57 involves, first, permuting A and B by similarity transformations to
58 isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
59 elements on the diagonal; and second, applying a diagonal similarity
60 transformation to rows and columns ILO to IHI to make the rows
61 and columns as close in norm as possible. Both steps are optional.
62
63 Balancing may reduce the 1-norm of the matrices, and improve the
64 accuracy of the computed eigenvalues and/or eigenvectors in the
65 generalized eigenvalue problem A*x = lambda*B*x.
66
67 Arguments
68 =========
69
70 JOB (input) CHARACTER*1
71 Specifies the operations to be performed on A and B:
72 = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
73 and RSCALE(I) = 1.0 for i = 1,...,N.
74 = 'P': permute only;
75 = 'S': scale only;
76 = 'B': both permute and scale.
77
78 N (input) INTEGER
79 The order of the matrices A and B. N >= 0.
80
81 A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
82 On entry, the input matrix A.
83 On exit, A is overwritten by the balanced matrix.
84 If JOB = 'N', A is not referenced.
85
86 LDA (input) INTEGER
87 The leading dimension of the array A. LDA >= max(1,N).
88
89 B (input/output) DOUBLE PRECISION array, dimension (LDB,N)
90 On entry, the input matrix B.
91 On exit, B is overwritten by the balanced matrix.
92 If JOB = 'N', B is not referenced.
93
94 LDB (input) INTEGER
95 The leading dimension of the array B. LDB >= max(1,N).
96
97 ILO (output) INTEGER
98 IHI (output) INTEGER
99 ILO and IHI are set to integers such that on exit
100 A(i,j) = 0 and B(i,j) = 0 if i > j and
101 j = 1,...,ILO-1 or i = IHI+1,...,N.
102 If JOB = 'N' or 'S', ILO = 1 and IHI = N.
103
104 LSCALE (output) DOUBLE PRECISION array, dimension (N)
105 Details of the permutations and scaling factors applied
106 to the left side of A and B. If P(j) is the index of the
107 row interchanged with row j, and D(j)
108 is the scaling factor applied to row j, then
109 LSCALE(j) = P(j) for J = 1,...,ILO-1
110 = D(j) for J = ILO,...,IHI
111 = P(j) for J = IHI+1,...,N.
112 The order in which the interchanges are made is N to IHI+1,
113 then 1 to ILO-1.
114
115 RSCALE (output) DOUBLE PRECISION array, dimension (N)
116 Details of the permutations and scaling factors applied
117 to the right side of A and B. If P(j) is the index of the
118 column interchanged with column j, and D(j)
119 is the scaling factor applied to column j, then
120 LSCALE(j) = P(j) for J = 1,...,ILO-1
121 = D(j) for J = ILO,...,IHI
122 = P(j) for J = IHI+1,...,N.
123 The order in which the interchanges are made is N to IHI+1,
124 then 1 to ILO-1.
125
126 WORK (workspace) DOUBLE PRECISION array, dimension (6*N)
127
128 INFO (output) INTEGER
129 = 0: successful exit
130 < 0: if INFO = -i, the i-th argument had an illegal value.
131
132 Further Details
133 ===============
134
135 See R.C. WARD, Balancing the generalized eigenvalue problem,
136 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
137
138 =====================================================================
139
140
141 Test the input parameters
142
143 Parameter adjustments */
144 /* Table of constant values */
145 integer c__1 = 1;
146 Treal c_b34 = 10.;
147 Treal c_b70 = .5;
148
149 /* System generated locals */
150 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
151 Treal d__1, d__2, d__3;
152 /* Local variables */
153 integer lcab;
154 Treal beta, coef;
155 integer irab, lrab;
156 Treal basl, cmax;
157 Treal coef2, coef5;
158 integer i__, j, k, l, m;
159 Treal gamma, t, alpha;
160 Treal sfmin, sfmax;
161 integer iflow;
162 integer kount, jc;
163 Treal ta, tb, tc;
164 integer ir, it;
165 Treal ew;
166 integer nr;
167 Treal pgamma;
168 integer lsfmin, lsfmax, ip1, jp1, lm1;
169 Treal cab, rab, ewc, cor, sum;
170 integer nrp2, icab;
171#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
172#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
173
174
175 a_dim1 = *lda;
176 a_offset = 1 + a_dim1 * 1;
177 a -= a_offset;
178 b_dim1 = *ldb;
179 b_offset = 1 + b_dim1 * 1;
180 b -= b_offset;
181 --lscale;
182 --rscale;
183 --work;
184
185 /* Initialization added by Elias to get rid of compiler warnings. */
186 pgamma = 0;
187 /* Function Body */
188 *info = 0;
189 if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S")
190 && ! template_blas_lsame(job, "B")) {
191 *info = -1;
192 } else if (*n < 0) {
193 *info = -2;
194 } else if (*lda < maxMACRO(1,*n)) {
195 *info = -4;
196 } else if (*ldb < maxMACRO(1,*n)) {
197 *info = -5;
198 }
199 if (*info != 0) {
200 i__1 = -(*info);
201 template_blas_erbla("GGBAL ", &i__1);
202 return 0;
203 }
204
205 k = 1;
206 l = *n;
207
208/* Quick return if possible */
209
210 if (*n == 0) {
211 return 0;
212 }
213
214 if (template_blas_lsame(job, "N")) {
215 *ilo = 1;
216 *ihi = *n;
217 i__1 = *n;
218 for (i__ = 1; i__ <= i__1; ++i__) {
219 lscale[i__] = 1.;
220 rscale[i__] = 1.;
221/* L10: */
222 }
223 return 0;
224 }
225
226 if (k == l) {
227 *ilo = 1;
228 *ihi = 1;
229 lscale[1] = 1.;
230 rscale[1] = 1.;
231 return 0;
232 }
233
234 if (template_blas_lsame(job, "S")) {
235 goto L190;
236 }
237
238 goto L30;
239
240/* Permute the matrices A and B to isolate the eigenvalues.
241
242 Find row with one nonzero in columns 1 through L */
243
244L20:
245 l = lm1;
246 if (l != 1) {
247 goto L30;
248 }
249
250 rscale[1] = 1.;
251 lscale[1] = 1.;
252 goto L190;
253
254L30:
255 lm1 = l - 1;
256 for (i__ = l; i__ >= 1; --i__) {
257 i__1 = lm1;
258 for (j = 1; j <= i__1; ++j) {
259 jp1 = j + 1;
260 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
261 goto L50;
262 }
263/* L40: */
264 }
265 j = l;
266 goto L70;
267
268L50:
269 i__1 = l;
270 for (j = jp1; j <= i__1; ++j) {
271 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
272 goto L80;
273 }
274/* L60: */
275 }
276 j = jp1 - 1;
277
278L70:
279 m = l;
280 iflow = 1;
281 goto L160;
282L80:
283 ;
284 }
285 goto L100;
286
287/* Find column with one nonzero in rows K through N */
288
289L90:
290 ++k;
291
292L100:
293 i__1 = l;
294 for (j = k; j <= i__1; ++j) {
295 i__2 = lm1;
296 for (i__ = k; i__ <= i__2; ++i__) {
297 ip1 = i__ + 1;
298 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
299 goto L120;
300 }
301/* L110: */
302 }
303 i__ = l;
304 goto L140;
305L120:
306 i__2 = l;
307 for (i__ = ip1; i__ <= i__2; ++i__) {
308 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
309 goto L150;
310 }
311/* L130: */
312 }
313 i__ = ip1 - 1;
314L140:
315 m = k;
316 iflow = 2;
317 goto L160;
318L150:
319 ;
320 }
321 goto L190;
322
323/* Permute rows M and I */
324
325L160:
326 lscale[m] = (Treal) i__;
327 if (i__ == m) {
328 goto L170;
329 }
330 i__1 = *n - k + 1;
331 template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda);
332 i__1 = *n - k + 1;
333 template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb);
334
335/* Permute columns M and J */
336
337L170:
338 rscale[m] = (Treal) j;
339 if (j == m) {
340 goto L180;
341 }
342 template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1);
343 template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1);
344
345L180:
346 switch (iflow) {
347 case 1: goto L20;
348 case 2: goto L90;
349 }
350
351L190:
352 *ilo = k;
353 *ihi = l;
354
355 if (*ilo == *ihi) {
356 return 0;
357 }
358
359 if (template_blas_lsame(job, "P")) {
360 return 0;
361 }
362
363/* Balance the submatrix in rows ILO to IHI. */
364
365 nr = *ihi - *ilo + 1;
366 i__1 = *ihi;
367 for (i__ = *ilo; i__ <= i__1; ++i__) {
368 rscale[i__] = 0.;
369 lscale[i__] = 0.;
370
371 work[i__] = 0.;
372 work[i__ + *n] = 0.;
373 work[i__ + (*n << 1)] = 0.;
374 work[i__ + *n * 3] = 0.;
375 work[i__ + (*n << 2)] = 0.;
376 work[i__ + *n * 5] = 0.;
377/* L200: */
378 }
379
380/* Compute right side vector in resulting linear equations */
381
382 basl = template_blas_lg10(&c_b34);
383 i__1 = *ihi;
384 for (i__ = *ilo; i__ <= i__1; ++i__) {
385 i__2 = *ihi;
386 for (j = *ilo; j <= i__2; ++j) {
387 tb = b_ref(i__, j);
388 ta = a_ref(i__, j);
389 if (ta == 0.) {
390 goto L210;
391 }
392 d__1 = absMACRO(ta);
393 ta = template_blas_lg10(&d__1) / basl;
394L210:
395 if (tb == 0.) {
396 goto L220;
397 }
398 d__1 = absMACRO(tb);
399 tb = template_blas_lg10(&d__1) / basl;
400L220:
401 work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
402 work[j + *n * 5] = work[j + *n * 5] - ta - tb;
403/* L230: */
404 }
405/* L240: */
406 }
407
408 coef = 1. / (Treal) (nr << 1);
409 coef2 = coef * coef;
410 coef5 = coef2 * .5;
411 nrp2 = nr + 2;
412 beta = 0.;
413 it = 1;
414
415/* Start generalized conjugate gradient iteration */
416
417L250:
418
419 gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
420 , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
421 n * 5], &c__1);
422
423 ew = 0.;
424 ewc = 0.;
425 i__1 = *ihi;
426 for (i__ = *ilo; i__ <= i__1; ++i__) {
427 ew += work[i__ + (*n << 2)];
428 ewc += work[i__ + *n * 5];
429/* L260: */
430 }
431
432/* Computing 2nd power */
433 d__1 = ew;
434/* Computing 2nd power */
435 d__2 = ewc;
436/* Computing 2nd power */
437 d__3 = ew - ewc;
438 gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * (
439 d__3 * d__3);
440 if (gamma == 0.) {
441 goto L350;
442 }
443 if (it != 1) {
444 beta = gamma / pgamma;
445 }
446 t = coef5 * (ewc - ew * 3.);
447 tc = coef5 * (ew - ewc * 3.);
448
449 template_blas_scal(&nr, &beta, &work[*ilo], &c__1);
450 template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1);
451
452 template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
453 c__1);
454 template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
455
456 i__1 = *ihi;
457 for (i__ = *ilo; i__ <= i__1; ++i__) {
458 work[i__] += tc;
459 work[i__ + *n] += t;
460/* L270: */
461 }
462
463/* Apply matrix to vector */
464
465 i__1 = *ihi;
466 for (i__ = *ilo; i__ <= i__1; ++i__) {
467 kount = 0;
468 sum = 0.;
469 i__2 = *ihi;
470 for (j = *ilo; j <= i__2; ++j) {
471 if (a_ref(i__, j) == 0.) {
472 goto L280;
473 }
474 ++kount;
475 sum += work[j];
476L280:
477 if (b_ref(i__, j) == 0.) {
478 goto L290;
479 }
480 ++kount;
481 sum += work[j];
482L290:
483 ;
484 }
485 work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum;
486/* L300: */
487 }
488
489 i__1 = *ihi;
490 for (j = *ilo; j <= i__1; ++j) {
491 kount = 0;
492 sum = 0.;
493 i__2 = *ihi;
494 for (i__ = *ilo; i__ <= i__2; ++i__) {
495 if (a_ref(i__, j) == 0.) {
496 goto L310;
497 }
498 ++kount;
499 sum += work[i__ + *n];
500L310:
501 if (b_ref(i__, j) == 0.) {
502 goto L320;
503 }
504 ++kount;
505 sum += work[i__ + *n];
506L320:
507 ;
508 }
509 work[j + *n * 3] = (Treal) kount * work[j] + sum;
510/* L330: */
511 }
512
513 sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1)
514 + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
515 alpha = gamma / sum;
516
517/* Determine correction to current iteration */
518
519 cmax = 0.;
520 i__1 = *ihi;
521 for (i__ = *ilo; i__ <= i__1; ++i__) {
522 cor = alpha * work[i__ + *n];
523 if (absMACRO(cor) > cmax) {
524 cmax = absMACRO(cor);
525 }
526 lscale[i__] += cor;
527 cor = alpha * work[i__];
528 if (absMACRO(cor) > cmax) {
529 cmax = absMACRO(cor);
530 }
531 rscale[i__] += cor;
532/* L340: */
533 }
534 if (cmax < .5) {
535 goto L350;
536 }
537
538 d__1 = -alpha;
539 template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
540 , &c__1);
541 d__1 = -alpha;
542 template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
543 c__1);
544
545 pgamma = gamma;
546 ++it;
547 if (it <= nrp2) {
548 goto L250;
549 }
550
551/* End generalized conjugate gradient iteration */
552
553L350:
554 sfmin = template_lapack_lamch("S", (Treal)0);
555 sfmax = 1. / sfmin;
556 lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.);
557 lsfmax = (integer) (template_blas_lg10(&sfmax) / basl);
558 i__1 = *ihi;
559 for (i__ = *ilo; i__ <= i__1; ++i__) {
560 i__2 = *n - *ilo + 1;
561 irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda);
562 rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1));
563 i__2 = *n - *ilo + 1;
564 irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda);
565/* Computing MAX */
566 d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1));
567 rab = maxMACRO(d__2,d__3);
568 d__1 = rab + sfmin;
569 lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
570 ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__]));
571/* Computing MIN */
572 i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab;
573 ir = minMACRO(i__2,i__3);
574 lscale[i__] = template_lapack_pow_di(&c_b34, &ir);
575 icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1);
576 cab = (d__1 = a_ref(icab, i__), absMACRO(d__1));
577 icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1);
578/* Computing MAX */
579 d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1));
580 cab = maxMACRO(d__2,d__3);
581 d__1 = cab + sfmin;
582 lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
583 jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__]));
584/* Computing MIN */
585 i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab;
586 jc = minMACRO(i__2,i__3);
587 rscale[i__] = template_lapack_pow_di(&c_b34, &jc);
588/* L360: */
589 }
590
591/* Row scaling of matrices A and B */
592
593 i__1 = *ihi;
594 for (i__ = *ilo; i__ <= i__1; ++i__) {
595 i__2 = *n - *ilo + 1;
596 template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda);
597 i__2 = *n - *ilo + 1;
598 template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb);
599/* L370: */
600 }
601
602/* Column scaling of matrices A and B */
603
604 i__1 = *ihi;
605 for (j = *ilo; j <= i__1; ++j) {
606 template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1);
607 template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1);
608/* L380: */
609 }
610
611 return 0;
612
613/* End of DGGBAL */
614
615} /* dggbal_ */
616
617#undef b_ref
618#undef a_ref
619
620
621#endif
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:43
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:43
integer template_blas_idamax(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_idamax.h:42
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
int template_blas_swap(const integer *n, Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_swap.h:42
int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi, Treal *lscale, Treal *rscale, Treal *work, integer *info)
Definition: template_lapack_ggbal.h:42
#define a_ref(a_1, a_2)
#define b_ref(a_1, a_2)
Treal template_blas_lg10(Treal *x)
Definition: template_lapack_lamch.h:60
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
double template_lapack_pow_di(Treal *ap, integer *bp)
Definition: template_lapack_lamch.h:168
Treal template_lapack_d_sign(const Treal *a, const Treal *b)
Definition: template_lapack_lamch.h:48