ergo
template_lapack_lag2.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LAG2_HEADER
38#define TEMPLATE_LAPACK_LAG2_HEADER
39
40
41template<class Treal>
42int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b,
43 const integer *ldb, const Treal *safmin, Treal *scale1, Treal *
44 scale2, Treal *wr1, Treal *wr2, Treal *wi)
45{
46/* -- LAPACK auxiliary routine (version 3.0) --
47 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48 Courant Institute, Argonne National Lab, and Rice University
49 March 31, 1993
50
51
52 Purpose
53 =======
54
55 DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
56 problem A - w B, with scaling as necessary to avoid over-/underflow.
57
58 The scaling factor "s" results in a modified eigenvalue equation
59
60 s A - w B
61
62 where s is a non-negative scaling factor chosen so that w, w B,
63 and s A do not overflow and, if possible, do not underflow, either.
64
65 Arguments
66 =========
67
68 A (input) DOUBLE PRECISION array, dimension (LDA, 2)
69 On entry, the 2 x 2 matrix A. It is assumed that its 1-norm
70 is less than 1/SAFMIN. Entries less than
71 sqrt(SAFMIN)*norm(A) are subject to being treated as zero.
72
73 LDA (input) INTEGER
74 The leading dimension of the array A. LDA >= 2.
75
76 B (input) DOUBLE PRECISION array, dimension (LDB, 2)
77 On entry, the 2 x 2 upper triangular matrix B. It is
78 assumed that the one-norm of B is less than 1/SAFMIN. The
79 diagonals should be at least sqrt(SAFMIN) times the largest
80 element of B (in absolute value); if a diagonal is smaller
81 than that, then +/- sqrt(SAFMIN) will be used instead of
82 that diagonal.
83
84 LDB (input) INTEGER
85 The leading dimension of the array B. LDB >= 2.
86
87 SAFMIN (input) DOUBLE PRECISION
88 The smallest positive number s.t. 1/SAFMIN does not
89 overflow. (This should always be DLAMCH('S') -- it is an
90 argument in order to avoid having to call DLAMCH frequently.)
91
92 SCALE1 (output) DOUBLE PRECISION
93 A scaling factor used to avoid over-/underflow in the
94 eigenvalue equation which defines the first eigenvalue. If
95 the eigenvalues are complex, then the eigenvalues are
96 ( WR1 +/- WI i ) / SCALE1 (which may lie outside the
97 exponent range of the machine), SCALE1=SCALE2, and SCALE1
98 will always be positive. If the eigenvalues are real, then
99 the first (real) eigenvalue is WR1 / SCALE1 , but this may
100 overflow or underflow, and in fact, SCALE1 may be zero or
101 less than the underflow threshhold if the exact eigenvalue
102 is sufficiently large.
103
104 SCALE2 (output) DOUBLE PRECISION
105 A scaling factor used to avoid over-/underflow in the
106 eigenvalue equation which defines the second eigenvalue. If
107 the eigenvalues are complex, then SCALE2=SCALE1. If the
108 eigenvalues are real, then the second (real) eigenvalue is
109 WR2 / SCALE2 , but this may overflow or underflow, and in
110 fact, SCALE2 may be zero or less than the underflow
111 threshhold if the exact eigenvalue is sufficiently large.
112
113 WR1 (output) DOUBLE PRECISION
114 If the eigenvalue is real, then WR1 is SCALE1 times the
115 eigenvalue closest to the (2,2) element of A B**(-1). If the
116 eigenvalue is complex, then WR1=WR2 is SCALE1 times the real
117 part of the eigenvalues.
118
119 WR2 (output) DOUBLE PRECISION
120 If the eigenvalue is real, then WR2 is SCALE2 times the
121 other eigenvalue. If the eigenvalue is complex, then
122 WR1=WR2 is SCALE1 times the real part of the eigenvalues.
123
124 WI (output) DOUBLE PRECISION
125 If the eigenvalue is real, then WI is zero. If the
126 eigenvalue is complex, then WI is SCALE1 times the imaginary
127 part of the eigenvalues. WI will always be non-negative.
128
129 =====================================================================
130
131
132 Parameter adjustments */
133 /* System generated locals */
134 integer a_dim1, a_offset, b_dim1, b_offset;
135 Treal d__1, d__2, d__3, d__4, d__5, d__6;
136 /* Local variables */
137 Treal diff, bmin, wbig, wabs, wdet, r__, binv11, binv22,
138 discr, anorm, bnorm, bsize, shift, c1, c2, c3, c4, c5, rtmin,
139 rtmax, wsize, s1, s2, a11, a12, a21, a22, b11, b12, b22, ascale,
140 bscale, pp, qq, ss, wscale, safmax, wsmall, as11, as12, as22, sum,
141 abi22;
142#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
143#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
144
145 a_dim1 = *lda;
146 a_offset = 1 + a_dim1 * 1;
147 a -= a_offset;
148 b_dim1 = *ldb;
149 b_offset = 1 + b_dim1 * 1;
150 b -= b_offset;
151
152 /* Function Body */
153 rtmin = template_blas_sqrt(*safmin);
154 rtmax = 1. / rtmin;
155 safmax = 1. / *safmin;
156
157/* Scale A
158
159 Computing MAX */
160 d__5 = (d__1 = a_ref(1, 1), absMACRO(d__1)) + (d__2 = a_ref(2, 1), absMACRO(d__2)),
161 d__6 = (d__3 = a_ref(1, 2), absMACRO(d__3)) + (d__4 = a_ref(2, 2), absMACRO(
162 d__4)), d__5 = maxMACRO(d__5,d__6);
163 anorm = maxMACRO(d__5,*safmin);
164 ascale = 1. / anorm;
165 a11 = ascale * a_ref(1, 1);
166 a21 = ascale * a_ref(2, 1);
167 a12 = ascale * a_ref(1, 2);
168 a22 = ascale * a_ref(2, 2);
169
170/* Perturb B if necessary to insure non-singularity */
171
172 b11 = b_ref(1, 1);
173 b12 = b_ref(1, 2);
174 b22 = b_ref(2, 2);
175/* Computing MAX */
176 d__1 = absMACRO(b11), d__2 = absMACRO(b12), d__1 = maxMACRO(d__1,d__2), d__2 = absMACRO(b22),
177 d__1 = maxMACRO(d__1,d__2);
178 bmin = rtmin * maxMACRO(d__1,rtmin);
179 if (absMACRO(b11) < bmin) {
180 b11 = template_lapack_d_sign(&bmin, &b11);
181 }
182 if (absMACRO(b22) < bmin) {
183 b22 = template_lapack_d_sign(&bmin, &b22);
184 }
185
186/* Scale B
187
188 Computing MAX */
189 d__1 = absMACRO(b11), d__2 = absMACRO(b12) + absMACRO(b22), d__1 = maxMACRO(d__1,d__2);
190 bnorm = maxMACRO(d__1,*safmin);
191/* Computing MAX */
192 d__1 = absMACRO(b11), d__2 = absMACRO(b22);
193 bsize = maxMACRO(d__1,d__2);
194 bscale = 1. / bsize;
195 b11 *= bscale;
196 b12 *= bscale;
197 b22 *= bscale;
198
199/* Compute larger eigenvalue by method described by C. van Loan
200
201 ( AS is A shifted by -SHIFT*B ) */
202
203 binv11 = 1. / b11;
204 binv22 = 1. / b22;
205 s1 = a11 * binv11;
206 s2 = a22 * binv22;
207 if (absMACRO(s1) <= absMACRO(s2)) {
208 as12 = a12 - s1 * b12;
209 as22 = a22 - s1 * b22;
210 ss = a21 * (binv11 * binv22);
211 abi22 = as22 * binv22 - ss * b12;
212 pp = abi22 * .5;
213 shift = s1;
214 } else {
215 as12 = a12 - s2 * b12;
216 as11 = a11 - s2 * b11;
217 ss = a21 * (binv11 * binv22);
218 abi22 = -ss * b12;
219 pp = (as11 * binv11 + abi22) * .5;
220 shift = s2;
221 }
222 qq = ss * as12;
223 if ((d__1 = pp * rtmin, absMACRO(d__1)) >= 1.) {
224/* Computing 2nd power */
225 d__1 = rtmin * pp;
226 discr = d__1 * d__1 + qq * *safmin;
227 r__ = template_blas_sqrt((absMACRO(discr))) * rtmax;
228 } else {
229/* Computing 2nd power */
230 d__1 = pp;
231 if (d__1 * d__1 + absMACRO(qq) <= *safmin) {
232/* Computing 2nd power */
233 d__1 = rtmax * pp;
234 discr = d__1 * d__1 + qq * safmax;
235 r__ = template_blas_sqrt((absMACRO(discr))) * rtmin;
236 } else {
237/* Computing 2nd power */
238 d__1 = pp;
239 discr = d__1 * d__1 + qq;
240 r__ = template_blas_sqrt((absMACRO(discr)));
241 }
242 }
243
244/* Note: the test of R in the following IF is to cover the case when
245 DISCR is small and negative and is flushed to zero during
246 the calculation of R. On machines which have a consistent
247 flush-to-zero threshhold and handle numbers above that
248 threshhold correctly, it would not be necessary. */
249
250 if (discr >= 0. || r__ == 0.) {
251 sum = pp + template_lapack_d_sign(&r__, &pp);
252 diff = pp - template_lapack_d_sign(&r__, &pp);
253 wbig = shift + sum;
254
255/* Compute smaller eigenvalue */
256
257 wsmall = shift + diff;
258/* Computing MAX */
259 d__1 = absMACRO(wsmall);
260 if (absMACRO(wbig) * .5 > maxMACRO(d__1,*safmin)) {
261 wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22);
262 wsmall = wdet / wbig;
263 }
264
265/* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1)
266 for WR1. */
267
268 if (pp > abi22) {
269 *wr1 = minMACRO(wbig,wsmall);
270 *wr2 = maxMACRO(wbig,wsmall);
271 } else {
272 *wr1 = maxMACRO(wbig,wsmall);
273 *wr2 = minMACRO(wbig,wsmall);
274 }
275 *wi = 0.;
276 } else {
277
278/* Complex eigenvalues */
279
280 *wr1 = shift + pp;
281 *wr2 = *wr1;
282 *wi = r__;
283 }
284
285/* Further scaling to avoid underflow and overflow in computing
286 SCALE1 and overflow in computing w*B.
287
288 This scale factor (WSCALE) is bounded from above using C1 and C2,
289 and from below using C3 and C4.
290 C1 implements the condition s A must never overflow.
291 C2 implements the condition w B must never overflow.
292 C3, with C2,
293 implement the condition that s A - w B must never overflow.
294 C4 implements the condition s should not underflow.
295 C5 implements the condition max(s,|w|) should be at least 2. */
296
297 c1 = bsize * (*safmin * maxMACRO(1.,ascale));
298 c2 = *safmin * maxMACRO(1.,bnorm);
299 c3 = bsize * *safmin;
300 if (ascale <= 1. && bsize <= 1.) {
301/* Computing MIN */
302 d__1 = 1., d__2 = ascale / *safmin * bsize;
303 c4 = minMACRO(d__1,d__2);
304 } else {
305 c4 = 1.;
306 }
307 if (ascale <= 1. || bsize <= 1.) {
308/* Computing MIN */
309 d__1 = 1., d__2 = ascale * bsize;
310 c5 = minMACRO(d__1,d__2);
311 } else {
312 c5 = 1.;
313 }
314
315/* Scale first eigenvalue */
316
317 wabs = absMACRO(*wr1) + absMACRO(*wi);
318/* Computing MAX
319 Computing MIN */
320 d__3 = c4, d__4 = maxMACRO(wabs,c5) * .5;
321 d__1 = maxMACRO(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001,
322 d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,d__4);
323 wsize = maxMACRO(d__1,d__2);
324 if (wsize != 1.) {
325 wscale = 1. / wsize;
326 if (wsize > 1.) {
327 *scale1 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
328 } else {
329 *scale1 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
330 }
331 *wr1 *= wscale;
332 if (*wi != 0.) {
333 *wi *= wscale;
334 *wr2 = *wr1;
335 *scale2 = *scale1;
336 }
337 } else {
338 *scale1 = ascale * bsize;
339 *scale2 = *scale1;
340 }
341
342/* Scale second eigenvalue (if real) */
343
344 if (*wi == 0.) {
345/* Computing MAX
346 Computing MIN
347 Computing MAX */
348 d__5 = absMACRO(*wr2);
349 d__3 = c4, d__4 = maxMACRO(d__5,c5) * .5;
350 d__1 = maxMACRO(*safmin,c1), d__2 = (absMACRO(*wr2) * c2 + c3) *
351 1.0000100000000001, d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,
352 d__4);
353 wsize = maxMACRO(d__1,d__2);
354 if (wsize != 1.) {
355 wscale = 1. / wsize;
356 if (wsize > 1.) {
357 *scale2 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
358 } else {
359 *scale2 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
360 }
361 *wr2 *= wscale;
362 } else {
363 *scale2 = ascale * bsize;
364 }
365 }
366
367/* End of DLAG2 */
368
369 return 0;
370} /* dlag2_ */
371
372#undef b_ref
373#undef a_ref
374
375
376#endif
Treal template_blas_sqrt(Treal x)
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *safmin, Treal *scale1, Treal *scale2, Treal *wr1, Treal *wr2, Treal *wi)
Definition: template_lapack_lag2.h:42
#define a_ref(a_1, a_2)
#define b_ref(a_1, a_2)
Treal template_lapack_d_sign(const Treal *a, const Treal *b)
Definition: template_lapack_lamch.h:48