ergo
template_lapack_lasq2.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LASQ2_HEADER
38#define TEMPLATE_LAPACK_LASQ2_HEADER
39
40
42
43
44template<class Treal>
45int template_lapack_lasq2(integer *n, Treal *z__, integer *info)
46{
47 /* System generated locals */
48 integer i__1, i__2, i__3;
49 Treal d__1, d__2;
50
51
52 /* Local variables */
53 Treal d__, e, g;
54 integer k;
55 Treal s, t;
56 integer i0, i4, n0;
57 Treal dn;
58 integer pp;
59 Treal dn1, dn2, dee, eps, tau, tol;
60 integer ipn4;
61 Treal tol2;
62 logical ieee;
63 integer nbig;
64 Treal dmin__, emin, emax;
65 integer kmin, ndiv, iter;
66 Treal qmin, temp, qmax, zmax;
67 integer splt;
68 Treal dmin1, dmin2;
69 integer nfail;
70 Treal desig, trace, sigma;
71 integer iinfo, ttype;
72 Treal deemin;
73 integer iwhila, iwhilb;
74 Treal oldemn, safmin;
75
76
77/* -- LAPACK routine (version 3.2) -- */
78
79/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
80/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
81/* -- Berkeley -- */
82/* -- November 2008 -- */
83
84/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
85/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
86
87/* .. Scalar Arguments .. */
88/* .. */
89/* .. Array Arguments .. */
90/* .. */
91
92/* Purpose */
93/* ======= */
94
95/* DLASQ2 computes all the eigenvalues of the symmetric positive */
96/* definite tridiagonal matrix associated with the qd array Z to high */
97/* relative accuracy are computed to high relative accuracy, in the */
98/* absence of denormalization, underflow and overflow. */
99
100/* To see the relation of Z to the tridiagonal matrix, let L be a */
101/* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
102/* let U be an upper bidiagonal matrix with 1's above and diagonal */
103/* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
104/* symmetric tridiagonal to which it is similar. */
105
106/* Note : DLASQ2 defines a logical variable, IEEE, which is true */
107/* on machines which follow ieee-754 floating-point standard in their */
108/* handling of infinities and NaNs, and false otherwise. This variable */
109/* is passed to DLASQ3. */
110
111/* Arguments */
112/* ========= */
113
114/* N (input) INTEGER */
115/* The number of rows and columns in the matrix. N >= 0. */
116
117/* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
118/* On entry Z holds the qd array. On exit, entries 1 to N hold */
119/* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
120/* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
121/* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
122/* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
123/* shifts that failed. */
124
125/* INFO (output) INTEGER */
126/* = 0: successful exit */
127/* < 0: if the i-th argument is a scalar and had an illegal */
128/* value, then INFO = -i, if the i-th argument is an */
129/* array and the j-entry had an illegal value, then */
130/* INFO = -(i*100+j) */
131/* > 0: the algorithm failed */
132/* = 1, a split was marked by a positive value in E */
133/* = 2, current block of Z not diagonalized after 30*N */
134/* iterations (in inner while loop) */
135/* = 3, termination criterion of outer while loop not met */
136/* (program created more than N unreduced blocks) */
137
138/* Further Details */
139/* =============== */
140/* Local Variables: I0:N0 defines a current unreduced segment of Z. */
141/* The shifts are accumulated in SIGMA. Iteration count is in ITER. */
142/* Ping-pong is controlled by PP (alternates between 0 and 1). */
143
144/* ===================================================================== */
145
146/* .. Parameters .. */
147/* .. */
148/* .. Local Scalars .. */
149/* .. */
150/* .. External Subroutines .. */
151/* .. */
152/* .. External Functions .. */
153/* .. */
154/* .. Intrinsic Functions .. */
155/* .. */
156/* .. Executable Statements .. */
157
158/* Test the input arguments. */
159/* (in case DLASQ2 is not called by DLASQ1) */
160
161/* Table of constant values */
162
163 integer c__1 = 1;
164 integer c__2 = 2;
165 integer c__10 = 10;
166 integer c__3 = 3;
167 integer c__4 = 4;
168 integer c__11 = 11;
169
170 /* Parameter adjustments */
171 --z__;
172
173 /* Function Body */
174 *info = 0;
175 eps = template_lapack_lamch("Precision", (Treal)0);
176 safmin = template_lapack_lamch("Safe minimum", (Treal)0);
177 tol = eps * 100.;
178/* Computing 2nd power */
179 d__1 = tol;
180 tol2 = d__1 * d__1;
181
182 if (*n < 0) {
183 *info = -1;
184 template_blas_erbla("DLASQ2", &c__1);
185 return 0;
186 } else if (*n == 0) {
187 return 0;
188 } else if (*n == 1) {
189
190/* 1-by-1 case. */
191
192 if (z__[1] < 0.) {
193 *info = -201;
194 template_blas_erbla("DLASQ2", &c__2);
195 }
196 return 0;
197 } else if (*n == 2) {
198
199/* 2-by-2 case. */
200
201 if (z__[2] < 0. || z__[3] < 0.) {
202 *info = -2;
203 template_blas_erbla("DLASQ2", &c__2);
204 return 0;
205 } else if (z__[3] > z__[1]) {
206 d__ = z__[3];
207 z__[3] = z__[1];
208 z__[1] = d__;
209 }
210 z__[5] = z__[1] + z__[2] + z__[3];
211 if (z__[2] > z__[3] * tol2) {
212 t = (z__[1] - z__[3] + z__[2]) * .5;
213 s = z__[3] * (z__[2] / t);
214 if (s <= t) {
215 s = z__[3] * (z__[2] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
216 } else {
217 s = z__[3] * (z__[2] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
218 }
219 t = z__[1] + (s + z__[2]);
220 z__[3] *= z__[1] / t;
221 z__[1] = t;
222 }
223 z__[2] = z__[3];
224 z__[6] = z__[2] + z__[1];
225 return 0;
226 }
227
228/* Check for negative data and compute sums of q's and e's. */
229
230 z__[*n * 2] = 0.;
231 emin = z__[2];
232 qmax = 0.;
233 zmax = 0.;
234 d__ = 0.;
235 e = 0.;
236
237 i__1 = ( *n - 1 ) << 1;
238 for (k = 1; k <= i__1; k += 2) {
239 if (z__[k] < 0.) {
240 *info = -(k + 200);
241 template_blas_erbla("DLASQ2", &c__2);
242 return 0;
243 } else if (z__[k + 1] < 0.) {
244 *info = -(k + 201);
245 template_blas_erbla("DLASQ2", &c__2);
246 return 0;
247 }
248 d__ += z__[k];
249 e += z__[k + 1];
250/* Computing MAX */
251 d__1 = qmax, d__2 = z__[k];
252 qmax = maxMACRO(d__1,d__2);
253/* Computing MIN */
254 d__1 = emin, d__2 = z__[k + 1];
255 emin = minMACRO(d__1,d__2);
256/* Computing MAX */
257 d__1 = maxMACRO(qmax,zmax), d__2 = z__[k + 1];
258 zmax = maxMACRO(d__1,d__2);
259/* L10: */
260 }
261 if (z__[(*n << 1) - 1] < 0.) {
262 *info = -((*n << 1) + 199);
263 template_blas_erbla("DLASQ2", &c__2);
264 return 0;
265 }
266 d__ += z__[(*n << 1) - 1];
267/* Computing MAX */
268 d__1 = qmax, d__2 = z__[(*n << 1) - 1];
269 qmax = maxMACRO(d__1,d__2);
270 zmax = maxMACRO(qmax,zmax);
271
272/* Check for diagonality. */
273
274 if (e == 0.) {
275 i__1 = *n;
276 for (k = 2; k <= i__1; ++k) {
277 z__[k] = z__[(k << 1) - 1];
278/* L20: */
279 }
280 template_lapack_lasrt("D", n, &z__[1], &iinfo);
281 z__[(*n << 1) - 1] = d__;
282 return 0;
283 }
284
285 trace = d__ + e;
286
287/* Check for zero data. */
288
289 if (trace == 0.) {
290 z__[(*n << 1) - 1] = 0.;
291 return 0;
292 }
293
294/* Check whether the machine is IEEE conformable. */
295
296 ieee = template_lapack_ilaenv(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1 && template_lapack_ilaenv(&c__11, "DLASQ2", "N", &c__1, &c__2,
297 &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1;
298
299/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
300
301 for (k = *n << 1; k >= 2; k += -2) {
302 z__[k * 2] = 0.;
303 z__[(k << 1) - 1] = z__[k];
304 z__[(k << 1) - 2] = 0.;
305 z__[(k << 1) - 3] = z__[k - 1];
306/* L30: */
307 }
308
309 i0 = 1;
310 n0 = *n;
311
312/* Reverse the qd-array, if warranted. */
313
314 if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
315 ipn4 = ( i0 + n0 ) << 2;
316 i__1 = ( i0 + n0 - 1 ) << 1;
317 for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
318 temp = z__[i4 - 3];
319 z__[i4 - 3] = z__[ipn4 - i4 - 3];
320 z__[ipn4 - i4 - 3] = temp;
321 temp = z__[i4 - 1];
322 z__[i4 - 1] = z__[ipn4 - i4 - 5];
323 z__[ipn4 - i4 - 5] = temp;
324/* L40: */
325 }
326 }
327
328/* Initial split checking via dqd and Li's test. */
329
330 pp = 0;
331
332 for (k = 1; k <= 2; ++k) {
333
334 d__ = z__[(n0 << 2) + pp - 3];
335 i__1 = (i0 << 2) + pp;
336 for (i4 = ( ( n0 - 1 ) << 2) + pp; i4 >= i__1; i4 += -4) {
337 if (z__[i4 - 1] <= tol2 * d__) {
338 z__[i4 - 1] = -0.;
339 d__ = z__[i4 - 3];
340 } else {
341 d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
342 }
343/* L50: */
344 }
345
346/* dqd maps Z to ZZ plus Li's test. */
347
348 emin = z__[(i0 << 2) + pp + 1];
349 d__ = z__[(i0 << 2) + pp - 3];
350 i__1 = ( ( n0 - 1 ) << 2) + pp;
351 for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
352 z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
353 if (z__[i4 - 1] <= tol2 * d__) {
354 z__[i4 - 1] = -0.;
355 z__[i4 - (pp << 1) - 2] = d__;
356 z__[i4 - (pp << 1)] = 0.;
357 d__ = z__[i4 + 1];
358 } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] &&
359 safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
360 temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
361 z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
362 d__ *= temp;
363 } else {
364 z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
365 pp << 1) - 2]);
366 d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
367 }
368/* Computing MIN */
369 d__1 = emin, d__2 = z__[i4 - (pp << 1)];
370 emin = minMACRO(d__1,d__2);
371/* L60: */
372 }
373 z__[(n0 << 2) - pp - 2] = d__;
374
375/* Now find qmax. */
376
377 qmax = z__[(i0 << 2) - pp - 2];
378 i__1 = (n0 << 2) - pp - 2;
379 for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
380/* Computing MAX */
381 d__1 = qmax, d__2 = z__[i4];
382 qmax = maxMACRO(d__1,d__2);
383/* L70: */
384 }
385
386/* Prepare for the next iteration on K. */
387
388 pp = 1 - pp;
389/* L80: */
390 }
391
392/* Initialise variables to pass to DLASQ3. */
393
394 ttype = 0;
395 dmin1 = 0.;
396 dmin2 = 0.;
397 dn = 0.;
398 dn1 = 0.;
399 dn2 = 0.;
400 g = 0.;
401 tau = 0.;
402
403 iter = 2;
404 nfail = 0;
405 ndiv = ( n0 - i0 ) << 1;
406
407 i__1 = *n + 1;
408 for (iwhila = 1; iwhila <= i__1; ++iwhila) {
409 if (n0 < 1) {
410 goto L170;
411 }
412
413/* While array unfinished do */
414
415/* E(N0) holds the value of SIGMA when submatrix in I0:N0 */
416/* splits from the rest of the array, but is negated. */
417
418 desig = 0.;
419 if (n0 == *n) {
420 sigma = 0.;
421 } else {
422 sigma = -z__[(n0 << 2) - 1];
423 }
424 if (sigma < 0.) {
425 *info = 1;
426 return 0;
427 }
428
429/* Find last unreduced submatrix's top index I0, find QMAX and */
430/* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
431
432 emax = 0.;
433 if (n0 > i0) {
434 emin = (d__1 = z__[(n0 << 2) - 5], absMACRO(d__1));
435 } else {
436 emin = 0.;
437 }
438 qmin = z__[(n0 << 2) - 3];
439 qmax = qmin;
440 for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
441 if (z__[i4 - 5] <= 0.) {
442 goto L100;
443 }
444 if (qmin >= emax * 4.) {
445/* Computing MIN */
446 d__1 = qmin, d__2 = z__[i4 - 3];
447 qmin = minMACRO(d__1,d__2);
448/* Computing MAX */
449 d__1 = emax, d__2 = z__[i4 - 5];
450 emax = maxMACRO(d__1,d__2);
451 }
452/* Computing MAX */
453 d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
454 qmax = maxMACRO(d__1,d__2);
455/* Computing MIN */
456 d__1 = emin, d__2 = z__[i4 - 5];
457 emin = minMACRO(d__1,d__2);
458/* L90: */
459 }
460 i4 = 4;
461
462L100:
463 i0 = i4 / 4;
464 pp = 0;
465
466 if (n0 - i0 > 1) {
467 dee = z__[(i0 << 2) - 3];
468 deemin = dee;
469 kmin = i0;
470 i__2 = (n0 << 2) - 3;
471 for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
472 dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
473 if (dee <= deemin) {
474 deemin = dee;
475 kmin = (i4 + 3) / 4;
476 }
477/* L110: */
478 }
479 if ( ( kmin - i0 ) << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] *
480 .5) {
481 ipn4 = ( i0 + n0 ) << 2;
482 pp = 2;
483 i__2 = ( i0 + n0 - 1 ) << 1;
484 for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
485 temp = z__[i4 - 3];
486 z__[i4 - 3] = z__[ipn4 - i4 - 3];
487 z__[ipn4 - i4 - 3] = temp;
488 temp = z__[i4 - 2];
489 z__[i4 - 2] = z__[ipn4 - i4 - 2];
490 z__[ipn4 - i4 - 2] = temp;
491 temp = z__[i4 - 1];
492 z__[i4 - 1] = z__[ipn4 - i4 - 5];
493 z__[ipn4 - i4 - 5] = temp;
494 temp = z__[i4];
495 z__[i4] = z__[ipn4 - i4 - 4];
496 z__[ipn4 - i4 - 4] = temp;
497/* L120: */
498 }
499 }
500 }
501
502/* Put -(initial shift) into DMIN. */
503
504/* Computing MAX */
505 d__1 = 0., d__2 = qmin - template_blas_sqrt(qmin) * 2. * template_blas_sqrt(emax);
506 dmin__ = -maxMACRO(d__1,d__2);
507
508/* Now I0:N0 is unreduced. */
509/* PP = 0 for ping, PP = 1 for pong. */
510/* PP = 2 indicates that flipping was applied to the Z array and */
511/* and that the tests for deflation upon entry in DLASQ3 */
512/* should not be performed. */
513
514 nbig = (n0 - i0 + 1) * 30;
515 i__2 = nbig;
516 for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
517 if (i0 > n0) {
518 goto L150;
519 }
520
521/* While submatrix unfinished take a good dqds step. */
522
523 template_lapack_lasq3(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
524 nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
525 dn1, &dn2, &g, &tau);
526
527 pp = 1 - pp;
528
529/* When EMIN is very small check for splits. */
530
531 if (pp == 0 && n0 - i0 >= 3) {
532 if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
533 sigma) {
534 splt = i0 - 1;
535 qmax = z__[(i0 << 2) - 3];
536 emin = z__[(i0 << 2) - 1];
537 oldemn = z__[i0 * 4];
538 i__3 = ( n0 - 3 ) << 2;
539 for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
540 if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <=
541 tol2 * sigma) {
542 z__[i4 - 1] = -sigma;
543 splt = i4 / 4;
544 qmax = 0.;
545 emin = z__[i4 + 3];
546 oldemn = z__[i4 + 4];
547 } else {
548/* Computing MAX */
549 d__1 = qmax, d__2 = z__[i4 + 1];
550 qmax = maxMACRO(d__1,d__2);
551/* Computing MIN */
552 d__1 = emin, d__2 = z__[i4 - 1];
553 emin = minMACRO(d__1,d__2);
554/* Computing MIN */
555 d__1 = oldemn, d__2 = z__[i4];
556 oldemn = minMACRO(d__1,d__2);
557 }
558/* L130: */
559 }
560 z__[(n0 << 2) - 1] = emin;
561 z__[n0 * 4] = oldemn;
562 i0 = splt + 1;
563 }
564 }
565
566/* L140: */
567 }
568
569 *info = 2;
570 return 0;
571
572/* end IWHILB */
573
574L150:
575
576/* L160: */
577 ;
578 }
579
580 *info = 3;
581 return 0;
582
583/* end IWHILA */
584
585L170:
586
587/* Move q's to the front. */
588
589 i__1 = *n;
590 for (k = 2; k <= i__1; ++k) {
591 z__[k] = z__[(k << 2) - 3];
592/* L180: */
593 }
594
595/* Sort and compute sum of eigenvalues. */
596
597 template_lapack_lasrt("D", n, &z__[1], &iinfo);
598
599 e = 0.;
600 for (k = *n; k >= 1; --k) {
601 e += z__[k];
602/* L190: */
603 }
604
605/* Store trace, sum(eigenvalues) and information on performance. */
606
607 z__[(*n << 1) + 1] = trace;
608 z__[(*n << 1) + 2] = e;
609 z__[(*n << 1) + 3] = (Treal) iter;
610/* Computing 2nd power */
611 i__1 = *n;
612 z__[(*n << 1) + 4] = (Treal) ndiv / (Treal) (i__1 * i__1);
613 z__[(*n << 1) + 5] = nfail * 100. / (Treal) iter;
614 return 0;
615
616/* End of DLASQ2 */
617
618} /* dlasq2_ */
619
620#endif
Treal template_blas_sqrt(Treal x)
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
int ftnlen
Definition: template_blas_common.h:42
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
int template_lapack_lasq2(integer *n, Treal *z__, integer *info)
Definition: template_lapack_lasq2.h:45
int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *sigma, Treal *desig, Treal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *g, Treal *tau)
Definition: template_lapack_lasq3.h:47
int template_lapack_lasrt(const char *id, const integer *n, Treal *d__, integer *info)
Definition: template_lapack_lasrt.h:42