ergo
template_lapack_lasq3.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LASQ3_HEADER
38#define TEMPLATE_LAPACK_LASQ3_HEADER
39
40
44
45
46template<class Treal>
47int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__,
48 integer *pp, Treal *dmin__, Treal *sigma, Treal *desig,
49 Treal *qmax, integer *nfail, integer *iter, integer *ndiv,
50 logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2,
51 Treal *dn, Treal *dn1, Treal *dn2, Treal *g,
52 Treal *tau)
53{
54 /* System generated locals */
55 integer i__1;
56 Treal d__1, d__2;
57
58
59 /* Local variables */
60 Treal s, t;
61 integer j4, nn;
62 Treal eps, tol;
63 integer n0in, ipn4;
64 Treal tol2, temp;
65
66
67/* -- LAPACK routine (version 3.2) -- */
68
69/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
70/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
71/* -- Berkeley -- */
72/* -- November 2008 -- */
73
74/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
75/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
76
77/* .. Scalar Arguments .. */
78/* .. */
79/* .. Array Arguments .. */
80/* .. */
81
82/* Purpose */
83/* ======= */
84
85/* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
86/* In case of failure it changes shifts, and tries again until output */
87/* is positive. */
88
89/* Arguments */
90/* ========= */
91
92/* I0 (input) INTEGER */
93/* First index. */
94
95/* N0 (input) INTEGER */
96/* Last index. */
97
98/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
99/* Z holds the qd array. */
100
101/* PP (input/output) INTEGER */
102/* PP=0 for ping, PP=1 for pong. */
103/* PP=2 indicates that flipping was applied to the Z array */
104/* and that the initial tests for deflation should not be */
105/* performed. */
106
107/* DMIN (output) DOUBLE PRECISION */
108/* Minimum value of d. */
109
110/* SIGMA (output) DOUBLE PRECISION */
111/* Sum of shifts used in current segment. */
112
113/* DESIG (input/output) DOUBLE PRECISION */
114/* Lower order part of SIGMA */
115
116/* QMAX (input) DOUBLE PRECISION */
117/* Maximum value of q. */
118
119/* NFAIL (output) INTEGER */
120/* Number of times shift was too big. */
121
122/* ITER (output) INTEGER */
123/* Number of iterations. */
124
125/* NDIV (output) INTEGER */
126/* Number of divisions. */
127
128/* IEEE (input) LOGICAL */
129/* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
130
131/* TTYPE (input/output) INTEGER */
132/* Shift type. */
133
134/* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
135/* These are passed as arguments in order to save their values */
136/* between calls to DLASQ3. */
137
138/* ===================================================================== */
139
140/* .. Parameters .. */
141/* .. */
142/* .. Local Scalars .. */
143/* .. */
144/* .. External Subroutines .. */
145/* .. */
146/* .. External Function .. */
147/* .. */
148/* .. Intrinsic Functions .. */
149/* .. */
150/* .. Executable Statements .. */
151
152 /* Parameter adjustments */
153 --z__;
154
155 /* Function Body */
156 n0in = *n0;
157 eps = template_lapack_lamch("Precision", (Treal)0);
158 tol = eps * 100.;
159/* Computing 2nd power */
160 d__1 = tol;
161 tol2 = d__1 * d__1;
162
163/* Check for deflation. */
164
165L10:
166
167 if (*n0 < *i0) {
168 return 0;
169 }
170 if (*n0 == *i0) {
171 goto L20;
172 }
173 nn = (*n0 << 2) + *pp;
174 if (*n0 == *i0 + 1) {
175 goto L40;
176 }
177
178/* Check whether E(N0-1) is negligible, 1 eigenvalue. */
179
180 if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
181 4] > tol2 * z__[nn - 7]) {
182 goto L30;
183 }
184
185L20:
186
187 z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
188 --(*n0);
189 goto L10;
190
191/* Check whether E(N0-2) is negligible, 2 eigenvalues. */
192
193L30:
194
195 if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
196 nn - 11]) {
197 goto L50;
198 }
199
200L40:
201
202 if (z__[nn - 3] > z__[nn - 7]) {
203 s = z__[nn - 3];
204 z__[nn - 3] = z__[nn - 7];
205 z__[nn - 7] = s;
206 }
207 if (z__[nn - 5] > z__[nn - 3] * tol2) {
208 t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
209 s = z__[nn - 3] * (z__[nn - 5] / t);
210 if (s <= t) {
211 s = z__[nn - 3] * (z__[nn - 5] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
212 } else {
213 s = z__[nn - 3] * (z__[nn - 5] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
214 }
215 t = z__[nn - 7] + (s + z__[nn - 5]);
216 z__[nn - 3] *= z__[nn - 7] / t;
217 z__[nn - 7] = t;
218 }
219 z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
220 z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
221 *n0 += -2;
222 goto L10;
223
224L50:
225 if (*pp == 2) {
226 *pp = 0;
227 }
228
229/* Reverse the qd-array, if warranted. */
230
231 if (*dmin__ <= 0. || *n0 < n0in) {
232 if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
233 ipn4 = ( *i0 + *n0 ) << 2;
234 i__1 = ( *i0 + *n0 - 1 ) << 1;
235 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
236 temp = z__[j4 - 3];
237 z__[j4 - 3] = z__[ipn4 - j4 - 3];
238 z__[ipn4 - j4 - 3] = temp;
239 temp = z__[j4 - 2];
240 z__[j4 - 2] = z__[ipn4 - j4 - 2];
241 z__[ipn4 - j4 - 2] = temp;
242 temp = z__[j4 - 1];
243 z__[j4 - 1] = z__[ipn4 - j4 - 5];
244 z__[ipn4 - j4 - 5] = temp;
245 temp = z__[j4];
246 z__[j4] = z__[ipn4 - j4 - 4];
247 z__[ipn4 - j4 - 4] = temp;
248/* L60: */
249 }
250 if (*n0 - *i0 <= 4) {
251 z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
252 z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
253 }
254/* Computing MIN */
255 d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
256 *dmin2 = minMACRO(d__1,d__2);
257/* Computing MIN */
258 d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
259 , d__1 = minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
260 z__[(*n0 << 2) + *pp - 1] = minMACRO(d__1,d__2);
261/* Computing MIN */
262 d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
263 minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
264 z__[(*n0 << 2) - *pp] = minMACRO(d__1,d__2);
265/* Computing MAX */
266 d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = maxMACRO(d__1,
267 d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
268 *qmax = maxMACRO(d__1,d__2);
269 *dmin__ = -0.;
270 }
271 }
272
273/* Choose a shift. */
274
275 template_lapack_lasq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2,
276 tau, ttype, g);
277
278/* Call dqds until DMIN > 0. */
279
280L70:
281
282 template_lapack_lasq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2,
283 ieee);
284
285 *ndiv += *n0 - *i0 + 2;
286 ++(*iter);
287
288/* Check status. */
289
290 if (*dmin__ >= 0. && *dmin1 > 0.) {
291
292/* Success. */
293
294 goto L90;
295
296 } else if (*dmin__ < 0. && *dmin1 > 0. && z__[( ( *n0 - 1 ) << 2) - *pp] < tol
297 * (*sigma + *dn1) && absMACRO(*dn) < tol * *sigma) {
298
299/* Convergence hidden by negative DN. */
300
301 z__[( ( *n0 - 1 ) << 2) - *pp + 2] = 0.;
302 *dmin__ = 0.;
303 goto L90;
304 } else if (*dmin__ < 0.) {
305
306/* TAU too big. Select new TAU and try again. */
307
308 ++(*nfail);
309 if (*ttype < -22) {
310
311/* Failed twice. Play it safe. */
312
313 *tau = 0.;
314 } else if (*dmin1 > 0.) {
315
316/* Late failure. Gives excellent shift. */
317
318 *tau = (*tau + *dmin__) * (1. - eps * 2.);
319 *ttype += -11;
320 } else {
321
322/* Early failure. Divide by 4. */
323
324 *tau *= .25;
325 *ttype += -12;
326 }
327 goto L70;
328 } else if (template_lapack_isnan(dmin__)) {
329
330/* NaN. */
331
332 if (*tau == 0.) {
333 goto L80;
334 } else {
335 *tau = 0.;
336 goto L70;
337 }
338 } else {
339
340/* Possible underflow. Play it safe. */
341
342 goto L80;
343 }
344
345/* Risk of underflow. */
346
347L80:
348 template_lapack_lasq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
349 *ndiv += *n0 - *i0 + 2;
350 ++(*iter);
351 *tau = 0.;
352
353L90:
354 if (*tau < *sigma) {
355 *desig += *tau;
356 t = *sigma + *desig;
357 *desig -= t - *sigma;
358 } else {
359 t = *sigma + *tau;
360 *desig = *sigma - (t - *tau) + *desig;
361 }
362 *sigma = t;
363
364 return 0;
365
366/* End of DLASQ3 */
367
368} /* dlasq3_ */
369
370#endif
Treal template_blas_sqrt(Treal x)
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:45
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *sigma, Treal *desig, Treal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *g, Treal *tau)
Definition: template_lapack_lasq3.h:47
int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *tau, integer *ttype, Treal *g)
Definition: template_lapack_lasq4.h:41
int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2, logical *ieee)
Definition: template_lapack_lasq5.h:41
int template_lapack_lasq6(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2)
Definition: template_lapack_lasq6.h:41