ergo
template_lapack_stein.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_STEIN_HEADER
38#define TEMPLATE_LAPACK_STEIN_HEADER
39
40
41template<class Treal>
42int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e,
43 const integer *m, const Treal *w, const integer *iblock, const integer *isplit,
44 Treal *z__, const integer *ldz, Treal *work, integer *iwork,
45 integer *ifail, integer *info)
46{
47/* -- LAPACK routine (version 3.0) --
48 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
49 Courant Institute, Argonne National Lab, and Rice University
50 September 30, 1994
51
52
53 Purpose
54 =======
55
56 DSTEIN computes the eigenvectors of a real symmetric tridiagonal
57 matrix T corresponding to specified eigenvalues, using inverse
58 iteration.
59
60 The maximum number of iterations allowed for each eigenvector is
61 specified by an internal parameter MAXITS (currently set to 5).
62
63 Arguments
64 =========
65
66 N (input) INTEGER
67 The order of the matrix. N >= 0.
68
69 D (input) DOUBLE PRECISION array, dimension (N)
70 The n diagonal elements of the tridiagonal matrix T.
71
72 E (input) DOUBLE PRECISION array, dimension (N)
73 The (n-1) subdiagonal elements of the tridiagonal matrix
74 T, in elements 1 to N-1. E(N) need not be set.
75
76 M (input) INTEGER
77 The number of eigenvectors to be found. 0 <= M <= N.
78
79 W (input) DOUBLE PRECISION array, dimension (N)
80 The first M elements of W contain the eigenvalues for
81 which eigenvectors are to be computed. The eigenvalues
82 should be grouped by split-off block and ordered from
83 smallest to largest within the block. ( The output array
84 W from DSTEBZ with ORDER = 'B' is expected here. )
85
86 IBLOCK (input) INTEGER array, dimension (N)
87 The submatrix indices associated with the corresponding
88 eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
89 the first submatrix from the top, =2 if W(i) belongs to
90 the second submatrix, etc. ( The output array IBLOCK
91 from DSTEBZ is expected here. )
92
93 ISPLIT (input) INTEGER array, dimension (N)
94 The splitting points, at which T breaks up into submatrices.
95 The first submatrix consists of rows/columns 1 to
96 ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
97 through ISPLIT( 2 ), etc.
98 ( The output array ISPLIT from DSTEBZ is expected here. )
99
100 Z (output) DOUBLE PRECISION array, dimension (LDZ, M)
101 The computed eigenvectors. The eigenvector associated
102 with the eigenvalue W(i) is stored in the i-th column of
103 Z. Any vector which fails to converge is set to its current
104 iterate after MAXITS iterations.
105
106 LDZ (input) INTEGER
107 The leading dimension of the array Z. LDZ >= max(1,N).
108
109 WORK (workspace) DOUBLE PRECISION array, dimension (5*N)
110
111 IWORK (workspace) INTEGER array, dimension (N)
112
113 IFAIL (output) INTEGER array, dimension (M)
114 On normal exit, all elements of IFAIL are zero.
115 If one or more eigenvectors fail to converge after
116 MAXITS iterations, then their indices are stored in
117 array IFAIL.
118
119 INFO (output) INTEGER
120 = 0: successful exit.
121 < 0: if INFO = -i, the i-th argument had an illegal value
122 > 0: if INFO = i, then i eigenvectors failed to converge
123 in MAXITS iterations. Their indices are stored in
124 array IFAIL.
125
126 Internal Parameters
127 ===================
128
129 MAXITS INTEGER, default = 5
130 The maximum number of iterations performed.
131
132 EXTRA INTEGER, default = 2
133 The number of iterations performed after norm growth
134 criterion is satisfied, should be at least 1.
135
136 =====================================================================
137
138
139 Test the input parameters.
140
141 Parameter adjustments */
142 /* Table of constant values */
143 integer c__2 = 2;
144 integer c__1 = 1;
145 integer c_n1 = -1;
146
147 /* System generated locals */
148 integer z_dim1, z_offset, i__1, i__2, i__3;
149 Treal d__1, d__2, d__3, d__4, d__5;
150 /* Local variables */
151 integer jblk, nblk;
152 integer jmax;
153 integer i__, j;
154 integer iseed[4], gpind, iinfo;
155 integer b1;
156 integer j1;
157 Treal ortol;
158 integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
159 Treal xj;
160 integer nrmchk;
161 integer blksiz;
162 Treal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol;
163 integer its;
164 Treal xjm, ztr, eps1;
165#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
166
167
168 --d__;
169 --e;
170 --w;
171 --iblock;
172 --isplit;
173 z_dim1 = *ldz;
174 z_offset = 1 + z_dim1 * 1;
175 z__ -= z_offset;
176 --work;
177 --iwork;
178 --ifail;
179
180 /* Initialization added by Elias to get rid of compiler warnings. */
181 ortol = dtpcrt = xjm = onenrm = gpind = 0;
182 /* Function Body */
183 *info = 0;
184 i__1 = *m;
185 for (i__ = 1; i__ <= i__1; ++i__) {
186 ifail[i__] = 0;
187/* L10: */
188 }
189
190 if (*n < 0) {
191 *info = -1;
192 } else if (*m < 0 || *m > *n) {
193 *info = -4;
194 } else if (*ldz < maxMACRO(1,*n)) {
195 *info = -9;
196 } else {
197 i__1 = *m;
198 for (j = 2; j <= i__1; ++j) {
199 if (iblock[j] < iblock[j - 1]) {
200 *info = -6;
201 goto L30;
202 }
203 if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
204 *info = -5;
205 goto L30;
206 }
207/* L20: */
208 }
209L30:
210 ;
211 }
212
213 if (*info != 0) {
214 i__1 = -(*info);
215 template_blas_erbla("STEIN ", &i__1);
216 return 0;
217 }
218
219/* Quick return if possible */
220
221 if (*n == 0 || *m == 0) {
222 return 0;
223 } else if (*n == 1) {
224 z___ref(1, 1) = 1.;
225 return 0;
226 }
227
228/* Get machine constants. */
229
230 eps = template_lapack_lamch("Precision", (Treal)0);
231
232/* Initialize seed for random number generator DLARNV. */
233
234 for (i__ = 1; i__ <= 4; ++i__) {
235 iseed[i__ - 1] = 1;
236/* L40: */
237 }
238
239/* Initialize pointers. */
240
241 indrv1 = 0;
242 indrv2 = indrv1 + *n;
243 indrv3 = indrv2 + *n;
244 indrv4 = indrv3 + *n;
245 indrv5 = indrv4 + *n;
246
247/* Compute eigenvectors of matrix blocks. */
248
249 j1 = 1;
250 i__1 = iblock[*m];
251 for (nblk = 1; nblk <= i__1; ++nblk) {
252
253/* Find starting and ending indices of block nblk. */
254
255 if (nblk == 1) {
256 b1 = 1;
257 } else {
258 b1 = isplit[nblk - 1] + 1;
259 }
260 bn = isplit[nblk];
261 blksiz = bn - b1 + 1;
262 if (blksiz == 1) {
263 goto L60;
264 }
265 gpind = b1;
266
267/* Compute reorthogonalization criterion and stopping criterion. */
268
269 onenrm = (d__1 = d__[b1], absMACRO(d__1)) + (d__2 = e[b1], absMACRO(d__2));
270/* Computing MAX */
271 d__3 = onenrm, d__4 = (d__1 = d__[bn], absMACRO(d__1)) + (d__2 = e[bn - 1],
272 absMACRO(d__2));
273 onenrm = maxMACRO(d__3,d__4);
274 i__2 = bn - 1;
275 for (i__ = b1 + 1; i__ <= i__2; ++i__) {
276/* Computing MAX */
277 d__4 = onenrm, d__5 = (d__1 = d__[i__], absMACRO(d__1)) + (d__2 = e[
278 i__ - 1], absMACRO(d__2)) + (d__3 = e[i__], absMACRO(d__3));
279 onenrm = maxMACRO(d__4,d__5);
280/* L50: */
281 }
282 ortol = onenrm * .001;
283
284 dtpcrt = template_blas_sqrt(.1 / blksiz);
285
286/* Loop through eigenvalues of block nblk. */
287
288L60:
289 jblk = 0;
290 i__2 = *m;
291 for (j = j1; j <= i__2; ++j) {
292 if (iblock[j] != nblk) {
293 j1 = j;
294 goto L160;
295 }
296 ++jblk;
297 xj = w[j];
298
299/* Skip all the work if the block size is one. */
300
301 if (blksiz == 1) {
302 work[indrv1 + 1] = 1.;
303 goto L120;
304 }
305
306/* If eigenvalues j and j-1 are too close, add a relatively
307 small perturbation. */
308
309 if (jblk > 1) {
310 eps1 = (d__1 = eps * xj, absMACRO(d__1));
311 pertol = eps1 * 10.;
312 sep = xj - xjm;
313 if (sep < pertol) {
314 xj = xjm + pertol;
315 }
316 }
317
318 its = 0;
319 nrmchk = 0;
320
321/* Get random starting vector. */
322
323 template_lapack_larnv(&c__2, iseed, &blksiz, &work[indrv1 + 1]);
324
325/* Copy the matrix T so it won't be destroyed in factorization. */
326
327 template_blas_copy(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
328 i__3 = blksiz - 1;
329 template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
330 i__3 = blksiz - 1;
331 template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);
332
333/* Compute LU factors with partial pivoting ( PT = LU ) */
334
335 tol = 0.;
336 template_lapack_lagtf(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
337 indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);
338
339/* Update iteration count. */
340
341L70:
342 ++its;
343 if (its > 5) {
344 goto L100;
345 }
346
347/* Normalize and scale the righthand side vector Pb.
348
349 Computing MAX */
350 d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], absMACRO(d__1));
351 scl = blksiz * onenrm * maxMACRO(d__2,d__3) / template_blas_asum(&blksiz, &work[
352 indrv1 + 1], &c__1);
353 template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
354
355/* Solve the system LU = Pb. */
356
357 template_lapack_lagts(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
358 work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
359 indrv1 + 1], &tol, &iinfo);
360
361/* Reorthogonalize by modified Gram-Schmidt if eigenvalues are
362 close enough. */
363
364 if (jblk == 1) {
365 goto L90;
366 }
367 if ((d__1 = xj - xjm, absMACRO(d__1)) > ortol) {
368 gpind = j;
369 }
370 if (gpind != j) {
371 i__3 = j - 1;
372 for (i__ = gpind; i__ <= i__3; ++i__) {
373 ztr = -template_blas_dot(&blksiz, &work[indrv1 + 1], &c__1, &z___ref(
374 b1, i__), &c__1);
375 template_blas_axpy(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[
376 indrv1 + 1], &c__1);
377/* L80: */
378 }
379 }
380
381/* Check the infinity norm of the iterate. */
382
383L90:
384 jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
385 nrm = (d__1 = work[indrv1 + jmax], absMACRO(d__1));
386
387/* Continue for additional iterations after norm reaches
388 stopping criterion. */
389
390 if (nrm < dtpcrt) {
391 goto L70;
392 }
393 ++nrmchk;
394 if (nrmchk < 3) {
395 goto L70;
396 }
397
398 goto L110;
399
400/* If stopping criterion was not satisfied, update info and
401 store eigenvector number in array ifail. */
402
403L100:
404 ++(*info);
405 ifail[*info] = j;
406
407/* Accept iterate as jth eigenvector. */
408
409L110:
410 scl = 1. / template_blas_nrm2(&blksiz, &work[indrv1 + 1], &c__1);
411 jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
412 if (work[indrv1 + jmax] < 0.) {
413 scl = -scl;
414 }
415 template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
416L120:
417 i__3 = *n;
418 for (i__ = 1; i__ <= i__3; ++i__) {
419 z___ref(i__, j) = 0.;
420/* L130: */
421 }
422 i__3 = blksiz;
423 for (i__ = 1; i__ <= i__3; ++i__) {
424 z___ref(b1 + i__ - 1, j) = work[indrv1 + i__];
425/* L140: */
426 }
427
428/* Save the shift to check eigenvalue spacing at next
429 iteration. */
430
431 xjm = xj;
432
433/* L150: */
434 }
435L160:
436 ;
437 }
438
439 return 0;
440
441/* End of DSTEIN */
442
443} /* dstein_ */
444
445#undef z___ref
446
447
448#endif
Treal template_blas_asum(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_asum.h:42
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:43
Treal template_blas_sqrt(Treal x)
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:42
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:43
integer template_blas_idamax(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_idamax.h:42
Treal template_blas_nrm2(const integer *n, const Treal *x, const integer *incx)
Definition: template_blas_nrm2.h:42
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
int template_lapack_lagtf(const integer *n, Treal *a, const Treal *lambda, Treal *b, Treal *c__, const Treal *tol, Treal *d__, integer *in, integer *info)
Definition: template_lapack_lagtf.h:42
int template_lapack_lagts(const integer *job, const integer *n, const Treal *a, const Treal *b, const Treal *c__, const Treal *d__, const integer *in, Treal *y, Treal *tol, integer *info)
Definition: template_lapack_lagts.h:42
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
int template_lapack_larnv(const integer *idist, integer *iseed, const integer *n, Treal *x)
Definition: template_lapack_larnv.h:42
#define z___ref(a_1, a_2)
int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e, const integer *m, const Treal *w, const integer *iblock, const integer *isplit, Treal *z__, const integer *ldz, Treal *work, integer *iwork, integer *ifail, integer *info)
Definition: template_lapack_stein.h:42