ergo
template_lapack_geqrf.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_GEQRF_HEADER
38#define TEMPLATE_LAPACK_GEQRF_HEADER
39
40
41template<class Treal>
42int template_lapack_geqrf(const integer *m, const integer *n, Treal *a, const integer *
43 lda, Treal *tau, Treal *work, const integer *lwork, integer *info)
44{
45/* -- LAPACK routine (version 3.0) --
46 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
47 Courant Institute, Argonne National Lab, and Rice University
48 June 30, 1999
49
50
51 Purpose
52 =======
53
54 DGEQRF computes a QR factorization of a real M-by-N matrix A:
55 A = Q * R.
56
57 Arguments
58 =========
59
60 M (input) INTEGER
61 The number of rows of the matrix A. M >= 0.
62
63 N (input) INTEGER
64 The number of columns of the matrix A. N >= 0.
65
66 A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
67 On entry, the M-by-N matrix A.
68 On exit, the elements on and above the diagonal of the array
69 contain the min(M,N)-by-N upper trapezoidal matrix R (R is
70 upper triangular if m >= n); the elements below the diagonal,
71 with the array TAU, represent the orthogonal matrix Q as a
72 product of min(m,n) elementary reflectors (see Further
73 Details).
74
75 LDA (input) INTEGER
76 The leading dimension of the array A. LDA >= max(1,M).
77
78 TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
79 The scalar factors of the elementary reflectors (see Further
80 Details).
81
82 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
83 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
84
85 LWORK (input) INTEGER
86 The dimension of the array WORK. LWORK >= max(1,N).
87 For optimum performance LWORK >= N*NB, where NB is
88 the optimal blocksize.
89
90 If LWORK = -1, then a workspace query is assumed; the routine
91 only calculates the optimal size of the WORK array, returns
92 this value as the first entry of the WORK array, and no error
93 message related to LWORK is issued by XERBLA.
94
95 INFO (output) INTEGER
96 = 0: successful exit
97 < 0: if INFO = -i, the i-th argument had an illegal value
98
99 Further Details
100 ===============
101
102 The matrix Q is represented as a product of elementary reflectors
103
104 Q = H(1) H(2) . . . H(k), where k = min(m,n).
105
106 Each H(i) has the form
107
108 H(i) = I - tau * v * v'
109
110 where tau is a real scalar, and v is a real vector with
111 v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
112 and tau in TAU(i).
113
114 =====================================================================
115
116
117 Test the input arguments
118
119 Parameter adjustments */
120 /* Table of constant values */
121 integer c__1 = 1;
122 integer c_n1 = -1;
123 integer c__3 = 3;
124 integer c__2 = 2;
125
126 /* System generated locals */
127 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
128 /* Local variables */
129 integer i__, k, nbmin, iinfo;
130 integer ib, nb;
131 integer nx;
132 integer ldwork, lwkopt;
133 logical lquery;
134 integer iws;
135#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
136
137
138 a_dim1 = *lda;
139 a_offset = 1 + a_dim1 * 1;
140 a -= a_offset;
141 --tau;
142 --work;
143
144 /* Function Body */
145 *info = 0;
146 nb = template_lapack_ilaenv(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
147 1);
148 lwkopt = *n * nb;
149 work[1] = (Treal) lwkopt;
150 lquery = *lwork == -1;
151 if (*m < 0) {
152 *info = -1;
153 } else if (*n < 0) {
154 *info = -2;
155 } else if (*lda < maxMACRO(1,*m)) {
156 *info = -4;
157 } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
158 *info = -7;
159 }
160 if (*info != 0) {
161 i__1 = -(*info);
162 template_blas_erbla("GEQRF ", &i__1);
163 return 0;
164 } else if (lquery) {
165 return 0;
166 }
167
168/* Quick return if possible */
169
170 k = minMACRO(*m,*n);
171 if (k == 0) {
172 work[1] = 1.;
173 return 0;
174 }
175
176 nbmin = 2;
177 nx = 0;
178 iws = *n;
179 if (nb > 1 && nb < k) {
180
181/* Determine when to cross over from blocked to unblocked code.
182
183 Computing MAX */
184 i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
185 ftnlen)6, (ftnlen)1);
186 nx = maxMACRO(i__1,i__2);
187 if (nx < k) {
188
189/* Determine if workspace is large enough for blocked code. */
190
191 ldwork = *n;
192 iws = ldwork * nb;
193 if (*lwork < iws) {
194
195/* Not enough workspace to use optimal NB: reduce NB and
196 determine the minimum value of NB. */
197
198 nb = *lwork / ldwork;
199/* Computing MAX */
200 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DGEQRF", " ", m, n, &c_n1, &
201 c_n1, (ftnlen)6, (ftnlen)1);
202 nbmin = maxMACRO(i__1,i__2);
203 }
204 }
205 }
206
207 if (nb >= nbmin && nb < k && nx < k) {
208
209/* Use blocked code initially */
210
211 i__1 = k - nx;
212 i__2 = nb;
213 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
214/* Computing MIN */
215 i__3 = k - i__ + 1;
216 ib = minMACRO(i__3,nb);
217
218/* Compute the QR factorization of the current block
219 A(i:m,i:i+ib-1) */
220
221 i__3 = *m - i__ + 1;
222 template_lapack_geqr2(&i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
223 iinfo);
224 if (i__ + ib <= *n) {
225
226/* Form the triangular factor of the block reflector
227 H = H(i) H(i+1) . . . H(i+ib-1) */
228
229 i__3 = *m - i__ + 1;
230 template_lapack_larft("Forward", "Columnwise", &i__3, &ib, &a_ref(i__, i__),
231 lda, &tau[i__], &work[1], &ldwork);
232
233/* Apply H' to A(i:m,i+ib:n) from the left */
234
235 i__3 = *m - i__ + 1;
236 i__4 = *n - i__ - ib + 1;
237 template_lapack_larfb("Left", "Transpose", "Forward", "Columnwise", &i__3, &
238 i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &
239 a_ref(i__, i__ + ib), lda, &work[ib + 1], &ldwork);
240 }
241/* L10: */
242 }
243 } else {
244 i__ = 1;
245 }
246
247/* Use unblocked code to factor the last or only block. */
248
249 if (i__ <= k) {
250 i__2 = *m - i__ + 1;
251 i__1 = *n - i__ + 1;
252 template_lapack_geqr2(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
253 iinfo);
254 }
255
256 work[1] = (Treal) iws;
257 return 0;
258
259/* End of DGEQRF */
260
261} /* dgeqrf_ */
262
263#undef a_ref
264
265
266#endif
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
int integer
Definition: template_blas_common.h:40
int ftnlen
Definition: template_blas_common.h:42
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
int template_lapack_geqr2(const integer *m, const integer *n, Treal *a, const integer *lda, Treal *tau, Treal *work, integer *info)
Definition: template_lapack_geqr2.h:42
#define a_ref(a_1, a_2)
int template_lapack_geqrf(const integer *m, const integer *n, Treal *a, const integer *lda, Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_geqrf.h:42
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition: template_lapack_larfb.h:42
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition: template_lapack_larft.h:42