Actual source code: pipecr.c
#include <petsc/private/kspimpl.h>

/*
     KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.

     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
     but can be called directly by KSPSetUp().
*/
static PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
{
  /* get work vectors needed by PIPECR */
  KSPSetWorkVecs(ksp,7);
  return 0;
}
/*
     KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
*/
static PetscErrorCode KSPSolve_PIPECR(KSP ksp)
{
  PetscInt    i;
  PetscScalar alpha = 0.0,beta = 0.0,gamma,gammaold = 0.0,delta;
  PetscReal   dp = 0.0;
  Vec         X,B,Z,P,W,Q,U,M,N;
  Mat         Amat,Pmat;
  PetscBool   diagonalscale;
  PCGetDiagonalScale(ksp->pc,&diagonalscale);
  /* diagonal scaling of the preconditioner is not supported by this method */
  if (diagonalscale) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  X = ksp->vec_sol;
  B = ksp->vec_rhs;
  M = ksp->work[0];
  Z = ksp->work[1];
  P = ksp->work[2];
  N = ksp->work[3];
  W = ksp->work[4];
  Q = ksp->work[5];
  U = ksp->work[6];

  PCGetOperators(ksp->pc,&Amat,&Pmat);

  ksp->its = 0;
  /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
  if (!ksp->guess_zero) {
    KSP_MatMult(ksp,Amat,X,W);    /*     w <- b - Ax     */
    VecAYPX(W,-1.0,B);
  } else {
    VecCopy(B,W);                 /*     w <- b (x is 0) */
  }
  KSP_PCApply(ksp,W,U);           /*     u <- Bw         */

  switch (ksp->normtype) {
  case KSP_NORM_PRECONDITIONED:
    VecNormBegin(U,NORM_2,&dp);   /*     dp <- u'*u = e'*A'*B'*B*A*e     */
    PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
    KSP_MatMult(ksp,Amat,U,W);    /*     w <- Au         */
    VecNormEnd(U,NORM_2,&dp);
    break;
  case KSP_NORM_NONE:
    KSP_MatMult(ksp,Amat,U,W);
    dp = 0.0;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
  }
  KSPLogResidualHistory(ksp,dp);
  KSPMonitor(ksp,0,dp);
  ksp->rnorm = dp;
  (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);     /* test for convergence */
  if (ksp->reason) return 0;

  i = 0;
  do {
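    /* One pipelined CR iteration: apply the preconditioner, start the two dot-product
       reductions, overlap them with the matrix-vector product, then complete the
       reductions and update the x, u, w and p, q, z recurrences. */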
    KSP_PCApply(ksp,W,M);         /*     m <- Bw       */

    if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
      VecNormBegin(U,NORM_2,&dp);
    }
    VecDotBegin(W,U,&gamma);
    VecDotBegin(M,W,&delta);
    PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
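    /* the non-blocking reduction started above makes progress while the matrix-vector product below runs */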

    KSP_MatMult(ksp,Amat,M,N);    /*     n <- Am       */

    if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
      VecNormEnd(U,NORM_2,&dp);
    }
    VecDotEnd(W,U,&gamma);
    VecDotEnd(M,W,&delta);

    if (i > 0) {
      if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
      ksp->rnorm = dp;
      KSPLogResidualHistory(ksp,dp);
      KSPMonitor(ksp,i,dp);
      (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
      if (ksp->reason) return 0;
    }

    if (i == 0) {
      alpha = gamma / delta;
      VecCopy(N,Z);               /*     z <- n               */
      VecCopy(M,Q);               /*     q <- m               */
      VecCopy(U,P);               /*     p <- u               */
    } else {
      beta  = gamma / gammaold;
      alpha = gamma / (delta - beta / alpha * gamma);
      VecAYPX(Z,beta,N);          /*     z <- n + beta * z    */
      VecAYPX(Q,beta,M);          /*     q <- m + beta * q    */
      VecAYPX(P,beta,U);          /*     p <- u + beta * p    */
    }
    VecAXPY(X, alpha,P);          /*     x <- x + alpha * p   */
    VecAXPY(U,-alpha,Q);          /*     u <- u - alpha * q   */
    VecAXPY(W,-alpha,Z);          /*     w <- w - alpha * z   */
    gammaold = gamma;
    i++;
    ksp->its = i;

    /* disabled: periodic recomputation of the true residual every 50 iterations */
    /* if (i%50 == 0) { */
    /*   KSP_MatMult(ksp,Amat,X,W);   /\*     w <- b - Ax     *\/ */
    /*   VecAYPX(W,-1.0,B); */
    /*   KSP_PCApply(ksp,W,U); */
    /*   KSP_MatMult(ksp,Amat,U,W); */
    /* } */

  } while (i <= ksp->max_it);
  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  return 0;
}
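
/*
   The solver above relies on PETSc's split-phase reductions to hide the latency of the global
   communication.  A minimal sketch of the pattern, assuming vectors x, y, z and a matrix A that
   are not part of this file (all calls below are standard PETSc API):

     PetscScalar dot;
     VecDotBegin(x,y,&dot);                                           // compute the local part and queue the reduction
     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)x));   // start the reduction non-blocking
     MatMult(A,x,z);                                                  // useful work that overlaps the communication
     VecDotEnd(x,y,&dot);                                             // wait for and retrieve the reduced value
*/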

/*MC
   KSPPIPECR - Pipelined conjugate residual method

   This method has only a single non-blocking reduction per iteration, compared to 2 blocking reductions for standard CR. The
   non-blocking reduction is overlapped by the matrix-vector product, but not by the preconditioner application.

   See also KSPPIPECG, where the reduction is only overlapped with the matrix-vector product.

   Level: intermediate

   Notes:
   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for the performance of pipelined methods.
   See the FAQ on the PETSc website for details.

   Contributed by:
   Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

   Reference:
   P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
   Parallel Computing, 40(7), 2014.

.seealso: KSPCreate(), KSPSetType(), KSPPIPECG, KSPGROPPCG, KSPPGMRES, KSPCG, KSPCGUseSingleReduction()
M*/
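
/*
   Example usage (a minimal sketch, not part of this file): selecting PIPECR on a KSP whose
   operator A and vectors b, x are assumed to have been created elsewhere; the same choice can
   be made at run time with -ksp_type pipecr:

     KSP ksp;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPSetType(ksp,KSPPIPECR);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
     KSPDestroy(&ksp);
*/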

PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
{
  KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
  KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);

  ksp->ops->setup          = KSPSetUp_PIPECR;
  ksp->ops->solve          = KSPSolve_PIPECR;
  ksp->ops->destroy        = KSPDestroyDefault;
  ksp->ops->view           = NULL;
  ksp->ops->setfromoptions = NULL;
  ksp->ops->buildsolution  = KSPBuildSolutionDefault;
  ksp->ops->buildresidual  = KSPBuildResidualDefault;
  return 0;
}