Actual source code: ex9.c
static char help[] = "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
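
/*
  Example invocations, taken from the test cases at the bottom of this file and assuming the
  executable is built as ex9 (7 MPI ranks, as in the tests):
    mpiexec -n 7 ./ex9 -world2sub
    mpiexec -n 7 ./ex9 -sub2sub
    mpiexec -n 7 ./ex9 -world2subs -vectype cuda
*/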

#include <petscvec.h>
int main(int argc, char **argv)
{
  PetscMPIInt nproc, grank, mycolor;
  PetscInt    i, n, N = 20, low, high;
  MPI_Comm    subcomm;
  Vec         x  = NULL;             /* global vector on PETSC_COMM_WORLD */
  Vec         yg = NULL;             /* global vector on PETSC_COMM_WORLD */
  VecScatter  vscat;
  IS          ix, iy;
  PetscBool   iscuda = PETSC_FALSE;  /* Option to use VECCUDA vectors */
  PetscBool   optionflag, compareflag;
  char        vectypename[PETSC_MAX_PATH_LEN];
  PetscBool   world2sub  = PETSC_FALSE; /* Copy a vector from WORLD to a subcomm? */
  PetscBool   sub2sub    = PETSC_FALSE; /* Copy a vector from a subcomm to another subcomm? */
  PetscBool   world2subs = PETSC_FALSE; /* Copy a vector from WORLD to multiple subcomms? */

  PetscInitialize(&argc, &argv, (char *)0, help);
  MPI_Comm_size(PETSC_COMM_WORLD, &nproc);
  MPI_Comm_rank(PETSC_COMM_WORLD, &grank);

  PetscOptionsGetBool(NULL, NULL, "-world2sub", &world2sub, NULL);
  PetscOptionsGetBool(NULL, NULL, "-sub2sub", &sub2sub, NULL);
  PetscOptionsGetBool(NULL, NULL, "-world2subs", &world2subs, NULL);
  PetscOptionsGetString(NULL, NULL, "-vectype", vectypename, sizeof(vectypename), &optionflag);
  if (optionflag) {
    PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag);
    if (compareflag) iscuda = PETSC_TRUE;
  }

  /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
  mycolor = grank % 3;
  MPI_Comm_split(PETSC_COMM_WORLD, mycolor, grank, &subcomm);
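
  /* For illustration, with the 7-rank configuration used by the tests below, the split gives
     subcomm0 = {0, 3, 6}, subcomm1 = {1, 4} and subcomm2 = {2, 5}; ranks keep their
     PETSC_COMM_WORLD ordering within each subcomm because grank is used as the key.
  */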

  /*===========================================================================
   *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
   *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
   *===========================================================================*/
  if (world2sub) {
    VecCreate(PETSC_COMM_WORLD, &x);
    VecSetSizes(x, PETSC_DECIDE, N);
    if (iscuda) {
      VecSetType(x, VECCUDA);
    } else {
      VecSetType(x, VECSTANDARD);
    }
    VecSetUp(x);
    PetscObjectSetName((PetscObject)x, "x_commworld"); /* Give a name to view x clearly */

    /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
    VecGetOwnershipRange(x, &low, &high);
    for (i = low; i < high; i++) {
      PetscScalar val = -i;
      VecSetValue(x, i, val, INSERT_VALUES);
    }
    VecAssemblyBegin(x);
    VecAssemblyEnd(x);

    /* Transfer x to a vector y only defined on subcomm0 and vice versa */
    if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
      Vec          y;
      PetscScalar *yvalue;
      VecCreate(subcomm, &y);
      VecSetSizes(y, PETSC_DECIDE, N);
      if (iscuda) {
        VecSetType(y, VECCUDA);
      } else {
        VecSetType(y, VECSTANDARD);
      }
      VecSetUp(y);
      PetscObjectSetName((PetscObject)y, "y_subcomm_0"); /* Give a name to view y clearly */
      VecGetLocalSize(y, &n);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDAGetArray(y, &yvalue);
#endif
      } else {
        VecGetArray(y, &yvalue);
      }
      /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed to by yvalue.
         Note this is a collective call. All processes have to call it and supply a consistent N.
      */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg);
#endif
      } else {
        VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg);
      }

      /* Create an identity map that makes yg[i] = x[i], i = 0..N-1 */
      VecGetOwnershipRange(yg, &low, &high); /* low, high are global indices */
      ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix);
      ISDuplicate(ix, &iy);

      /* The union of the ix's on subcomm0 covers the full range [0,N) */
      VecScatterCreate(x, ix, yg, iy, &vscat);
      VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* Once yg has received the data from x, we return yvalue to y so that we can use y in other operations.
         VecGetArray (resp. VecCUDAGetArray) must be paired with VecRestoreArray (resp. VecCUDARestoreArray).
      */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArray(y, &yvalue);
#endif
      } else {
        VecRestoreArray(y, &yvalue);
      }

      /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
      VecView(y, PETSC_VIEWER_STDOUT_(subcomm));
      VecScale(y, 2.0);

      /* Send the new y back to x */
      VecGetArray(y, &yvalue); /* If VecScale was done on the GPU, PETSc prepares a valid host yvalue for access */
      /* Supply the new yvalue to yg without copying memory */
      VecPlaceArray(yg, yvalue);
      VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
      VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
      VecResetArray(yg);
      VecRestoreArray(y, &yvalue); /* yvalue was obtained with VecGetArray above, so pair it with VecRestoreArray */

      VecDestroy(&y);
    } else {
      /* Ranks outside of subcomm0 do not supply values to yg */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg);
#endif
      } else {
        VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg);
      }

      /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
         ranks just need to create empty ISes to satisfy VecScatterCreate.
      */
      ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix);
      ISDuplicate(ix, &iy);

      VecScatterCreate(x, ix, yg, iy, &vscat);
      VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send,
         but they still have to call VecScatterBegin/End since these routines are collective.
      */
      VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
      VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
    }

    VecView(x, PETSC_VIEWER_STDOUT_WORLD);
    ISDestroy(&ix);
    ISDestroy(&iy);
    VecDestroy(&x);
    VecDestroy(&yg);
    VecScatterDestroy(&vscat);
  } /* world2sub */

  /*===========================================================================
   *  Transfer a vector x defined on subcomm0 to a vector y defined on
   *  subcomm1. The two subcomms are not overlapping and their union is
   *  not necessarily equal to PETSC_COMM_WORLD.
   *===========================================================================*/
  if (sub2sub) {
    if (mycolor == 0) {
      /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
      PetscInt           n, N = 22;
      Vec                x, xg, yg;
      IS                 ix, iy;
      VecScatter         vscat;
      const PetscScalar *xvalue;
      MPI_Comm           intercomm, parentcomm;
      PetscMPIInt        lrank;

      MPI_Comm_rank(subcomm, &lrank);
      /* x is on subcomm */
      VecCreate(subcomm, &x);
      VecSetSizes(x, PETSC_DECIDE, N);
      if (iscuda) {
        VecSetType(x, VECCUDA);
      } else {
        VecSetType(x, VECSTANDARD);
      }
      VecSetUp(x);
      VecGetOwnershipRange(x, &low, &high);

      /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
      for (i = low; i < high; i++) {
        PetscScalar val = i;
        VecSetValue(x, i, val, INSERT_VALUES);
      }
      VecAssemblyBegin(x);
      VecAssemblyEnd(x);

      MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 1, 100 /*tag*/, &intercomm);
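      /* A note on the arguments, based on the grank % 3 split above: local leader 0 is the first rank of
         subcomm0, and remote leader 1 is the PETSC_COMM_WORLD rank of subcomm1's first rank. Subcomm1
         passes the mirror value (remote leader 0) in its own MPI_Intercomm_create call below.
      */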

      /* Tell rank 0 of subcomm1 the global size of x */
      if (!lrank) MPI_Send(&N, 1, MPIU_INT, 0 /*receiver's rank in remote comm, i.e., subcomm1*/, 200 /*tag*/, intercomm);

      /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
         But this order actually does not matter, since what we care about is the vector y, which is defined on subcomm1.
      */
      MPI_Intercomm_merge(intercomm, 0 /*low*/, &parentcomm);

      /* Create a vector xg on parentcomm, which shares memory with x */
      VecGetLocalSize(x, &n);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDAGetArrayRead(x, &xvalue);
        VecCreateMPICUDAWithArray(parentcomm, 1, n, N, xvalue, &xg);
#endif
      } else {
        VecGetArrayRead(x, &xvalue);
        VecCreateMPIWithArray(parentcomm, 1, n, N, xvalue, &xg);
      }

      /* Ranks in subcomm0 have nothing on yg, so they simply have n = 0 and array = NULL */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg);
#endif
      } else {
        VecCreateMPIWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg);
      }

      /* Create the vecscatter, which does the identity map by setting yg[i] = xg[i], i = 0..N-1. */
      VecGetOwnershipRange(xg, &low, &high); /* low, high are global indices of xg */
      ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix);
      ISDuplicate(ix, &iy);
      VecScatterCreate(xg, ix, yg, iy, &vscat);

      /* Scatter values from xg to yg */
      VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArrayRead(x, &xvalue);
#endif
      } else {
        VecRestoreArrayRead(x, &xvalue);
      }
      VecDestroy(&x);
      ISDestroy(&ix);
      ISDestroy(&iy);
      VecDestroy(&xg);
      VecDestroy(&yg);
      VecScatterDestroy(&vscat);
      MPI_Comm_free(&intercomm);
      MPI_Comm_free(&parentcomm);
    } else if (mycolor == 1) { /* subcomm1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
      PetscInt     n, N;
      Vec          y, xg, yg;
      IS           ix, iy;
      VecScatter   vscat;
      PetscScalar *yvalue;
      MPI_Comm     intercomm, parentcomm;
      PetscMPIInt  lrank;

      MPI_Comm_rank(subcomm, &lrank);
      MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 0 /*remote_leader*/, 100 /*tag*/, &intercomm);

      /* The two rank-0 processes talk to each other */
      if (!lrank) MPI_Recv(&N, 1, MPIU_INT, 0 /*sender's rank in remote comm, i.e., subcomm0*/, 200 /*tag*/, intercomm, MPI_STATUS_IGNORE);
      /* Rank 0 of subcomm1 bcasts N to its members */
      MPI_Bcast(&N, 1, MPIU_INT, 0 /*local root*/, subcomm);

      /* Create an intracomm PETSc can work on */
      MPI_Intercomm_merge(intercomm, 1 /*high*/, &parentcomm);
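      /* Passing high = 1 here (versus low = 0 on the subcomm0 side) is what orders subcomm0's ranks
         before subcomm1's ranks in parentcomm, matching the comment in the subcomm0 branch above.
      */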

      /* Ranks in subcomm1 have nothing on xg, so they simply have n = 0 and array = NULL. */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg);
#endif
      } else {
        VecCreateMPIWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg);
      }

      VecCreate(subcomm, &y);
      VecSetSizes(y, PETSC_DECIDE, N);
      if (iscuda) {
        VecSetType(y, VECCUDA);
      } else {
        VecSetType(y, VECSTANDARD);
      }
      VecSetUp(y);

      PetscObjectSetName((PetscObject)y, "y_subcomm_1"); /* Give a name to view y clearly */
      VecGetLocalSize(y, &n);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDAGetArray(y, &yvalue);
#endif
      } else {
        VecGetArray(y, &yvalue);
      }
      /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
         created in the same order on subcomm0 and subcomm1. For example, we cannot reverse
         the order of creating xg and yg in subcomm1.
      */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg);
#endif
      } else {
        VecCreateMPIWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg);
      }

      /* Ranks in subcomm0 already specified the full range of the identity map.
         Ranks in subcomm1 just need to create empty ISes to satisfy VecScatterCreate.
      */
      ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix);
      ISDuplicate(ix, &iy);
      VecScatterCreate(xg, ix, yg, iy, &vscat);

      /* Scatter values from xg to yg */
      VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* After the VecScatter is done, the values in yg are available. y is what we are interested in, so we return yvalue to y */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArray(y, &yvalue);
#endif
      } else {
        VecRestoreArray(y, &yvalue);
      }

      /* Libraries on subcomm1 can safely use y now, for example, view it */
      VecView(y, PETSC_VIEWER_STDOUT_(subcomm));

      VecDestroy(&y);
      ISDestroy(&ix);
      ISDestroy(&iy);
      VecDestroy(&xg);
      VecDestroy(&yg);
      VecScatterDestroy(&vscat);
      MPI_Comm_free(&intercomm);
      MPI_Comm_free(&parentcomm);
    } else if (mycolor == 2) { /* subcomm2 */
      /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
    }
  } /* sub2sub */

  /*===========================================================================
   *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
   *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
   *  as we did in case 1, but that is not efficient. Instead, we use one VecScatter
   *  to achieve that.
   *===========================================================================*/
  if (world2subs) {
    Vec          y;
    PetscInt     n, N = 15, xstart, ystart, low, high;
    PetscScalar *yvalue;

    /* Initialize x to [0, 1, 2, 3, ..., N-1] */
    VecCreate(PETSC_COMM_WORLD, &x);
    VecSetSizes(x, PETSC_DECIDE, N);
    if (iscuda) {
      VecSetType(x, VECCUDA);
    } else {
      VecSetType(x, VECSTANDARD);
    }
    VecSetUp(x);
    VecGetOwnershipRange(x, &low, &high);
    for (i = low; i < high; i++) VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES);
    VecAssemblyBegin(x);
    VecAssemblyEnd(x);

    /* Every subcomm has a y with the same global size as x */
    VecCreate(subcomm, &y);
    VecSetSizes(y, PETSC_DECIDE, N);
    if (iscuda) {
      VecSetType(y, VECCUDA);
    } else {
      VecSetType(y, VECSTANDARD);
    }
    VecSetUp(y);
    VecGetLocalSize(y, &n);

    /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
       Each rank in a subcomm contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
       necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
       0, 3, 6, etc. of PETSC_COMM_WORLD, so subcomm0's pieces are interleaved with pieces from the other subcomms in yg.
    */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      VecCUDAGetArray(y, &yvalue);
      VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg);
#endif
    } else {
      VecGetArray(y, &yvalue);
      VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg);
    }
    PetscObjectSetName((PetscObject)yg, "yg_on_subcomms"); /* Give a name to view yg clearly */

    /* The following two lines are key. From xstart we know where to pull entries from x. Note that we get xstart from y,
       since the first entry of y on this rank is x[xstart]. From ystart we know where to put entries into yg.
    */
    VecGetOwnershipRange(y, &xstart, NULL);
    VecGetOwnershipRange(yg, &ystart, NULL);
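
    /* For illustration only (assuming the 7-rank test configuration and PETSc's default PETSC_DECIDE layout):
       on world rank 3, the second rank of subcomm0, y owns entries 5..9 of its subcomm's vector, so xstart = 5,
       while ystart is that rank's offset within yg and therefore also counts the pieces owned by world ranks 0, 1 and 2.
    */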

    ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix);
    ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy);
    VecScatterCreate(x, ix, yg, iy, &vscat);
    VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

    /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in the output. */
    VecView(yg, PETSC_VIEWER_STDOUT_WORLD);
    VecDestroy(&yg);

    /* Restore yvalue so that processes in the subcomm can use y from now on. */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      VecCUDARestoreArray(y, &yvalue);
#endif
    } else {
      VecRestoreArray(y, &yvalue);
    }
    VecScale(y, 3.0);

    ISDestroy(&ix); /* One can also destroy ix, iy immediately after VecScatterCreate() */
    ISDestroy(&iy);
    VecDestroy(&x);
    VecDestroy(&y);
    VecScatterDestroy(&vscat);
  } /* world2subs */

  MPI_Comm_free(&subcomm);
  PetscFinalize();
  return 0;
}

/*TEST

   build:
     requires: !defined(PETSC_HAVE_MPIUNI)

   testset:
     nsize: 7

     test:
       suffix: 1
       args: -world2sub

     test:
       suffix: 2
       args: -sub2sub
       # deadlocks with NECMPI and INTELMPI (20210400300)
       requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)

     test:
       suffix: 3
       args: -world2subs

     test:
       suffix: 4
       args: -world2sub -vectype cuda
       requires: cuda

     test:
       suffix: 5
       args: -sub2sub -vectype cuda
       requires: cuda

     test:
       suffix: 6
       args: -world2subs -vectype cuda
       requires: cuda

     test:
       suffix: 7
       args: -world2sub -sf_type neighbor
       output_file: output/ex9_1.out
       # OpenMPI has a bug with MPI_Neighbor_alltoallv etc. (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
       # segfaults with NECMPI
       requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)

TEST*/