template<class ArrayT>
Definition at line 1012 of file PxDistribution.h. References _myCPU, _nrCPUs, ArrayBD(), ArrayBH(), ArrayBW(), ArrayD(), ArrayH(), ArrayPB(), ArrayW(), PxLclDepth(), PxLclHeight(), and PxLclWidth(). Referenced by PxGatherArray(). 01013 { 01014 /*** MPI-Gather. May be faster than OFT/SBT scatter provided ***/ 01015 /*** above as specific communication subsystem capabilities may ***/ 01016 /*** be incorporated in the implementation. ***/ 01017 /*** ***/ 01018 /*** NOTE: This function can only be used if: ***/ 01019 /*** ***/ 01020 /*** Borders must be sent as well OR all border sizes are 0 AND ***/ 01021 /*** xcpus>=1 AND ArrayH(glob)=1 AND ArrayD(glob)=1, OR ***/ 01022 /*** xcpus=1 AND ycpus>=1 AND ArrayD(glob)=1, OR ***/ 01023 /*** xcpus=1 AND ycpus=1 AND zcpus>=1. ***/ 01024 /*** ***/ 01025 /*** This is due to the lack of possibility to define an array ***/ 01026 /*** of derived datatypes at the root. In principle, we could ***/ 01027 /*** temporarily re-arrange the layout of the array to still be ***/ 01028 /*** able to use the standard MPI functionality. However, this ***/ 01029 /*** requires a costly copy operation which is not implemented. ***/ 01030 01031 int *counts, *disps; 01032 01033 /*** Determine size and position of each local array ***/ 01034 01035 if (_myCPU == root) { 01036 counts = new int[_nrCPUs]; 01037 disps = new int[_nrCPUs]; 01038 int slice = 2*((ArrayW(glob)*ArrayH(glob)*ArrayBD(glob)) + 01039 (ArrayW(glob)*ArrayBH(glob))); 01040 for (int i=0; i<_nrCPUs; i++) { 01041 counts[i] = (PxLclWidth(i) + 2*ArrayBW(glob)) * 01042 (PxLclHeight(i) + 2*ArrayBH(glob)) * 01043 (PxLclDepth(i) + 2*ArrayBD(glob)); 01044 disps[i] = (i==0) ? 
0 : disps[i-1] + counts[i-1] - slice; 01045 } 01046 } 01047 01048 /*** Scatter array data ***/ 01049 01050 int sSize = ArrayT::ElemSize()*sizeof(typename ArrayT::StorType); 01051 MPI_Datatype elem; 01052 MPI_Type_contiguous(sSize, MPI_BYTE, &elem); 01053 MPI_Type_commit(&elem); 01054 MPI_Gatherv(ArrayPB(loc), 01055 ArrayW(loc)*ArrayH(loc)*ArrayD(loc), elem, 01056 ArrayPB(glob), counts, disps, elem, root, 01057 MPI_COMM_WORLD); 01058 MPI_Type_free(&elem); 01059 01060 if (_myCPU == root) { 01061 delete counts; 01062 delete disps; 01063 } 01064 }
Here is the call graph for this function: (graph image not available in this text rendering)