template<class ArrayT>
Definition at line 900 of file PxDistribution.h.

References _logCPUs, _maxCPUs, _myCPU, _nrCPUs, ArrayBD(), ArrayBH(), ArrayBW(), ArrayCPB(), ArrayD(), ArrayH(), ArrayPB(), ArrayW(), GATH_TAG, PxGetSBTorder(), PxLclArrayCopy(), PxLclStart(), PxLineRange(), and TO.

Referenced by PxGatherArray().

00901 {
00902     /*** SBT (Spanning Binomial Tree) or hypercube gather. Its ***/
00903     /*** use is restricted slightly for implementation reasons. ***/
00904     /*** NOTE: should work correctly, except: ***/
00905     /*** ***/
00906     /*** - IF ("zCPUs" == 1) AND ("yCPUs" > 1) THEN ***/
00907     /*** "xCPUs" MUST be a power of 2 (so: 1, 2, 4, 8, etc...) ***/
00908     /*** - IF ("zCPUs" > 1) THEN ***/
00909     /*** "xCPUs" AND "yCPUs" both MUST be a power of 2 ***/
00910
00911     int *order = new int[_maxCPUs], myIndex, bufW, bufH, bufD;
00912     ArrayT *buf;
00913
00914     /*** Determine ordering of CPUs in Spanning Binomial Tree ***/
00915
00916     PxGetSBTorder(root, _maxCPUs, order, &myIndex);
00917
00918     /*** Create temporary array buffer (including borders) ***/
00919
00920     if (_myCPU == root) {
00921         bufW = ArrayW(glob);
00922         bufH = ArrayH(glob);
00923         bufD = ArrayD(glob);
00924         buf = glob;
00925     } else {
00926         bufW = PxLineRange(1, myIndex, order);
00927         bufH = PxLineRange(2, myIndex, order);
00928         bufD = PxLineRange(3, myIndex, order);
00929         buf = PxArrayCreate<ArrayT>(bufW, bufH, bufD, ArrayBW(glob),
00930                                     ArrayBH(glob), ArrayBD(glob));
00931         bufW += 2*ArrayBW(glob);
00932         bufH += 2*ArrayBH(glob);
00933         bufD += 2*ArrayBD(glob);
00934     }
00935     PxLclArrayCopy(loc, TO, buf, root, bdata);
00936
00937     /*** Gather array data using Spanning Binomial Tree ***/
00938
00939     int eSize = ArrayT::ElemSize();
00940     int sSize = eSize*sizeof(typename ArrayT::StorType);
00941     MPI_Datatype elem, blk2d, blk3d;
00942     MPI_Status stat;
00943     MPI_Type_contiguous(sSize, MPI_BYTE, &elem);
00944
00945     int mask = 1;
00946     for (int i=0; i<_logCPUs; i++) {
00947         int partnerIndex = myIndex ^ mask;
00948         int partner = order[partnerIndex];
00949         if ((myIndex % mask == 0) && (partner < _nrCPUs)) {
00950             if (myIndex > partnerIndex) { // send data to SBT parent
00951                 int xSize = PxLineRange(1, myIndex, order);
00952                 int ySize = PxLineRange(2, myIndex, order);
00953                 int zSize = PxLineRange(3, myIndex, order);
00954                 if (bdata) {
00955                     xSize += 2*ArrayBW(glob);
00956                     ySize += 2*ArrayBH(glob);
00957                     zSize += 2*ArrayBD(glob);
00958                 }
00959                 if (xSize != 0 && ySize != 0 && zSize != 0) {
00960                     MPI_Type_vector(ySize, xSize, bufW, elem, &blk2d);
00961                     MPI_Type_hvector(zSize, 1,
00962                                      bufW*bufH*sSize, blk2d, &blk3d);
00963                     MPI_Type_commit(&blk3d);
00964                     MPI_Send((bdata) ? ArrayPB(buf) : ArrayCPB(buf),
00965                              1, blk3d, partner, GATH_TAG, MPI_COMM_WORLD);
00966                     MPI_Type_free(&blk3d);
00967                     MPI_Type_free(&blk2d);
00968                 }
00969             } else { // receive data from SBT child
00970                 int xSize = PxLineRange(1, partnerIndex, order);
00971                 int ySize = PxLineRange(2, partnerIndex, order);
00972                 int zSize = PxLineRange(3, partnerIndex, order);
00973                 int offset = PxLclStart(partner, ArrayBW(glob),
00974                                         ArrayBH(glob), ArrayBD(glob));
00975                 if (_myCPU != root) {
00976                     offset -= (PxLclStart(_myCPU, ArrayBW(glob),
00977                                ArrayBH(glob), ArrayBD(glob)) -
00978                                (bufW*bufH*ArrayBD(glob) +
00979                                 bufW*ArrayBH(glob) + ArrayBW(glob)));
00980                 }
00981                 if (bdata) {
00982                     xSize += 2*ArrayBW(glob);
00983                     ySize += 2*ArrayBH(glob);
00984                     zSize += 2*ArrayBD(glob);
00985                     offset -= (bufW*bufH*ArrayBD(glob) +
00986                                bufW*ArrayBH(glob) + ArrayBW(glob));
00987                 }
00988                 if (xSize != 0 && ySize != 0 && zSize != 0) {
00989                     MPI_Type_vector(ySize, xSize, bufW, elem, &blk2d);
00990                     MPI_Type_hvector(zSize, 1,
00991                                      bufW*bufH*sSize, blk2d, &blk3d);
00992                     MPI_Type_commit(&blk3d);
00993                     MPI_Recv(ArrayPB(buf) + offset*eSize, 1, blk3d,
00994                              partner, GATH_TAG, MPI_COMM_WORLD, &stat);
00995                     MPI_Type_free(&blk3d);
00996                     MPI_Type_free(&blk2d);
00997                 }
00998             }
00999         }
01000         mask <<= 1;
01001     }
01002     MPI_Type_free(&elem);
01003     if (_myCPU != root) {
01004         delete buf;
01005     }
01006     delete order;
01007 }
Here is the call graph for this function:
|