Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

template<class ArrayT>
static void Impala::Core::Array::Pattern::PxGatherArraySBT ( ArrayT *  glob,
ArrayT *  loc,
int  root,
bool  bdata 
) [inline, static]

Definition at line 900 of file PxDistribution.h.

References _logCPUs, _maxCPUs, _myCPU, _nrCPUs, ArrayBD(), ArrayBH(), ArrayBW(), ArrayCPB(), ArrayD(), ArrayH(), ArrayPB(), ArrayW(), GATH_TAG, PxGetSBTorder(), PxLclArrayCopy(), PxLclStart(), PxLineRange(), and TO.

Referenced by PxGatherArray().

00901 {
00902     /*** SBT (Spanning Binomial Tree) or hypercube gather. Its   ***/
00903     /*** use is restricted slightly for implementation reasons.  ***/
00904     /*** NOTE: should work correctly, except:                    ***/
00905     /***                                                         ***/
00906     /*** - IF ("zCPUs" == 1) AND ("yCPUs" > 1) THEN              ***/
00907     /***   "xCPUs" MUST be a power of 2 (so: 1, 2, 4, 8, etc...) ***/
00908     /*** - IF ("zCPUs" > 1) THEN                                 ***/
00909     /***   "xCPUs" AND "yCPUs" both MUST be a power of 2         ***/
00910 
00911     int     *order = new int[_maxCPUs], myIndex, bufW, bufH, bufD;
00912     ArrayT  *buf;
00913 
00914     /*** Determine ordering of CPUs in Spanning Binomial Tree ***/
00915 
00916     PxGetSBTorder(root, _maxCPUs, order, &myIndex);
00917 
00918     /*** Create temporary array buffer (including borders) ***/
00919 
00920     if (_myCPU == root) {
00921         bufW = ArrayW(glob);
00922         bufH = ArrayH(glob);
00923         bufD = ArrayD(glob);
00924         buf  = glob;
00925     } else {
00926         bufW = PxLineRange(1, myIndex, order);
00927         bufH = PxLineRange(2, myIndex, order);
00928         bufD = PxLineRange(3, myIndex, order);
00929         buf  = PxArrayCreate<ArrayT>(bufW, bufH, bufD, ArrayBW(glob),
00930                                      ArrayBH(glob), ArrayBD(glob));
00931         bufW += 2*ArrayBW(glob);
00932         bufH += 2*ArrayBH(glob);
00933         bufD += 2*ArrayBD(glob);
00934     }
00935     PxLclArrayCopy(loc, TO, buf, root, bdata);
00936 
00937     /*** Gather array data using Spanning Binomial Tree ***/
00938 
00939     int eSize = ArrayT::ElemSize();
00940     int sSize = eSize*sizeof(typename ArrayT::StorType);
00941     MPI_Datatype elem, blk2d, blk3d;
00942     MPI_Status stat;
00943     MPI_Type_contiguous(sSize, MPI_BYTE, &elem);
00944 
00945     int mask = 1;
00946     for (int i=0; i<_logCPUs; i++) {
00947         int partnerIndex = myIndex ^ mask;
00948         int partner = order[partnerIndex];
00949         if ((myIndex % mask == 0) && (partner < _nrCPUs)) {
00950             if (myIndex > partnerIndex) {   // send data to SBT parent
00951                 int xSize = PxLineRange(1, myIndex, order);
00952                 int ySize = PxLineRange(2, myIndex, order);
00953                 int zSize = PxLineRange(3, myIndex, order);
00954                 if (bdata) {
00955                     xSize += 2*ArrayBW(glob);
00956                     ySize += 2*ArrayBH(glob);
00957                     zSize += 2*ArrayBD(glob);
00958                 }
00959                 if (xSize != 0 && ySize != 0 && zSize != 0) {
00960                     MPI_Type_vector(ySize, xSize, bufW, elem, &blk2d);
00961                     MPI_Type_hvector(zSize, 1,
00962                                      bufW*bufH*sSize, blk2d, &blk3d);
00963                     MPI_Type_commit(&blk3d);
00964                     MPI_Send((bdata) ? ArrayPB(buf) : ArrayCPB(buf),
00965                            1, blk3d, partner, GATH_TAG, MPI_COMM_WORLD);
00966                     MPI_Type_free(&blk3d);
00967                     MPI_Type_free(&blk2d);
00968                 }
00969             } else {                    // receive data from SBT child
00970                 int xSize  = PxLineRange(1, partnerIndex, order);
00971                 int ySize  = PxLineRange(2, partnerIndex, order);
00972                 int zSize  = PxLineRange(3, partnerIndex, order);
00973                 int offset = PxLclStart(partner, ArrayBW(glob),
00974                                     ArrayBH(glob), ArrayBD(glob));
00975                 if (_myCPU != root) {
00976                     offset -= (PxLclStart(_myCPU, ArrayBW(glob),
00977                                ArrayBH(glob), ArrayBD(glob)) -
00978                               (bufW*bufH*ArrayBD(glob) +
00979                                bufW*ArrayBH(glob) + ArrayBW(glob)));
00980                 }
00981                 if (bdata) {
00982                     xSize += 2*ArrayBW(glob);
00983                     ySize += 2*ArrayBH(glob);
00984                     zSize += 2*ArrayBD(glob);
00985                     offset -= (bufW*bufH*ArrayBD(glob) +
00986                                bufW*ArrayBH(glob) + ArrayBW(glob));
00987                 }
00988                 if (xSize != 0 && ySize != 0 && zSize != 0) {
00989                     MPI_Type_vector(ySize, xSize, bufW, elem, &blk2d);
00990                     MPI_Type_hvector(zSize, 1,
00991                                      bufW*bufH*sSize, blk2d, &blk3d);
00992                     MPI_Type_commit(&blk3d);
00993                     MPI_Recv(ArrayPB(buf) + offset*eSize, 1, blk3d,
00994                              partner, GATH_TAG, MPI_COMM_WORLD, &stat);
00995                     MPI_Type_free(&blk3d);
00996                     MPI_Type_free(&blk2d);
00997                 }
00998             }
00999         }
01000         mask <<= 1;
01001     }
01002     MPI_Type_free(&elem);
01003     if (_myCPU != root) {
01004         delete buf;
01005     }
01006     delete order;
01007 }

Here is the call graph for this function:


Generated on Fri Mar 19 11:02:42 2010 for ImpalaSrc by  doxygen 1.5.1