Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

template<class ArrayT>
void Impala::Core::Array::Pattern::PxRedistArray ( ArrayT **  a,
int  xcpus,
int  ycpus,
int  zcpus,
bool  bdata = false 
) [inline]

Definition at line 1255 of file PxDistribution.h.

References _myCPU, _nrCPUs, ArrayBD(), ArrayBH(), ArrayBW(), ArrayCPB(), ArrayD(), ArrayH(), ArrayPB(), ArrayW(), PxLclDepth(), PxLclHeight(), PxLclStartX(), PxLclStartY(), PxLclStartZ(), PxLclWidth(), PxRePartition(), and RDIST_TAG.

Referenced by PatRecGenConv2dSep().

01257 {
01258     /*** Complete redistribution of array data (incl. potential    ***/
01259     /*** borders) using the Direct Communication schema. This code ***/
01260     /*** is based on: 'Parallel Matrix Transpose Algorithms on     ***/
01261     /*** Distributed Memory Concurrent Computers' (Choi, Dongarra, ***/
01262     /*** Walker) Mathematical Sciences Section, Oak Ridge National ***/
01263     /*** Lab., and Dept. Computer Science, Univ. of Tennessee at   ***/
01264     /*** Knoxville, LAPACK Working Note 65 (CS-93-215), Oct 1993.  ***/
01265 
01266     int bw = ArrayBW(*a), bh = ArrayBH(*a), bd = ArrayBD(*a);
01267 
01268     /*** Save old local image characteristics ***/
01269 
01270     int* oldStartX = new int[_nrCPUs];
01271     int* oldStartY = new int[_nrCPUs];
01272     int* oldStartZ = new int[_nrCPUs];
01273     int* oldEndX = new int[_nrCPUs];
01274     int* oldEndY = new int[_nrCPUs];
01275     int* oldEndZ = new int[_nrCPUs];
01276 
01277     int i;
01278     for (i=0; i<_nrCPUs; i++) {
01279         oldStartX[i] = PxLclStartX(i);
01280         oldStartY[i] = PxLclStartY(i);
01281         oldStartZ[i] = PxLclStartZ(i);
01282         oldEndX[i]   = oldStartX[i] + PxLclWidth(i)  - 1;
01283         oldEndY[i]   = oldStartY[i] + PxLclHeight(i) - 1;
01284         oldEndZ[i]   = oldStartZ[i] + PxLclDepth(i)  - 1;
01285         if (bdata) {
01286             oldEndX[i] += 2*bw;
01287             oldEndY[i] += 2*bh;
01288             oldEndZ[i] += 2*bd;
01289         }
01290     }
01291 
01292     /*** Repartition according to new CPU grid ***/
01293 
01294     PxRePartition(xcpus, ycpus, zcpus);
01295 
01296     /*** Obtain new local image characteristics ***/
01297 
01298     int* newStartX = new int[_nrCPUs];
01299     int* newStartY = new int[_nrCPUs];
01300     int* newStartZ = new int[_nrCPUs];
01301     int* newEndX = new int[_nrCPUs];
01302     int* newEndY = new int[_nrCPUs];
01303     int* newEndZ = new int[_nrCPUs];
01304 
01305     for (i=0; i<_nrCPUs; i++) {
01306         newStartX[i] = PxLclStartX(i);
01307         newStartY[i] = PxLclStartY(i);
01308         newStartZ[i] = PxLclStartZ(i);
01309         newEndX[i]   = newStartX[i] + PxLclWidth(i)  - 1;
01310         newEndY[i]   = newStartY[i] + PxLclHeight(i) - 1;
01311         newEndZ[i]   = newStartZ[i] + PxLclDepth(i)  - 1;
01312         if (bdata) {
01313             newEndX[i] += 2*bw;
01314             newEndY[i] += 2*bh;
01315             newEndZ[i] += 2*bd;
01316         }
01317     }
01318     int newW = PxLclWidth(_myCPU);
01319     int newH = PxLclHeight(_myCPU);
01320     int newD = PxLclDepth(_myCPU);
01321 
01322     /*** Create new local array and temporary MPI buffer ***/
01323 
01324     ArrayT* nA = PxArrayCreate<ArrayT>(newW, newH, newD, bw, bh, bd);
01325     typedef typename ArrayT::StorType storT;
01326     int eSize = ArrayT::ElemSize();
01327     int sSize = eSize*sizeof(storT);
01328     int mpibuffsize = sizeof(MPI_BYTE) * sSize *
01329                       ArrayW(nA) * ArrayH(nA) * ArrayD(nA);
01330 //  int mpibuffsize = sizeof(MPI_BYTE) * sSize * newW * newH * newD;
01331     char *mpibuff = new char[mpibuffsize];
01332     MPI_Buffer_attach(mpibuff, mpibuffsize);
01333 
01334     /*** Send to/receive from all other CPUs (direct communication) ***/
01335 
01336     MPI_Datatype elem, blk2d, blk3d;
01337     MPI_Status stat;
01338     MPI_Type_contiguous(sSize, MPI_BYTE, &elem);
01339 
01340     for (i=0; i<_nrCPUs; i++) {
01341 
01342         int partner = (_nrCPUs - _myCPU + i) % _nrCPUs;
01343 
01344         /*** Send data to partner ***/
01345 
01346         int maxStartX = (oldStartX[_myCPU] > newStartX[partner]) ?
01347                          oldStartX[_myCPU] : newStartX[partner];
01348         int maxStartY = (oldStartY[_myCPU] > newStartY[partner]) ?
01349                          oldStartY[_myCPU] : newStartY[partner];
01350         int maxStartZ = (oldStartZ[_myCPU] > newStartZ[partner]) ?
01351                          oldStartZ[_myCPU] : newStartZ[partner];
01352         int minEndX   = (oldEndX[_myCPU] < newEndX[partner]) ?
01353                          oldEndX[_myCPU] : newEndX[partner];
01354         int minEndY   = (oldEndY[_myCPU] < newEndY[partner]) ?
01355                          oldEndY[_myCPU] : newEndY[partner];
01356         int minEndZ   = (oldEndZ[_myCPU] < newEndZ[partner]) ?
01357                          oldEndZ[_myCPU] : newEndZ[partner];
01358         int xSize = minEndX - maxStartX + 1;
01359         int ySize = minEndY - maxStartY + 1;
01360         int zSize = minEndZ - maxStartZ + 1;
01361 
01362         if (xSize > 0 && ySize > 0 && zSize > 0) {
01363 
01364             int offset = eSize *
01365             ((maxStartZ-oldStartZ[_myCPU]) * ArrayW(*a)*ArrayH(*a) +
01366              (maxStartY-oldStartY[_myCPU]) * ArrayW(*a) +
01367              (maxStartX-oldStartX[_myCPU]));
01368 
01369             MPI_Type_vector(ySize, xSize, ArrayW(*a), elem, &blk2d);
01370             MPI_Type_hvector(zSize, 1,
01371                         ArrayW(*a)*ArrayH(*a)*sSize, blk2d, &blk3d);
01372             MPI_Type_commit(&blk3d);
01373             MPI_Bsend((bdata) ?
01374                      ArrayPB(*a) + offset : ArrayCPB(*a) + offset,
01375                      1, blk3d, partner, RDIST_TAG, MPI_COMM_WORLD);
01376             MPI_Type_free(&blk3d);
01377             MPI_Type_free(&blk2d);
01378         }
01379 
01380         /*** Receive data from partner ***/
01381 
01382         maxStartX = (oldStartX[partner] > newStartX[_myCPU]) ?
01383                      oldStartX[partner] : newStartX[_myCPU];
01384         maxStartY = (oldStartY[partner] > newStartY[_myCPU]) ?
01385                      oldStartY[partner] : newStartY[_myCPU];
01386         maxStartZ = (oldStartZ[partner] > newStartZ[_myCPU]) ?
01387                      oldStartZ[partner] : newStartZ[_myCPU];
01388         minEndX   = (oldEndX[partner] < newEndX[_myCPU]) ?
01389                      oldEndX[partner] : newEndX[_myCPU];
01390         minEndY   = (oldEndY[partner] < newEndY[_myCPU]) ?
01391                      oldEndY[partner] : newEndY[_myCPU];
01392         minEndZ   = (oldEndZ[partner] < newEndZ[_myCPU]) ?
01393                      oldEndZ[partner] : newEndZ[_myCPU];
01394         xSize = minEndX - maxStartX + 1;
01395         ySize = minEndY - maxStartY + 1;
01396         zSize = minEndZ - maxStartZ + 1;
01397 
01398         if (xSize > 0 && ySize > 0 && zSize > 0) {
01399 
01400             int offset = eSize *
01401             ((maxStartZ-newStartZ[_myCPU]) * ArrayW(nA)*ArrayH(nA) +
01402              (maxStartY-newStartY[_myCPU]) * ArrayW(nA) +
01403              (maxStartX-newStartX[_myCPU]));
01404 
01405             MPI_Type_vector(ySize, xSize, ArrayW(nA), elem, &blk2d);
01406             MPI_Type_hvector(zSize, 1,
01407                         ArrayW(nA)*ArrayH(nA)*sSize, blk2d, &blk3d);
01408             MPI_Type_commit(&blk3d);
01409             MPI_Recv((bdata) ?
01410                     ArrayPB(nA) + offset : ArrayCPB(nA) + offset, 1,
01411                     blk3d, partner, RDIST_TAG, MPI_COMM_WORLD, &stat);
01412             MPI_Type_free(&blk3d);
01413             MPI_Type_free(&blk2d);
01414         }
01415     }
01416 
01417     /*** Clean up and make new local array permanent ***/
01418 
01419     delete oldStartX;
01420     delete oldStartY;
01421     delete oldStartZ;
01422     delete oldEndX;
01423     delete oldEndY;
01424     delete oldEndZ;
01425     delete newStartX;
01426     delete newStartY;
01427     delete newStartZ;
01428     delete newEndX;
01429     delete newEndY;
01430     delete newEndZ;
01431     MPI_Type_free(&elem);
01432     MPI_Buffer_detach(&mpibuff, &mpibuffsize);
01433     delete mpibuff;
01434     delete *a;
01435     *a = nA;
01436 }

Here is the call graph for this function:


Generated on Fri Mar 19 11:02:49 2010 for ImpalaSrc by  doxygen 1.5.1