template<class ArrayT>
Definition at line 1255 of file PxDistribution.h. References _myCPU, _nrCPUs, ArrayBD(), ArrayBH(), ArrayBW(), ArrayCPB(), ArrayD(), ArrayH(), ArrayPB(), ArrayW(), PxLclDepth(), PxLclHeight(), PxLclStartX(), PxLclStartY(), PxLclStartZ(), PxLclWidth(), PxRePartition(), and RDIST_TAG. Referenced by PatRecGenConv2dSep(). 01257 { 01258 /*** Complete redistribution of array data (incl. potential ***/ 01259 /*** borders) using the Direct Communication schema. This code ***/ 01260 /*** is based on: 'Parallel Matrix Transpose Algorithms on ***/ 01261 /*** Distributed Memory Concurrent Computers' (Choi, Dongarra, ***/ 01262 /*** Walker) Mathematical Sciences Section, Oak Ridge National ***/ 01263 /*** Lab., and Dept. Computer Science, Univ. of Tennessee at ***/ 01264 /*** Knoxville, LAPACK Working Note 65 (CS-93-215), Oct 1993. ***/ 01265 01266 int bw = ArrayBW(*a), bh = ArrayBH(*a), bd = ArrayBD(*a); 01267 01268 /*** Save old local image characteristics ***/ 01269 01270 int* oldStartX = new int[_nrCPUs]; 01271 int* oldStartY = new int[_nrCPUs]; 01272 int* oldStartZ = new int[_nrCPUs]; 01273 int* oldEndX = new int[_nrCPUs]; 01274 int* oldEndY = new int[_nrCPUs]; 01275 int* oldEndZ = new int[_nrCPUs]; 01276 01277 int i; 01278 for (i=0; i<_nrCPUs; i++) { 01279 oldStartX[i] = PxLclStartX(i); 01280 oldStartY[i] = PxLclStartY(i); 01281 oldStartZ[i] = PxLclStartZ(i); 01282 oldEndX[i] = oldStartX[i] + PxLclWidth(i) - 1; 01283 oldEndY[i] = oldStartY[i] + PxLclHeight(i) - 1; 01284 oldEndZ[i] = oldStartZ[i] + PxLclDepth(i) - 1; 01285 if (bdata) { 01286 oldEndX[i] += 2*bw; 01287 oldEndY[i] += 2*bh; 01288 oldEndZ[i] += 2*bd; 01289 } 01290 } 01291 01292 /*** Repartition according to new CPU grid ***/ 01293 01294 PxRePartition(xcpus, ycpus, zcpus); 01295 01296 /*** Obtain new local image characteristics ***/ 01297 01298 int* newStartX = new int[_nrCPUs]; 01299 int* newStartY = new int[_nrCPUs]; 01300 int* newStartZ = new int[_nrCPUs]; 01301 int* newEndX = new int[_nrCPUs]; 01302 int* newEndY = new int[_nrCPUs]; 01303 int* newEndZ = new int[_nrCPUs]; 01304 01305 for (i=0; i<_nrCPUs; i++) { 01306 newStartX[i] = PxLclStartX(i); 01307 newStartY[i] = PxLclStartY(i); 01308 newStartZ[i] = PxLclStartZ(i); 01309 newEndX[i] = newStartX[i] + PxLclWidth(i) - 1; 01310 newEndY[i] = newStartY[i] + PxLclHeight(i) - 1; 01311 newEndZ[i] = newStartZ[i] + PxLclDepth(i) - 1; 01312 if (bdata) { 01313 newEndX[i] += 2*bw; 01314 newEndY[i] += 2*bh; 01315 newEndZ[i] += 2*bd; 01316 } 01317 } 01318 int newW = PxLclWidth(_myCPU); 01319 int newH = PxLclHeight(_myCPU); 01320 int newD = PxLclDepth(_myCPU); 01321 01322 /*** Create new local array and temporary MPI buffer ***/ 01323 01324 ArrayT* nA = PxArrayCreate<ArrayT>(newW, newH, newD, bw, bh, bd); 01325 typedef typename ArrayT::StorType storT; 01326 int eSize = ArrayT::ElemSize(); 01327 int sSize = eSize*sizeof(storT); 01328 int mpibuffsize = sizeof(MPI_BYTE) * sSize * 01329 ArrayW(nA) * ArrayH(nA) * ArrayD(nA); 01330 // int mpibuffsize = sizeof(MPI_BYTE) * sSize * newW * newH * newD; 01331 char *mpibuff = new char[mpibuffsize]; 01332 MPI_Buffer_attach(mpibuff, mpibuffsize); 01333 01334 /*** Send to/receive from all other CPUs (direct communication) ***/ 01335 01336 MPI_Datatype elem, blk2d, blk3d; 01337 MPI_Status stat; 01338 MPI_Type_contiguous(sSize, MPI_BYTE, &elem); 01339 01340 for (i=0; i<_nrCPUs; i++) { 01341 01342 int partner = (_nrCPUs - _myCPU + i) % _nrCPUs; 01343 01344 /*** Send data to partner ***/ 01345 01346 int maxStartX = (oldStartX[_myCPU] > newStartX[partner]) ? 01347 oldStartX[_myCPU] : newStartX[partner]; 01348 int maxStartY = (oldStartY[_myCPU] > newStartY[partner]) ? 01349 oldStartY[_myCPU] : newStartY[partner]; 01350 int maxStartZ = (oldStartZ[_myCPU] > newStartZ[partner]) ? 01351 oldStartZ[_myCPU] : newStartZ[partner]; 01352 int minEndX = (oldEndX[_myCPU] < newEndX[partner]) ? 01353 oldEndX[_myCPU] : newEndX[partner]; 01354 int minEndY = (oldEndY[_myCPU] < newEndY[partner]) ? 01355 oldEndY[_myCPU] : newEndY[partner]; 01356 int minEndZ = (oldEndZ[_myCPU] < newEndZ[partner]) ? 01357 oldEndZ[_myCPU] : newEndZ[partner]; 01358 int xSize = minEndX - maxStartX + 1; 01359 int ySize = minEndY - maxStartY + 1; 01360 int zSize = minEndZ - maxStartZ + 1; 01361 01362 if (xSize > 0 && ySize > 0 && zSize > 0) { 01363 01364 int offset = eSize * 01365 ((maxStartZ-oldStartZ[_myCPU]) * ArrayW(*a)*ArrayH(*a) + 01366 (maxStartY-oldStartY[_myCPU]) * ArrayW(*a) + 01367 (maxStartX-oldStartX[_myCPU])); 01368 01369 MPI_Type_vector(ySize, xSize, ArrayW(*a), elem, &blk2d); 01370 MPI_Type_hvector(zSize, 1, 01371 ArrayW(*a)*ArrayH(*a)*sSize, blk2d, &blk3d); 01372 MPI_Type_commit(&blk3d); 01373 MPI_Bsend((bdata) ? 01374 ArrayPB(*a) + offset : ArrayCPB(*a) + offset, 01375 1, blk3d, partner, RDIST_TAG, MPI_COMM_WORLD); 01376 MPI_Type_free(&blk3d); 01377 MPI_Type_free(&blk2d); 01378 } 01379 01380 /*** Receive data from partner ***/ 01381 01382 maxStartX = (oldStartX[partner] > newStartX[_myCPU]) ? 01383 oldStartX[partner] : newStartX[_myCPU]; 01384 maxStartY = (oldStartY[partner] > newStartY[_myCPU]) ? 01385 oldStartY[partner] : newStartY[_myCPU]; 01386 maxStartZ = (oldStartZ[partner] > newStartZ[_myCPU]) ? 01387 oldStartZ[partner] : newStartZ[_myCPU]; 01388 minEndX = (oldEndX[partner] < newEndX[_myCPU]) ? 01389 oldEndX[partner] : newEndX[_myCPU]; 01390 minEndY = (oldEndY[partner] < newEndY[_myCPU]) ? 01391 oldEndY[partner] : newEndY[_myCPU]; 01392 minEndZ = (oldEndZ[partner] < newEndZ[_myCPU]) ? 01393 oldEndZ[partner] : newEndZ[_myCPU]; 01394 xSize = minEndX - maxStartX + 1; 01395 ySize = minEndY - maxStartY + 1; 01396 zSize = minEndZ - maxStartZ + 1; 01397 01398 if (xSize > 0 && ySize > 0 && zSize > 0) { 01399 01400 int offset = eSize * 01401 ((maxStartZ-newStartZ[_myCPU]) * ArrayW(nA)*ArrayH(nA) + 01402 (maxStartY-newStartY[_myCPU]) * ArrayW(nA) + 01403 (maxStartX-newStartX[_myCPU])); 01404 01405 MPI_Type_vector(ySize, xSize, ArrayW(nA), elem, &blk2d); 01406 MPI_Type_hvector(zSize, 1, 01407 ArrayW(nA)*ArrayH(nA)*sSize, blk2d, &blk3d); 01408 MPI_Type_commit(&blk3d); 01409 MPI_Recv((bdata) ? 01410 ArrayPB(nA) + offset : ArrayCPB(nA) + offset, 1, 01411 blk3d, partner, RDIST_TAG, MPI_COMM_WORLD, &stat); 01412 MPI_Type_free(&blk3d); 01413 MPI_Type_free(&blk2d); 01414 } 01415 } 01416 01417 /*** Clean up and make new local array permanent ***/ 01418 01419 delete oldStartX; 01420 delete oldStartY; 01421 delete oldStartZ; 01422 delete oldEndX; 01423 delete oldEndY; 01424 delete oldEndZ; 01425 delete newStartX; 01426 delete newStartY; 01427 delete newStartZ; 01428 delete newEndX; 01429 delete newEndY; 01430 delete newEndZ; 01431 MPI_Type_free(&elem); 01432 MPI_Buffer_detach(&mpibuff, &mpibuffsize); 01433 delete mpibuff; 01434 delete *a; 01435 *a = nA; 01436 }
Here is the call graph for this function:
|