template<class ArrayT>
Definition at line 640 of file PxDistribution.h. References _logCPUs, _maxCPUs, _myCPU, _nrCPUs, ArrayBD(), ArrayBH(), ArrayBW(), ArrayCPB(), ArrayD(), ArrayH(), ArrayPB(), ArrayW(), FROM, PxGetSBTorder(), PxLclArrayCopy(), PxLclStart(), PxLineRange(), and SCAT_TAG. Referenced by PxScatterArray(). 00641 { 00642 /*** SBT (Spanning Binomial Tree) or hypercube scatter. Its ***/ 00643 /*** use is restricted slightly for implementation reasons. ***/ 00644 /*** NOTE: should work correctly, except: ***/ 00645 /*** ***/ 00646 /*** - IF ("zCPUs" == 1) AND ("yCPUs" > 1) THEN ***/ 00647 /*** "xCPUs" MUST be a power of 2 (so: 1, 2, 4, 8, etc...) ***/ 00648 /*** - IF ("zCPUs" > 1) THEN ***/ 00649 /*** "xCPUs" AND "yCPUs" both MUST be a power of 2 ***/ 00650 00651 int *order = new int[_maxCPUs], myIndex, bufW, bufH, bufD; 00652 ArrayT *buf; 00653 00654 /*** Determine ordering of CPUs in Spanning Binomial Tree ***/ 00655 00656 PxGetSBTorder(root, _maxCPUs, order, &myIndex); 00657 00658 /*** Create temporary array buffer (including borders) ***/ 00659 00660 if (_myCPU == root) { 00661 bufW = ArrayW(glob); 00662 bufH = ArrayH(glob); 00663 bufD = ArrayD(glob); 00664 buf = glob; 00665 } else { 00666 bufW = PxLineRange(1, myIndex, order); 00667 bufH = PxLineRange(2, myIndex, order); 00668 bufD = PxLineRange(3, myIndex, order); 00669 buf = PxArrayCreate<ArrayT>(bufW, bufH, bufD, ArrayBW(glob), 00670 ArrayBH(glob), ArrayBD(glob)); 00671 bufW += 2*ArrayBW(glob); 00672 bufH += 2*ArrayBH(glob); 00673 bufD += 2*ArrayBD(glob); 00674 } 00675 00676 /*** Scatter array data using Spanning Binomial Tree ***/ 00677 00678 int eSize = ArrayT::ElemSize(); 00679 int sSize = eSize*sizeof(typename ArrayT::StorType); 00680 MPI_Datatype elem, blk2d, blk3d; 00681 MPI_Status stat; 00682 MPI_Type_contiguous(sSize, MPI_BYTE, &elem); 00683 00684 int mask = 1 << (_logCPUs-1); 00685 for (int i=0; i<_logCPUs; i++) { 00686 int partnerIndex = myIndex ^ mask; 00687 int partner = order[partnerIndex]; 00688 if ((myIndex % mask == 0) && (partner < _nrCPUs)) { 00689 if (myIndex < partnerIndex) { // send data to SBT child 00690 int xSize = PxLineRange(1, partnerIndex, order); 00691 int ySize = PxLineRange(2, partnerIndex, order); 00692 int zSize = PxLineRange(3, partnerIndex, order); 00693 int offset = PxLclStart(partner, ArrayBW(glob), 00694 ArrayBH(glob), ArrayBD(glob)); 00695 if (_myCPU != root) { 00696 offset -= (PxLclStart(_myCPU, ArrayBW(glob), 00697 ArrayBH(glob), ArrayBD(glob)) - 00698 (bufW*bufH*ArrayBD(glob) + 00699 bufW*ArrayBH(glob) + ArrayBW(glob))); 00700 } 00701 if (bdata) { 00702 xSize += 2*ArrayBW(glob); 00703 ySize += 2*ArrayBH(glob); 00704 zSize += 2*ArrayBD(glob); 00705 offset -= (bufW*bufH*ArrayBD(glob) + 00706 bufW*ArrayBH(glob) + ArrayBW(glob)); 00707 } 00708 if (xSize != 0 && ySize != 0 && zSize != 0) { 00709 MPI_Type_vector(ySize, xSize, bufW, elem, &blk2d); 00710 MPI_Type_hvector(zSize, 1, 00711 bufW*bufH*sSize, blk2d, &blk3d); 00712 MPI_Type_commit(&blk3d); 00713 MPI_Send(ArrayPB(buf) + offset*eSize, 1, blk3d, 00714 partner, SCAT_TAG, MPI_COMM_WORLD); 00715 MPI_Type_free(&blk3d); 00716 MPI_Type_free(&blk2d); 00717 } 00718 } else { // receive data from SBT parent 00719 int xSize = PxLineRange(1, myIndex, order); 00720 int ySize = PxLineRange(2, myIndex, order); 00721 int zSize = PxLineRange(3, myIndex, order); 00722 if (bdata) { 00723 xSize += 2*ArrayBW(glob); 00724 ySize += 2*ArrayBH(glob); 00725 zSize += 2*ArrayBD(glob); 00726 } 00727 if (xSize != 0 && ySize != 0 && zSize != 0) { 00728 MPI_Type_vector(ySize, xSize, bufW, elem, &blk2d); 00729 MPI_Type_hvector(zSize, 1, 00730 bufW*bufH*sSize, blk2d, &blk3d); 00731 MPI_Type_commit(&blk3d); 00732 MPI_Recv((bdata) ? ArrayPB(buf) : ArrayCPB(buf), 00733 1, blk3d, partner, SCAT_TAG, MPI_COMM_WORLD, 00734 &stat); 00735 MPI_Type_free(&blk3d); 00736 MPI_Type_free(&blk2d); 00737 } 00738 } 00739 } 00740 mask >>= 1; 00741 } 00742 MPI_Type_free(&elem); 00743 PxLclArrayCopy(loc, FROM, buf, root, bdata); 00744 if (_myCPU != root) { 00745 delete buf; 00746 } 00747 delete order; 00748 }
Here is the call graph for this function:
|