00001 /* 00002 * Copyright (c) 2003-2004, University of Amsterdam, The Netherlands. 00003 * All rights reserved. 00004 * 00005 * Author(s): 00006 * Frank Seinstra <fjseins@wins.uva.nl> 00007 */ 00008 00009 #ifndef __PxPartition_h_ 00010 #define __PxPartition_h_ 00011 00012 00013 #include "Core/Array/Pattern/PxSystem.h" 00014 00015 namespace Impala 00016 { 00017 namespace Core 00018 { 00019 namespace Array 00020 { 00021 namespace Pattern 00022 { 00023 00024 00025 /**********************************************************************/ 00026 /*** The partitioning routines below define which compute node is ***/ 00027 /*** responsible for which part of an array data structure, EXCLU- ***/ 00028 /*** DING ANY POTENTIAL BORDER INFORMATION! ***/ 00029 /*** In this implementation, the responsibilities are dependent on ***/ 00030 /*** the number of compute nodes that are available, how the nodes ***/ 00031 /*** are logically connected and how the array data is mapped onto ***/ 00032 /*** the logical 3D grid formed by the compute nodes. ***/ 00033 /*** Here, the grid of compute nodes is formed based on the three ***/ 00034 /*** parameters to 'PxInitPartition(..)' that represent the number ***/ 00035 /*** of compute nodes in each of the grid's 3 dimensions ('xcpus', ***/ 00036 /*** 'ycpus', and 'zcpus'). ***/ 00037 /*** ***/ 00038 /*** [see: PxInitPartition(), PxRePartition(), ***/ 00039 /*** PxLogPartXcpus(), PxLogPartYcpus(), PxLogPartZcpus()] ***/ 00040 /*** ***/ 00041 /*** The total number of CPUs (nrCPUs) used is given by: ***/ 00042 /*** ***/ 00043 /*** nrCPUs = xCPUs * yCPUs * zCPUs ***/ 00044 /*** ***/ 00045 /*** Each node has a unique number from the set of consecutive in- ***/ 00046 /*** tegers in the range: 0 .. nrCPUs-1. ***/ 00047 /*** Nodes are positioned in a grid according to their unique num- ***/ 00048 /*** ber (starting with node 0 and increasing by 1 after each pla- ***/ 00049 /*** cement), entirely filling the 3 dimensions in turn (x follow- ***/ 00050 /*** by y, followed by z). ***/ 00051 /*** A grid having 'xCPUs'=4, 'yCPUs'=3, and 'zCPUs'=2, thus looks ***/ 00052 /*** as follows ('CN' means 'Compute Node'): ***/ 00053 /*** ***/ 00054 /*** z --------------------------------------- ***/ 00055 /*** / /| [CN 12] | [CN 13] | [CN 14] | [CN 15]/| ***/ 00056 /*** / / |---------|---------|---------|-------/-| ***/ 00057 /*** 0---- x BACK/->| [CN 16] | [CN 17] | [CN 18] | [CN 19] | ***/ 00058 /*** | / |---------|---------|---------|-----/---| ***/ 00059 /*** | / | [CN 20] | [CN 21] | [CN 22] | [CN/23] | ***/ 00060 /*** y / /---------------------------------/-----/ ***/ 00061 /*** --------------------------------------- / ***/ 00062 /*** TOP ->| [CN 0] | [CN 1] | [CN 2] | [CN 3] | / ***/ 00063 /*** |---------|---------|---------|---------| / ***/ 00064 /*** | [CN 4] | [CN 5] | [CN 6] | [CN 7] |<- FRONT ***/ 00065 /*** |---------|---------|---------|---------| / ***/ 00066 /*** BOTTOM ->| [CN 8] | [CN 9] | [CN 10] | [CN 11] |/ ***/ 00067 /*** --------------------------------------- ***/ 00068 /*** ^ ^ ***/ 00069 /*** | | ***/ 00070 /*** LEFT RIGHT ***/ 00071 /*** ***/ 00072 /*** Given this grid definition it is always possible to determine ***/ 00073 /*** the grid-index of a compute-node for each of the 3 dimensions ***/ 00074 /*** by its unique number. For example: the compute nodes 0, 4, 8, ***/ 00075 /*** 12, 16, and 20, all have a 'gridIndexX' of 0; the nodes 8, 9, ***/ 00076 /*** 10, 11, 20, 21, 22, and 23, all have a 'gridIndexY' of 2; the ***/ 00077 /*** nodes 12-23 all have a 'gridIndexZ' of 1. ***/ 00078 /*** ***/ 00079 /*** [see: PxGridIndexX(), PxGridIndexY(), PxGridIndexZ()] ***/ 00080 /*** ***/ 00081 /*** For each node it is also possible to determine its neighbours ***/ 00082 /*** in each of the 3 dimensions. For example, compute node 5 has ***/ 00083 /*** the following neighbours: ***/ 00084 /*** left neighb. = 4 top neighb. = 1 front neighb.= none\-1 ***/ 00085 /*** right neighb.= 6 bottom neighb.= 9 back neigb. = 17 ***/ 00086 /*** ***/ 00087 /*** [see: PxMyLeftCPU(), PxMyRightCPU(), PxMyUpCPU(), ***/ 00088 /*** PxMyDownCPU(), PxMyFrontCPU(), PxMyBackCPU() ] ***/ 00089 /*** ***/ 00090 /*** Finally, for each node it is possible to determine whether it ***/ 00091 /*** is placed in one of the 'outermost' slices of the grid, i.e.: ***/ 00092 /*** left, right, top, bottom, front, back. CN 23, for example, is ***/ 00093 /*** placed in the rightmost slice, but also in the slices at the ***/ 00094 /*** bottom and back. ***/ 00095 /*** ***/ 00096 /*** [see: PxIsLeftCPU(), PxIsRightCPU(), PxIsTopCPU(), ***/ 00097 /*** PxIsBottomCPU(), PxIsFrontCPU(), PxIsBackCPU() ] ***/ 00098 /*** ***/ 00099 /*** The logical view of an array is similar to that of the compu- ***/ 00100 /*** te node grid defined above. This means that in the process of ***/ 00101 /*** partitioning the number of pixels in the x-direction (width) ***/ 00102 /*** of the array is divided by the number of nodes defined in the ***/ 00103 /*** x-direction of the grid. The same holds for the additional y- ***/ 00104 /*** and z-directions. ***/ 00105 /*** In the process of partitioning it may turn out that the size ***/ 00106 /*** of the global array in any of the 3 dimensions is not a full ***/ 00107 /*** multiple of the number of compute nodes defined in each rela- ***/ 00108 /*** ted dimension respectively. A standard division therefore re- ***/ 00109 /*** sults in the 'mimimal number of pixels in one dimension' that ***/ 00110 /*** all compute nodes are at least responsible for. Any possible ***/ 00111 /*** amount of remaining pixels in 1 dimension (called 'overflow') ***/ 00112 /*** is partitioned by incrementing (by '1') the 'amount of pixels ***/ 00113 /*** responsible for' for as many of the lowest numbered nodes in ***/ 00114 /*** that dimension as needed, for all values of the remaining di- ***/ 00115 /*** mensions. ***/ 00116 /*** ***/ 00117 /*** [see: PxMinLclWidth(), PxMinLclHeight(), PxMinLclDepth() ***/ 00118 /*** PxOverflowX(), PxOverflowY(), PxOverflowZ() ] ***/ 00119 /*** ***/ 00120 /*** An array with 'width'=19, 'height'=10, and 'depth'=7, for the ***/ 00121 /*** grid drawn above will be partitioned in the following manner: ***/ 00122 /*** ***/ 00123 /*** --------------------------------------- ***/ 00124 /*** /| (5,4,2) | (5,4,2) | (5,4,2) | (4,4,2)/| ***/ 00125 /*** / |---------|---------|---------|-------/-| ***/ 00126 /*** / | (5,3,2) | (5,3,2) | (5,3,2) | (4,3,2) | ***/ 00127 /*** / |---------|---------|---------|-----/---| ***/ 00128 /*** / | (5,3,2) | (5,3,2) | (5,3,2) | (4,3,2) | ***/ 00129 /*** / /---------------------------------/-----/ ***/ 00130 /*** --------------------------------------- / ***/ 00131 /*** | (5,4,3) | (5,4,3) | (5,4,3) | (4,4,3) | / ***/ 00132 /*** |---------|---------|---------|---------| / ***/ 00133 /*** | (5,3,3) | (5,3,3) | (5,3,3) | (4,3,3) | / ***/ 00134 /*** |---------|---------|---------|---------| / ***/ 00135 /*** | (5,3,3) | (5,3,3) | (5,3,3) | (4,3,3) |/ ***/ 00136 /*** --------------------------------------- ***/ 00137 /*** ***/ 00138 /*** (a,b,c) = (width,height,depth) ***/ 00139 /*** ***/ 00140 /*** Based on this way of partitioning the size of a partial array ***/ 00141 /*** (the part of the entire array one node is responsible for) is ***/ 00142 /*** obtainable from the unique number of the compute node. ***/ 00143 /*** ***/ 00144 /*** [see: PxLclWidth(), PxLclHeight(), PxLclDepth() ] ***/ 00145 /*** ***/ 00146 /*** Also, it is possible to obtain the index (for each dimension) ***/ 00147 /*** of the starting point of a partial array in the entire array. ***/ 00148 /*** ***/ 00149 /*** [see: PxLclStartX(), PxLclStartY(), PxLclStartZ() ] ***/ 00150 /*** ***/ 00151 /**********************************************************************/ 00152 00153 00154 /*** Global variables *************************************************/ 00155 00156 int _fullW; /* Global array x-size (core-width) */ 00157 int _fullH; /* Global array y-size (core-height) */ 00158 int _fullD; /* Global array z-size (core-depth) */ 00159 00160 int _xCPUs; /* Nr. of CPUs in x-direction of grid */ 00161 int _yCPUs; /* Nr. of CPUs in y-direction of grid */ 00162 int _zCPUs; /* Nr. of CPUs in z-direction of grid */ 00163 00164 int _leftCPU; /* CPU-nr of left neigbour of myCPU */ 00165 int _rightCPU; /* CPU-nr of right neigbour of myCPU */ 00166 int _upCPU; /* CPU-nr of up neighbour of myCPU */ 00167 int _downCPU; /* CPU-nr of down neigbour of myCPU */ 00168 int _frontCPU; /* CPU-nr of front neigbour of myCPU */ 00169 int _backCPU; /* CPU-nr of back neigbour of myCPU */ 00170 00171 /* NOTE: The following parameters represent the minimum number of */ 00172 /* pixels all CPUs are responsible for in each of the 3 directions. */ 00173 00174 int _minLocalW; /* Min. #elements in x-dir. (on each CPU) */ 00175 int _minLocalH; /* Min. #elements in y-dir. (on each CPU) */ 00176 int _minLocalD; /* Min. #elements in z-dir. (on each CPU) */ 00177 00178 /* NOTE: CPUs may be responsible for an additional elemt in any of */ 00179 /* the three directions (due to the fact that the nr. of elemts in */ 00180 /* a certain direction is not a (full) multiple of the nr. of CPUs */ 00181 /* defined in that direction). The following parameters represent */ 00182 /* the total amount of this 'element-overflow' in each direction. */ 00183 00184 int _overflowX; /* Element overflow in x-direction */ 00185 int _overflowY; /* Element overflow in y-direction */ 00186 int _overflowZ; /* Element overflow in z-direction */ 00187 00188 00189 /*** Functions ********************************************************/ 00190 00191 00192 inline int 00193 PxFullWidth() 00194 { 00195 return _fullW; 00196 } 00197 00198 00199 inline int 00200 PxFullHeight() 00201 { 00202 return _fullH; 00203 } 00204 00205 00206 inline int 00207 PxFullDepth() 00208 { 00209 return _fullD; 00210 } 00211 00212 00213 inline int 00214 PxFullSize() 00215 { 00216 return (_fullW * _fullH * _fullD); 00217 } 00218 00219 00220 /**********************************************************************/ 00221 00222 00223 inline int 00224 PxLogPartXcpus() 00225 { 00226 return _xCPUs; 00227 } 00228 00229 00230 inline int 00231 PxLogPartYcpus() 00232 { 00233 return _yCPUs; 00234 } 00235 00236 00237 inline int 00238 PxLogPartZcpus() 00239 { 00240 return _zCPUs; 00241 } 00242 00243 00244 /**********************************************************************/ 00245 00246 00247 inline int 00248 PxMyLeftCPU() 00249 { 00250 return _leftCPU; 00251 } 00252 00253 00254 inline int 00255 PxMyRightCPU() 00256 { 00257 return _rightCPU; 00258 } 00259 00260 00261 inline int 00262 PxMyUpCPU() 00263 { 00264 return _upCPU; 00265 } 00266 00267 00268 inline int 00269 PxMyDownCPU() 00270 { 00271 return _downCPU; 00272 } 00273 00274 00275 inline int 00276 PxMyFrontCPU() 00277 { 00278 return _frontCPU; 00279 } 00280 00281 00282 inline int 00283 PxMyBackCPU() 00284 { 00285 return _backCPU; 00286 } 00287 00288 00289 /**********************************************************************/ 00290 00291 00292 inline int 00293 PxMinLclWidth() 00294 { 00295 return _minLocalW; 00296 } 00297 00298 00299 inline int 00300 PxMinLclHeight() 00301 { 00302 return _minLocalH; 00303 } 00304 00305 00306 inline int 00307 PxMinLclDepth() 00308 { 00309 return _minLocalD; 00310 } 00311 00312 00313 /**********************************************************************/ 00314 00315 00316 inline int 00317 PxOverflowX() 00318 { 00319 return _overflowX; 00320 } 00321 00322 00323 inline int 00324 PxOverflowY() 00325 { 00326 return _overflowY; 00327 } 00328 00329 00330 inline int 00331 PxOverflowZ() 00332 { 00333 return _overflowZ; 00334 } 00335 00336 00337 /**********************************************************************/ 00338 00339 00340 inline int 00341 PxGridIndexX(int cpu) 00342 { 00343 return (cpu % _xCPUs); 00344 } 00345 00346 00347 inline int 00348 PxGridIndexY(int cpu) 00349 { 00350 return ((cpu / _xCPUs) % _yCPUs); 00351 } 00352 00353 00354 inline int 00355 PxGridIndexZ(int cpu) 00356 { 00357 return (cpu / (_xCPUs * _yCPUs)); 00358 } 00359 00360 00361 /**********************************************************************/ 00362 00363 00364 inline int 00365 PxIsLeftCPU(int cpu) 00366 { 00367 return ((PxGridIndexX(cpu) == 0) ? 1 : 0); 00368 } 00369 00370 00371 inline int 00372 PxIsRightCPU(int cpu) 00373 { 00374 return ((PxGridIndexX(cpu) == _xCPUs-1) ? 1 : 0); 00375 } 00376 00377 00378 inline int 00379 PxIsTopCPU(int cpu) 00380 { 00381 return ((PxGridIndexY(cpu) == 0) ? 1 : 0); 00382 } 00383 00384 00385 inline int 00386 PxIsBottomCPU(int cpu) 00387 { 00388 return ((PxGridIndexY(cpu) == _yCPUs-1) ? 1 : 0); 00389 } 00390 00391 00392 inline int 00393 PxIsFrontCPU(int cpu) 00394 { 00395 return ((PxGridIndexZ(cpu) == 0) ? 1 : 0); 00396 } 00397 00398 00399 inline int 00400 PxIsBackCPU(int cpu) 00401 { 00402 return ((PxGridIndexZ(cpu) == _zCPUs-1) ? 1 : 0); 00403 } 00404 00405 00406 /**********************************************************************/ 00407 00408 00409 inline int 00410 PxLclWidth(int cpu) 00411 { 00412 if (PxGridIndexX(cpu) < _overflowX) { 00413 return (_minLocalW + 1); 00414 } else { 00415 return _minLocalW; 00416 } 00417 } 00418 00419 00420 inline int 00421 PxLclHeight(int cpu) 00422 { 00423 if (PxGridIndexY(cpu) < _overflowY) { 00424 return (_minLocalH + 1); 00425 } else { 00426 return _minLocalH; 00427 } 00428 } 00429 00430 00431 inline int 00432 PxLclDepth(int cpu) 00433 { 00434 if (PxGridIndexZ(cpu) < _overflowZ) { 00435 return (_minLocalD + 1); 00436 } else { 00437 return _minLocalD; 00438 } 00439 } 00440 00441 00442 inline int 00443 PxLclSize(int cpu) 00444 { 00445 return (PxLclWidth(cpu) * PxLclHeight(cpu) * PxLclDepth(cpu)); 00446 } 00447 00448 00449 /**********************************************************************/ 00450 00451 00452 inline int 00453 PxLclStartX(int cpu) 00454 { 00455 /* Gives index in x-direction of global array representing the 00456 * start of the partial array for which CPU 'cpu' is responsible. 00457 */ 00458 00459 if (PxGridIndexX(cpu) < _overflowX) { 00460 return (PxGridIndexX(cpu) * (_minLocalW + 1)); 00461 } else { 00462 return (PxGridIndexX(cpu) * _minLocalW + _overflowX); 00463 } 00464 } 00465 00466 00467 inline int 00468 PxLclStartY(int cpu) 00469 { 00470 /* Gives index in y-direction of global array representing the 00471 * start of the partial array for which CPU 'cpu' is responsible. 00472 */ 00473 00474 if (PxGridIndexY(cpu) < _overflowY) { 00475 return (PxGridIndexY(cpu) * (_minLocalH + 1)); 00476 } else { 00477 return (PxGridIndexY(cpu) * _minLocalH + _overflowY); 00478 } 00479 } 00480 00481 00482 inline int 00483 PxLclStartZ(int cpu) 00484 { 00485 /* Gives index in z-direction of global array representing the 00486 * start of the partial array for which CPU 'cpu' is responsible. 00487 */ 00488 00489 if (PxGridIndexZ(cpu) < _overflowZ) { 00490 return (PxGridIndexZ(cpu) * (_minLocalD + 1)); 00491 } else { 00492 return (PxGridIndexZ(cpu) * _minLocalD + _overflowZ); 00493 } 00494 } 00495 00496 00497 inline int 00498 PxLclStart(int cpu, int bw=0, int bh=0, int bd=0) 00499 { 00500 /* Gives index in global array representing the start of the 00501 * partial array for which CPU 'cpu' is responsible - INCLUDING 00502 * any potential borders surrounding the array structure. 00503 * 00504 * BEWARE: Dependent on array data layout in memory!!! 00505 */ 00506 00507 int realW = _fullW + 2 * bw; 00508 int realH = _fullH + 2 * bh; 00509 00510 return (((PxLclStartZ(cpu) + bd) * realW * realH) + 00511 ((PxLclStartY(cpu) + bh) * realW) + 00512 ((PxLclStartX(cpu) + bw))); 00513 } 00514 00515 00516 /**********************************************************************/ 00517 00518 00519 inline void 00520 PxInitPartition(int aw, int ah, int ad, 00521 int xcpus, int ycpus, int zcpus) 00522 { 00523 int _myCPU = PxMyCPU(); 00524 00525 _fullW = (aw < 1) ? 1 : aw; 00526 _fullH = (ah < 1) ? 1 : ah; 00527 _fullD = (ad < 1) ? 1 : ad; 00528 00529 _xCPUs = (xcpus < 1) ? 1 : xcpus; 00530 _yCPUs = (ycpus < 1) ? 1 : ycpus; 00531 _zCPUs = (zcpus < 1) ? 1 : zcpus; 00532 00533 _leftCPU = (PxIsLeftCPU(_myCPU)) ? -1 : _myCPU - 1; 00534 _rightCPU = (PxIsRightCPU(_myCPU)) ? -1 : _myCPU + 1; 00535 _upCPU = (PxIsTopCPU(_myCPU)) ? -1 : _myCPU - _xCPUs; 00536 _downCPU = (PxIsBottomCPU(_myCPU)) ? -1 : _myCPU + _xCPUs; 00537 _frontCPU = (PxIsFrontCPU(_myCPU)) ? -1 : _myCPU - (_xCPUs*_yCPUs); 00538 _backCPU = (PxIsBackCPU(_myCPU)) ? -1 : _myCPU + (_xCPUs*_yCPUs); 00539 00540 _minLocalW = _fullW / _xCPUs; 00541 _minLocalH = _fullH / _yCPUs; 00542 _minLocalD = _fullD / _zCPUs; 00543 00544 _overflowX = _fullW % _xCPUs; 00545 _overflowY = _fullH % _yCPUs; 00546 _overflowZ = _fullD % _zCPUs; 00547 } 00548 00549 00550 inline void 00551 PxRePartition(int xcpus, int ycpus, int zcpus) 00552 { 00553 PxInitPartition(_fullW, _fullH, _fullD, xcpus, ycpus, zcpus); 00554 } 00555 00556 00557 } // namespace Pattern 00558 } // namespace Array 00559 } // namespace Core 00560 } // namespace Impala 00561 00562 #endif /* __PxPartition_h_*/