template<class DstArrayT, class SrcArrayT, class KerArrayT, class KerArithT, class PixOpT, class RedOpT>
Definition at line 28 of file PatRecGenConv2dSep.h.

References ArrayCW(), BORDERPROPAGATENORMALIZED, Impala::Core::Array::CheckBorderSize(), Impala::Core::Array::Element::E1Cast(), FuncRecGenConv2dSepDispatch_H(), FuncRecGenConv2dSepDispatch_V(), Impala::Core::Array::PAR_PART, PatSet(), PatSetBorder(), PxArrayForceNonDistributed(), PxArrayGetState(), PxArrayPD(), PxArrayPostStateTrans(), PxArrayPreStateTrans(), PxInitSystemGrid(), PxNrCPUs(), PxRedistArray(), PxRunLazyParallel(), PxRunParallel(), PxXCPUs(), PxYCPUs(), PxZCPUs(), Impala::Core::Array::SEQ_PART, Impala::Util::TimePlot::SplitTime(), and STRONG.

Referenced by Impala::Samples::Talk::DoTalkRecGenConv2dSep(), and Impala::Core::Array::RecConvKernelSeparated().

00033 {
00034     int numX = ArrayCW(ker1) / 2 * 2;
00035     int numY = ArrayCW(ker2) / 2 * 2;
00036     SrcArrayT* checkedSrc = CheckBorderSize(src, numX, numY);
00037
00038     if (checkedSrc != src)
00039     {
00040         if ((dst != 0) && (dst != src))
00041             delete dst;
00042         dst = checkedSrc;
00043     }
00044     else
00045     {
00046         if (dst != src)
00047             PatSet(dst, src);
00048     }
00049
00050     KerArithT dummy = Element::E1Cast(0, KerArithT());
00051
00052 #ifdef PX_HORUS_USED
00053     if (!PxRunParallel()) { // run sequential
00054 #endif
00055     if (plotter)
00056         plotter->SplitTime();
00057     PatSetBorder(dst, numX, 0, BORDERPROPAGATENORMALIZED, dummy,
00058                  leftBorderNorm, rightBorderNorm, topBorderNorm,
00059                  bottomBorderNorm);
00060     if (plotter)
00061         plotter->SplitTime();
00062     FuncRecGenConv2dSepDispatch_H(dst, ker1, pixOp, redOp, rightBorderNorm);
00063     if (plotter)
00064         plotter->SplitTime();
00065     PatSetBorder(dst, 0, numY, BORDERPROPAGATENORMALIZED, dummy,
00066                  leftBorderNorm, rightBorderNorm, topBorderNorm,
00067                  bottomBorderNorm);
00068     if (plotter)
00069         plotter->SplitTime();
00070     FuncRecGenConv2dSepDispatch_V(dst, ker2, pixOp, redOp, bottomBorderNorm,
00071                                   false);
00072
00073 #ifdef PX_HORUS_USED
00074     } else { // run parallel
00075         int oldXCPUs = PxXCPUs();
00076         int oldYCPUs = PxYCPUs();
00077         int oldZCPUs = PxZCPUs();
00078
00079         // Initial array redistribution (if needed)
00080
00081         if (PxYCPUs() != PxNrCPUs()) {
00082             PxInitSystemGrid(1, PxNrCPUs(), 1);
00083             int dstState = PxArrayGetState(dst);
00084             if (dstState == SEQ_PART || dstState == PAR_PART) {
00085                 PxRedistArray(&(dst->mPartArray),1,PxNrCPUs(),1,false);
00086             }
00087         }
00088
00089         // Compute horizontal
00090
00091         PxArrayPreStateTrans(dst, PAR_PART, STRONG);
00092         PatSetBorder(PxArrayPD(dst), numX, 0,
00093                      BORDERPROPAGATENORMALIZED, dummy, leftBorderNorm,
00094                      rightBorderNorm, topBorderNorm, bottomBorderNorm);
00095         FuncRecGenConv2dSepDispatch_H(PxArrayPD(dst), ker1,
00096                                       pixOp, redOp, rightBorderNorm);
00097
00098         // Redistribute array
00099
00100         PxInitSystemGrid(PxNrCPUs(), 1, 1);
00101         PxRedistArray(&(dst->mPartArray), PxNrCPUs(), 1, 1, false);
00102
00103         // Compute vertical
00104
00105         PatSetBorder(PxArrayPD(dst), 0, numY,
00106                      BORDERPROPAGATENORMALIZED, dummy, leftBorderNorm,
00107                      rightBorderNorm, topBorderNorm, bottomBorderNorm);
00108         FuncRecGenConv2dSepDispatch_V(PxArrayPD(dst), ker2, pixOp,
00109                                       redOp, bottomBorderNorm, false);
00110         PxArrayPostStateTrans(dst);
00111
00112         // Undo array redistribution
00113
00114         PxInitSystemGrid(oldXCPUs, oldYCPUs, oldZCPUs);
00115         PxRedistArray(&(dst->mPartArray),
00116                       oldXCPUs, oldYCPUs, oldZCPUs, false);
00117
00118         if (!PxRunLazyParallel()) {
00119             PxArrayForceNonDistributed(dst);
00120         }
00121     }
00122 #endif
00123 }
Here is the call graph for this function:
|