00001 #ifndef Impala_Core_Array_Pattern_PatRecGenConv2dSep_h
00002 #define Impala_Core_Array_Pattern_PatRecGenConv2dSep_h
00003
00004 #include "Core/Array/Pattern/PatSet.h"
00005 #include "Core/Array/Pattern/PatSetBorder.h"
00006 #include "Core/Array/Pattern/FuncRecGenConv2dSep.h"
00007 #include "Core/Array/CheckBorderSize.h"
00008 #include "Util/TimePlot.h"
00009
00010 #ifdef PX_HORUS_USED
00011 #include "Core/Array/Pattern/PxArrayFunc.h"
00012 #include "Core/Array/Pattern/PxStateTrans.h"
00013 #endif
00014
00015 namespace Impala
00016 {
00017 namespace Core
00018 {
00019 namespace Array
00020 {
00021 namespace Pattern
00022 {
00023
00024
00025 template <class DstArrayT, class SrcArrayT, class KerArrayT,
00026 class KerArithT, class PixOpT, class RedOpT>
00027 inline void
00028 PatRecGenConv2dSep(DstArrayT*& dst, SrcArrayT* src, KerArrayT* ker1,
00029 KerArrayT* ker2, KerArithT leftBorderNorm,
00030 KerArithT rightBorderNorm, KerArithT topBorderNorm,
00031 KerArithT bottomBorderNorm, PixOpT& pixOp,
00032 RedOpT& redOp, Util::TimePlot* plotter)
00033 {
00034 int numX = ArrayCW(ker1) / 2 * 2;
00035 int numY = ArrayCW(ker2) / 2 * 2;
00036 SrcArrayT* checkedSrc = CheckBorderSize(src, numX, numY);
00037
00038 if (checkedSrc != src)
00039 {
00040 if ((dst != 0) && (dst != src))
00041 delete dst;
00042 dst = checkedSrc;
00043 }
00044 else
00045 {
00046 if (dst != src)
00047 PatSet(dst, src);
00048 }
00049
00050 KerArithT dummy = Element::E1Cast(0, KerArithT());
00051
00052 #ifdef PX_HORUS_USED
00053 if (!PxRunParallel()) {
00054 #endif
00055 if (plotter)
00056 plotter->SplitTime();
00057 PatSetBorder(dst, numX, 0, BORDERPROPAGATENORMALIZED, dummy,
00058 leftBorderNorm, rightBorderNorm, topBorderNorm,
00059 bottomBorderNorm);
00060 if (plotter)
00061 plotter->SplitTime();
00062 FuncRecGenConv2dSepDispatch_H(dst, ker1, pixOp, redOp, rightBorderNorm);
00063 if (plotter)
00064 plotter->SplitTime();
00065 PatSetBorder(dst, 0, numY, BORDERPROPAGATENORMALIZED, dummy,
00066 leftBorderNorm, rightBorderNorm, topBorderNorm,
00067 bottomBorderNorm);
00068 if (plotter)
00069 plotter->SplitTime();
00070 FuncRecGenConv2dSepDispatch_V(dst, ker2, pixOp, redOp, bottomBorderNorm,
00071 false);
00072
00073 #ifdef PX_HORUS_USED
00074 } else {
00075 int oldXCPUs = PxXCPUs();
00076 int oldYCPUs = PxYCPUs();
00077 int oldZCPUs = PxZCPUs();
00078
00079
00080
00081 if (PxYCPUs() != PxNrCPUs()) {
00082 PxInitSystemGrid(1, PxNrCPUs(), 1);
00083 int dstState = PxArrayGetState(dst);
00084 if (dstState == SEQ_PART || dstState == PAR_PART) {
00085 PxRedistArray(&(dst->mPartArray),1,PxNrCPUs(),1,false);
00086 }
00087 }
00088
00089
00090
00091 PxArrayPreStateTrans(dst, PAR_PART, STRONG);
00092 PatSetBorder(PxArrayPD(dst), numX, 0,
00093 BORDERPROPAGATENORMALIZED, dummy, leftBorderNorm,
00094 rightBorderNorm, topBorderNorm, bottomBorderNorm);
00095 FuncRecGenConv2dSepDispatch_H(PxArrayPD(dst), ker1,
00096 pixOp, redOp, rightBorderNorm);
00097
00098
00099
00100 PxInitSystemGrid(PxNrCPUs(), 1, 1);
00101 PxRedistArray(&(dst->mPartArray), PxNrCPUs(), 1, 1, false);
00102
00103
00104
00105 PatSetBorder(PxArrayPD(dst), 0, numY,
00106 BORDERPROPAGATENORMALIZED, dummy, leftBorderNorm,
00107 rightBorderNorm, topBorderNorm, bottomBorderNorm);
00108 FuncRecGenConv2dSepDispatch_V(PxArrayPD(dst), ker2, pixOp,
00109 redOp, bottomBorderNorm, false);
00110 PxArrayPostStateTrans(dst);
00111
00112
00113
00114 PxInitSystemGrid(oldXCPUs, oldYCPUs, oldZCPUs);
00115 PxRedistArray(&(dst->mPartArray),
00116 oldXCPUs, oldYCPUs, oldZCPUs, false);
00117
00118 if (!PxRunLazyParallel()) {
00119 PxArrayForceNonDistributed(dst);
00120 }
00121 }
00122 #endif
00123 }
00124
00125 }
00126 }
00127 }
00128 }
00129
00130 #endif