Horus Doc || C++ Reference || Class Overview   Pixels   Images   Detector   Geometry   Registry || Doxygen's quick Index  

HxFuncGenConv2dSep.c File Reference

More...

#include "HxFuncGenConv2dSep.h"
#include "HxEnvironment.h"

Pix_variations

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Pix_Xdir (ArithT *buf, SrcDataPtrT srcPtr, ArithT *kernel, int bufIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp, ArithT neutralElement)
 Pix : X direction. More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Pix_Ydir (DstDataPtrT dstPtr, ArithT *bufPtr, ArithT *kernel, int bufWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp, ArithT neutralElement)
 Pix : Y direction. More...


Line_variations

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_Xdir (ArithT *bufLine, SrcDataPtrT srcPtr, ArithT *kernel, int srcWidth, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : X direction. More...

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_XdirInc (ArithT *bufLine, SrcDataPtrT srcPtr, ArithT *kernel, int srcWidth, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : X direction (inc). More...

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_XdirVerInc (ArithT *buf, SrcDataPtrT srcPtr, ArithT *kernel1, int srcWidth, int dstWidth, int ker1Width, int ker2Width, PixOpT &pixOp, RedOpT &redOp)
 Line : X direction, "vertical buffer" (inc). More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_YdirNaiInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int dstHeight, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : Y direction, naive (inc). More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_YdirSim (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : Y direction, simple. More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_YdirSimInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : Y direction, simple (inc). More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_YdirHor (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int lineIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : Y direction, "horizontal buffer". More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_YdirHorInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int lineIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : Y direction, "horizontal buffer" (inc). More...

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_YdirVerInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int lineIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp)
 Line : Y direction, "vertical buffer" (inc). More...

template<class DstDataPtrT, class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_XYdirVerCycInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *bufPtr, SrcDataPtrT srcPtr, ArithT *kernel1, ArithT *kernel2, int dstWidth, int ker1Width, int ker2Width, PixOpT &pixOp, RedOpT &redOp)
 Line : X and Y direction, "vertical buffer", two-way cyclic (inc). More...

template<class DstDataPtrT, class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Line_XYdirMinInc (DstDataPtrT dstPtr, ArithT *buf, SrcDataPtrT srcPtr, ArithT *kernel1, ArithT *kernel2, int srcWidth, int dstWidth, int ker1Width, int ker2Width, PixOpT &pixOp, RedOpT &redOp)
 Line : X and Y direction, "minimal buffer" (inc). More...


GenConv2dSep_variations

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Sim (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType)
 GenConv2dSep : simple. More...

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Hor (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType)
 GenConv2dSep : "horizontal buffer". More...

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Ver (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType)
 GenConv2dSep : "vertical buffer". More...

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_VerCyc (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType)
 GenConv2dSep : "vertical buffer", two-way cyclic (localized computation). More...

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void HxFuncGenConv2dSep_Min (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType)
 GenConv2dSep : "minimal buffer". More...


Functions

template<class KernelT, class ArithType> ArithType * HxFuncGenConv2dSep_CopyKernel (KernelT &kernel, ArithType)
 Copy (1d) kernel to an array of ArithType elements. More...

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void HxFuncGenConv2dSepDispatch (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType)
 Dispatch function for GenConv2dSep (see Global functions for GenConv2dSep). Dispatch is based on the vType parameter. More...


Detailed Description


Function Documentation

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Pix_Xdir ArithT *    buf,
SrcDataPtrT    srcPtr,
ArithT *    kernel,
int    bufIdx,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp,
ArithT    neutralElement
[inline, static]
 

Pix : X direction.

Does a single generalized convolution (genconv) between srcPtr and kernel over kerWidth pixels and stores the result at buf[bufIdx]. The memory layout of both srcPtr and kernel is assumed to be contiguous.

00082 {
00083     ArithT result(neutralElement);
00084     for (int k=0; k<kerWidth; k++)
00085         redOp.doIt(result, pixOp.doIt(srcPtr.readIncX(), kernel[k]));
00086     buf[bufIdx] = result;
00087 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Pix_Ydir DstDataPtrT    dstPtr,
ArithT *    bufPtr,
ArithT *    kernel,
int    bufWidth,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp,
ArithT    neutralElement
[inline, static]
 

Pix : Y direction.

Does a single generalized convolution (genconv) between bufPtr and kernel over kerWidth pixels and stores the result at dstPtr. The memory layout of kernel is assumed to be contiguous. Pixels in bufPtr are assumed to be bufWidth elements apart.

00103 {
00104     ArithT result(neutralElement);
00105     int idx = 0;
00106     for (int k=0; k<kerWidth; k++) {
00107         redOp.doIt(result, pixOp.doIt(bufPtr[idx], kernel[k]));
00108         idx += bufWidth;
00109     }
00110     dstPtr.write(result);
00111 }

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_Xdir ArithT *    bufLine,
SrcDataPtrT    srcPtr,
ArithT *    kernel,
int    srcWidth,
int    dstWidth,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : X direction.

Processes an entire line starting at srcPtr. Calls HxFuncGenConv2dSep_Pix_Xdir "dstWidth" times, once for each neighbourhood. The resulting "dstWidth" values are stored in bufLine (contiguous).

00132 {
00133     for (int x=0 ; x<dstWidth ; x++) {
00134         HxFuncGenConv2dSep_Pix_Xdir(
00135             bufLine, srcPtr, kernel, x, kerWidth,
00136             pixOp, redOp, RedOpT::neutralElement());
00137         srcPtr.incX();
00138     }
00139 }

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_XdirInc ArithT *    bufLine,
SrcDataPtrT    srcPtr,
ArithT *    kernel,
int    srcWidth,
int    dstWidth,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : X direction (inc).

Processes an entire line starting at srcPtr (including the "Pix" variation). The resulting "dstWidth" values are stored in bufLine (contiguous).

This function does the same operation as HxFuncGenConv2dSep_Line_Xdir but the code of HxFuncGenConv2dSep_Pix_Xdir is inserted in the loop (instead of calling the function like HxFuncGenConv2dSep_Line_Xdir does).

00156 {
00157     ArithT neutralElement = RedOpT::neutralElement();
00158     for (int x=0 ; x<dstWidth ; x++) {
00159         SrcDataPtrT sPtr(srcPtr);
00160         ArithT result(neutralElement);
00161         for (int k=0; k<kerWidth; k++)
00162             redOp.doIt(result, pixOp.doIt(sPtr.readIncX(), kernel[k]));
00163         bufLine[x] = result;
00164         srcPtr.incX();
00165     }
00166 }

template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_XdirVerInc ArithT *    buf,
SrcDataPtrT    srcPtr,
ArithT *    kernel1,
int    srcWidth,
int    dstWidth,
int    ker1Width,
int    ker2Width,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : X direction, "vertical buffer" (inc).

Processes an entire line starting at srcPtr (including the "Pix" variation). The resulting "dstWidth" values are stored in buf using a stride ker2Width.

00180 {
00181     ArithT neutralElement = RedOpT::neutralElement();
00182     int idx = 0;
00183     for (int x=0 ; x<dstWidth ; x++) {
00184         SrcDataPtrT sPtr(srcPtr);
00185         ArithT result(neutralElement);
00186         for (int k=0; k<ker1Width; k++)
00187             redOp.doIt(result, pixOp.doIt(sPtr.readIncX(), kernel1[k]));
00188         buf[idx] = result;
00189         idx += ker2Width;
00190         srcPtr.incX();
00191     }
00192 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_YdirNaiInc DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    kernel,
int    dstWidth,
int    dstHeight,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : Y direction, naive (inc).

00201 {
00202     ArithT neutralElement = RedOpT::neutralElement();
00203     for (int y=0 ; y<dstHeight ; y++) {
00204         ArithT result(neutralElement);
00205         int idx = 0;
00206         for (int k=0; k<kerWidth; k++) {
00207             redOp.doIt(result, pixOp.doIt(buf[idx], kernel[k]));
00208             idx += dstWidth;
00209         }
00210         dstPtr.write(result);
00211         dstPtr.incY();
00212         buf += dstWidth;
00213     }
00214 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_YdirSim DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    kernel,
int    dstWidth,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : Y direction, simple.

00223 {
00224     for (int x=0 ; x<dstWidth ; x++) {
00225         ArithT* bPtr = &buf[x];
00226         HxFuncGenConv2dSep_Pix_Ydir(
00227             dstPtr, bPtr, kernel, dstWidth, kerWidth,
00228             pixOp, redOp, RedOpT::neutralElement());
00229         dstPtr.incX();
00230     }
00231 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_YdirSimInc DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    kernel,
int    dstWidth,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : Y direction, simple (inc).

00240 {
00241     ArithT neutralElement = RedOpT::neutralElement();
00242     for (int x=0 ; x<dstWidth ; x++) {
00243         ArithT* bufPtr = &buf[x];
00244         ArithT result(neutralElement);
00245         int idx = 0;
00246         for (int k=0; k<kerWidth; k++) {
00247             redOp.doIt(result, pixOp.doIt(bufPtr[idx], kernel[k]));
00248             idx += dstWidth;
00249         }
00250         dstPtr.writeIncX(result);
00251     }
00252 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_YdirHor DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    kernel,
int    dstWidth,
int    lineIdx,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : Y direction, "horizontal buffer".

00261 {
00262     HxPixelAllocator<ArithT> allocator;
00263 
00264     // Copy kernel data into cycKer to match the cycle of buf
00265 
00266     ArithT* cycKer = allocator.allocate(kerWidth);
00267     for (int k=0 ; k<kerWidth ; k++) {
00268         cycKer[lineIdx] = kernel[k];
00269         lineIdx = (lineIdx + 1) % kerWidth;
00270     }
00271 
00272     for (int x=0 ; x<dstWidth ; x++) {
00273         ArithT* bufPtr = &buf[x];
00274         HxFuncGenConv2dSep_Pix_Ydir(
00275             dstPtr, bufPtr, cycKer, dstWidth, kerWidth,
00276             pixOp, redOp, RedOpT::neutralElement());
00277         dstPtr.incX();
00278     }
00279 
00280     allocator.deallocate(cycKer, kerWidth);
00281 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_YdirHorInc DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    kernel,
int    dstWidth,
int    lineIdx,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : Y direction, "horizontal buffer" (inc).

00290 {
00291     HxPixelAllocator<ArithT> allocator;
00292 
00293     // Copy kernel data into cycKer to match the cycle of buf
00294 
00295     ArithT* cycKer = allocator.allocate(kerWidth);
00296     for (int k=0 ; k<kerWidth ; k++) {
00297         cycKer[lineIdx] = kernel[k];
00298         lineIdx = (lineIdx + 1) % kerWidth;
00299     }
00300 
00301     ArithT neutralElement = RedOpT::neutralElement();
00302     for (int x=0 ; x<dstWidth ; x++) {
00303         ArithT* bufPtr = &buf[x];
00304         ArithT result(neutralElement);
00305         int idx = 0;
00306         for (int k=0; k<kerWidth; k++) {
00307             redOp.doIt(result, pixOp.doIt(bufPtr[idx], cycKer[k]));
00308             idx += dstWidth;
00309         }
00310         dstPtr.writeIncX(result);
00311     }
00312 
00313     allocator.deallocate(cycKer, kerWidth);
00314 }

template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_YdirVerInc DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    kernel,
int    dstWidth,
int    lineIdx,
int    kerWidth,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : Y direction, "vertical buffer" (inc).

00323 {
00324     HxPixelAllocator<ArithT> allocator;
00325 
00326     // Copy kernel data into cycKer to match the cycle of buf
00327 
00328     ArithT* cycKer = allocator.allocate(kerWidth);
00329     for (int k=0 ; k<kerWidth ; k++) {
00330         cycKer[lineIdx] = kernel[k];
00331         lineIdx = (lineIdx + 1) % kerWidth;
00332     }
00333 
00334     ArithT neutralElement = RedOpT::neutralElement();
00335     int idx = 0;
00336     for (int x=0 ; x<dstWidth ; x++) {
00337         ArithT* bufPtr = &buf[idx];
00338         ArithT result(neutralElement);
00339         for (int k=0; k<kerWidth; k++) {
00340             redOp.doIt(result, pixOp.doIt(bufPtr[k], cycKer[k]));
00341         }
00342         dstPtr.writeIncX(result);
00343         idx += kerWidth;
00344     }
00345 
00346     allocator.deallocate(cycKer, kerWidth);
00347 }

template<class DstDataPtrT, class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_XYdirVerCycInc DstDataPtrT    dstPtr,
ArithT *    buf,
ArithT *    bufPtr,
SrcDataPtrT    srcPtr,
ArithT *    kernel1,
ArithT *    kernel2,
int    dstWidth,
int    ker1Width,
int    ker2Width,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : X and Y direction, "vertical buffer", two-way cyclic (inc).

00358 {
00359     ArithT  neutralElement = RedOpT::neutralElement();
00360     int     lastKerElt = ker2Width-1;
00361     int     bufCycSize = dstWidth * ker2Width;
00362     ArithT* bufOverflow = buf + bufCycSize;
00363 
00364     while (--dstWidth >= 0) {
00365         // first do the X direction (from src to buf) for the last element
00366         // needed for the kernel in the Y direction (all other elements
00367         // needed for the Y direction are already computed and stored in buf)
00368         SrcDataPtrT sPtr(srcPtr);
00369         ArithT result1(neutralElement);
00370         for (int k1=0 ; k1<ker1Width ; k1++)
00371             redOp.doIt(result1, pixOp.doIt(sPtr.readIncX(), kernel1[k1]));
00372         bufPtr[lastKerElt] = result1;
00373         srcPtr.incX();
00374         // now the Y direction from buf to dst
00375         ArithT result2(neutralElement);
00376         for (int k2=0 ; k2<ker2Width ; k2++)
00377             redOp.doIt(result2, pixOp.doIt(bufPtr[k2], kernel2[k2]));
00378         dstPtr.writeIncX(result2);
00379         bufPtr += ker2Width;
00380         if (bufPtr >= bufOverflow)
00381             bufPtr -= bufCycSize;
00382     }
00383 }

template<class DstDataPtrT, class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Line_XYdirMinInc DstDataPtrT    dstPtr,
ArithT *    buf,
SrcDataPtrT    srcPtr,
ArithT *    kernel1,
ArithT *    kernel2,
int    srcWidth,
int    dstWidth,
int    ker1Width,
int    ker2Width,
PixOpT &    pixOp,
RedOpT &    redOp
[static]
 

Line : X and Y direction, "minimal buffer" (inc).

00431 {
00432     ArithT neutralElement = RedOpT::neutralElement();
00433 
00434     // first Y dir from src to buf using kernel2
00435 
00436     for (int x=0 ; x<srcWidth ; x++) {
00437         SrcDataPtrT sPtr(srcPtr);
00438         ArithT result(neutralElement);
00439         for (int k=0 ; k<ker2Width ; k++) {
00440                 redOp.doIt(result, pixOp.doIt(sPtr.read(), kernel2[k]));
00441                 sPtr.incY();
00442         }
00443         buf[x] = result;
00444         srcPtr.incX();
00445     }
00446 
00447     // now X dir from buf to dst using kernel1
00448 
00449     for (int x2=0 ; x2<dstWidth ; x2++) {
00450         ArithT* bufPtr = &buf[x2];
00451         ArithT result(neutralElement);
00452         for (int k=0; k<ker1Width; k++)
00453             redOp.doIt(result, pixOp.doIt(bufPtr[k], kernel1[k]));
00454         dstPtr.writeIncX(result);
00455     }
00456 }

template<class KernelT, class ArithType>
ArithType* HxFuncGenConv2dSep_CopyKernel KernelT &    kernel,
ArithType   
[static]
 

Copy (1d) kernel to an array of ArithType elements.

00466 {
00467     HxPixelAllocator<ArithType> allocator;
00468     HxSizes kerSize = kernel.sizes();
00469     int width = kerSize.x();
00470     ArithType* kerArray = allocator.allocate(width);
00471     for (int i=0; i<width; i++)
00472         kerArray[i] = kernel(i);
00473     return kerArray;
00474 }

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Sim DstDataPtrType    dstPtr,
SrcDataPtrType    srcPtr,
KernelT &    kernel1,
KernelT &    kernel2,
HxSizes    dstSize,
HxSizes    srcSize,
PixOpT &    pixOp,
RedOpT &    redOp,
int    vType
[static]
 

GenConv2dSep : simple.

The conceptually simple implementation basically does the X direction from src to a scratch image, and then the Y direction from the scratch image to dst.

src has a complete border (so both in the X and Y direction). scratch has a border in the Y direction only. dst has no border.

Has two basic versions (designated by vType). The versions do (conceptually) the same algorithm in the X direction. With vType == 0 the Y direction is processed in column order (so first the whole column with X=0 is processed before we move on to the next column). With vType == {1,2} the Y direction is processed in a row-wise manner. That is, the kernel is applied at the first position in all columns before it is applied to the next position (again in all columns).

Further variations determine whether the processing of a single kernel is done in a separate function or not.

00510 {
00511     typedef typename KernelT::ArithType ArithType;
00512     HxPixelAllocator<ArithType> allocator;
00513 
00514     int ker1Width = kernel1.sizes().x();
00515     ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType());
00516 
00517     int ker2Width = kernel2.sizes().x();
00518     ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType());
00519 
00520     int srcWidth = srcSize.x();
00521     int srcHeight = srcSize.y();
00522     int dstWidth = dstSize.x();
00523     int dstHeight = dstSize.y();
00524 
00525     // Allocate scratchImage of size (dstWidth,srcHeight)
00526 
00527     int scratchSize = dstWidth * srcHeight;
00528     ArithType* scratch = allocator.allocate(scratchSize);
00529     ArithType* sLine;
00530     int x, y;
00531     
00532     // do the x direction from src to scratch
00533 
00534     for (y=0 ; y<srcHeight ; y++) {
00535         SrcDataPtrType sPtr(srcPtr);
00536         sPtr.incY(y);
00537         sLine = &scratch[y * dstWidth];
00538         if (vType == 1)
00539             HxFuncGenConv2dSep_Line_Xdir(
00540                 sLine, sPtr, ker1Array, srcWidth, dstWidth, ker1Width,
00541                 pixOp, redOp);
00542         if ((vType == 0) || (vType == 2))
00543             HxFuncGenConv2dSep_Line_XdirInc(
00544                 sLine, sPtr, ker1Array, srcWidth, dstWidth, ker1Width,
00545                 pixOp, redOp);
00546     }
00547 
00548     // do the y direction from scratch to dst
00549 
00550     if (vType == 0) {
00551         for (x=0 ; x<dstWidth ; x++) {
00552             DstDataPtrType dPtr(dstPtr);
00553             dPtr.incX(x);
00554             sLine = &scratch[x];
00555             HxFuncGenConv2dSep_Line_YdirNaiInc(
00556                 dPtr, sLine, ker2Array, dstWidth, dstHeight, ker2Width,
00557                 pixOp, redOp);
00558         }
00559     } else { // vType == 1 or 2
00560         for (y=0 ; y<dstHeight ; y++) {
00561             DstDataPtrType dPtr(dstPtr);
00562             dPtr.incY(y);
00563             sLine = &scratch[y * dstWidth];
00564             if (vType == 1)
00565                 HxFuncGenConv2dSep_Line_YdirSim(
00566                     dPtr, sLine, ker2Array, dstWidth, ker2Width, pixOp, redOp);
00567             if (vType == 2)
00568                 HxFuncGenConv2dSep_Line_YdirSimInc(
00569                     dPtr, sLine, ker2Array, dstWidth, ker2Width, pixOp, redOp);
00570         }
00571     }
00572 
00573     allocator.deallocate(ker1Array, ker1Width);
00574     allocator.deallocate(ker2Array, ker2Width);
00575     allocator.deallocate(scratch, scratchSize);
00576 }

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Hor DstDataPtrType    dstPtr,
SrcDataPtrType    srcPtr,
KernelT &    kernel1,
KernelT &    kernel2,
HxSizes    dstSize,
HxSizes    srcSize,
PixOpT &    pixOp,
RedOpT &    redOp,
int    vType
[static]
 

GenConv2dSep : "horizontal buffer".

This implementation does the X direction from src to a "horizontal buffer", and then the Y direction from the buffer to dst.

Processing the image is done in a row-wise manner. The buffer holds just enough (processed) rows of the image to be able to apply the kernel in the Y direction. So, the buffer is used in a cyclic manner.

The buffer is "horizontal" in that the data organization matches that of the images. That is, if pixels are next to each other on a row in src, their processed results are also next to each other in the buffer. The size of the buffer is dstWidth x ker2Width.

src has a complete border (so both in the X and Y direction). the buffer has no border. dst has no border.

Has two versions (designated by vType: 3 or 4) that determine whether the processing of a single kernel is done in a separate function (vType == 3) or inline in the line function (vType == 4).

00607 {
00608     typedef typename KernelT::ArithType ArithType;
00609     HxPixelAllocator<ArithType> allocator;
00610 
00611     int ker1Width = kernel1.sizes().x();
00612     ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType());
00613 
00614     int ker2Width = kernel2.sizes().x();
00615     ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType());
00616 
00617     int srcWidth = srcSize.x();
00618     int srcHeight = srcSize.y();
00619     int dstWidth = dstSize.x();
00620     int dstHeight = dstSize.y();
00621 
00622     // Allocate buffer of size (dstWidth,ker2Width)
00623 
00624     int bufSize = dstWidth * ker2Width;
00625     ArithType* buf = allocator.allocate(bufSize);
00626     ArithType* bufLine;
00627     int lineIdx = 0; // (cyclic) line index in buf
00628     
00629     // initialize buf with first ker2Width - 1 lines
00630     int y;
00631     for (y=0 ; y<ker2Width-1 ; y++) {
00632         bufLine = &buf[lineIdx * dstWidth];
00633         if (vType == 3)
00634             HxFuncGenConv2dSep_Line_Xdir(
00635                 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width,
00636                 pixOp, redOp);
00637         if (vType == 4)
00638             HxFuncGenConv2dSep_Line_XdirInc(
00639                 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width,
00640                 pixOp, redOp);
00641         lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic
00642         srcPtr.incY();
00643     }
00644 
00645     // now do the image
00646 
00647     for (y=0 ; y<dstHeight ; y++) {
00648 
00649         // do X direction to next line in the buffer
00650         // the next line is actually the last "element" for ker2 in the Y direction
00651 
00652         bufLine = &buf[lineIdx * dstWidth];
00653         if (vType == 3)
00654             HxFuncGenConv2dSep_Line_Xdir(
00655                 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width,
00656                 pixOp, redOp);
00657         if (vType == 4)
00658             HxFuncGenConv2dSep_Line_XdirInc(
00659                 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width,
00660                 pixOp, redOp);
00661         lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic
00662         srcPtr.incY();
00663         
00664         // do Y direction from buffer to dstImg
00665         // Since the buffer is cyclic, the "ker2" location starts at lineIdx
00666 
00667         if (vType == 3)
00668             HxFuncGenConv2dSep_Line_YdirHor(
00669                 dstPtr, buf, ker2Array, dstWidth, lineIdx, ker2Width,
00670                 pixOp, redOp);
00671         if (vType == 4)
00672             HxFuncGenConv2dSep_Line_YdirHorInc(
00673                 dstPtr, buf, ker2Array, dstWidth, lineIdx, ker2Width,
00674                 pixOp, redOp);
00675         dstPtr.incY();
00676     }
00677 
00678     allocator.deallocate(ker1Array, ker1Width);
00679     allocator.deallocate(ker2Array, ker2Width);
00680     allocator.deallocate(buf, bufSize);
00681 }

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Ver DstDataPtrType    dstPtr,
SrcDataPtrType    srcPtr,
KernelT &    kernel1,
KernelT &    kernel2,
HxSizes    dstSize,
HxSizes    srcSize,
PixOpT &    pixOp,
RedOpT &    redOp,
int    vType
[static]
 

GenConv2dSep : "vertical buffer".

This implementation does the X direction from src to a "vertical buffer", and then the Y direction from the buffer to dst.

Processing the image is done in a row-wise manner. The buffer holds just enough (processed) rows of the image to be able to apply the kernel in the Y direction. So, the buffer is used in a cyclic manner.

The buffer is "vertical" in that the data organization is rotated w.r.t. the images. That is, if pixels are next to each other on a row in src, their processed results are ker2Width apart in the buffer. In other words, the processing of a (horizontal) row in the image is stored in a (vertical) column of the buffer. The size of the buffer is ker2Width x dstWidth.

src has a complete border (so both in the X and Y direction). the buffer has no border. dst has no border.

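Has two versions (designated by vType) that determine whether the processing of a single kernel is done in a separate function or not. Note that the implementation listed below does not inspect vType; it always uses the inlined ("Inc") line functions.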

00714 {
00715     typedef typename KernelT::ArithType ArithType;
00716     HxPixelAllocator<ArithType> allocator;
00717 
00718     int ker1Width = kernel1.sizes().x();
00719     ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType());
00720 
00721     int ker2Width = kernel2.sizes().x();
00722     ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType());
00723 
00724     int srcWidth = srcSize.x();
00725     int srcHeight = srcSize.y();
00726     int dstWidth = dstSize.x();
00727     int dstHeight = dstSize.y();
00728 
00729     // Allocate buffer of size (ker2Width,dstWidth)
00730 
00731     int bufSize = dstWidth * ker2Width;
00732     ArithType* buf = allocator.allocate(bufSize);
00733     ArithType* bufLine;
00734     int lineIdx = 0; // (cyclic) line index in buf
00735     
00736     // initialize buf with first ker2Width - 1 lines
00737     int y;
00738     for (y=0 ; y<ker2Width-1 ; y++) {
00739         bufLine = &buf[lineIdx];
00740         HxFuncGenConv2dSep_Line_XdirVerInc(
00741             bufLine, srcPtr, ker1Array, srcWidth, dstWidth,
00742             ker1Width, ker2Width, pixOp, redOp);
00743         lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic
00744         srcPtr.incY();
00745     }
00746 
00747     // now do the image
00748 
00749     for (y=0 ; y<dstHeight ; y++) {
00750 
00751         // do X direction to next line in the buffer
00752         // the next line is actually the last "element" for ker2 in the Y direction
00753 
00754         bufLine = &buf[lineIdx];
00755         HxFuncGenConv2dSep_Line_XdirVerInc(
00756             bufLine, srcPtr, ker1Array, srcWidth, dstWidth,
00757             ker1Width, ker2Width, pixOp, redOp);
00758         lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic
00759         srcPtr.incY();
00760 
00761         // do Y direction from buffer to dstImg
00762         // Since the buffer is cyclic, the "ker2" location starts at lineIdx
00763 
00764         HxFuncGenConv2dSep_Line_YdirVerInc(
00765             dstPtr, buf, ker2Array, dstWidth, lineIdx, ker2Width,
00766             pixOp, redOp);
00767         dstPtr.incY();
00768     }
00769 
00770     allocator.deallocate(ker1Array, ker1Width);
00771     allocator.deallocate(ker2Array, ker2Width);
00772     allocator.deallocate(buf, bufSize);
00773 }

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_VerCyc DstDataPtrType    dstPtr,
SrcDataPtrType    srcPtr,
KernelT &    kernel1,
KernelT &    kernel2,
HxSizes    dstSize,
HxSizes    srcSize,
PixOpT &    pixOp,
RedOpT &    redOp,
int    vType
[static]
 

GenConv2dSep : "vertical buffer", two-way cyclic (localized computation).

This implementation does the X direction from src to a "vertical buffer", and then the Y direction from the buffer to dst.

Processing the image is done in a row-wise manner. The buffer holds just enough (processed) rows of the image to be able to apply the kernel in the Y direction. So, the buffer is used in a cyclic manner.

The buffer is "vertical" in that the data organization is rotated w.r.t. the images. That is, if pixels are next to each other on a row in src, their processed results are ker2Width apart in the buffer. In other words, the processing of a (horizontal) row in the image is stored in a (vertical) column of the buffer. The size of the buffer is ker2Width x (dstWidth + 1). The extra 1 is for overflow.

In this variation, the buffer is not only cyclic in the "X direction" (that is, when storing "row results" in columns) but it is also cyclic in the "Y direction". That is, a "row result" is not always stored at the beginning of a column of the buffer.

src has a complete border (in both the X and Y directions). The buffer has no border. dst has no border.

00808 { // NOTE(review): the vType parameter is not referenced anywhere in this body
00809     typedef typename KernelT::ArithType ArithType;
00810     HxPixelAllocator<ArithType> allocator;
00811 
00812     int ker1Width = kernel1.sizes().x();
00813     ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); // released with allocator.deallocate() at the end
00814 
00815     int ker2Width = kernel2.sizes().x();
00816     ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); // released with allocator.deallocate() at the end
00817 
00818     int srcWidth = srcSize.x();
00819     int srcHeight = srcSize.y(); // NOTE(review): not referenced again in this function
00820     int dstWidth = dstSize.x();
00821     int dstHeight = dstSize.y();
00822 
00823     // Allocate buffer of size (ker2Width, dstWidth+1): the cyclic part plus ker2Width overflow elements
00824 
00825     int         bufCycSize = ker2Width * dstWidth; // size of the "cyclic" part
00826     int         bufTotSize = bufCycSize + ker2Width; // the total size (cyclic part + overflow)
00827     ArithType*  buf = allocator.allocate(bufTotSize);
00828     ArithType*  bufOverflow = buf + bufCycSize; // first element past the cyclic part
00829 
00830     // initialize buf with first ker2Width - 1 lines (X direction only, no dst output yet)
00831 
00832     int lineIdx = 0; // start offset in buf for the next row result; advances by 1 per row
00833     int y;
00834     for (y=0 ; y<ker2Width-1 ; y++) {
00835         ArithType *bufLine = &buf[lineIdx];
00836         HxFuncGenConv2dSep_Line_XdirVerInc(
00837             bufLine, srcPtr, ker1Array, srcWidth, dstWidth,
00838             ker1Width, ker2Width, pixOp, redOp);
00839         lineIdx++;
00840         srcPtr.incY();
00841     }
00842 
00843     // now do the image; y counts the dst rows that still have to be produced
00844 
00845     ArithType* bufPtr= buf;
00846     for (y=dstHeight ; y>0 ; ) {
00847         int k;
00848         for (k=ker2Width < y ? ker2Width : y ; --k >= 0 ; y--) { // at most ker2Width rows per pass, fewer if fewer remain
00849             HxFuncGenConv2dSep_Line_XYdirVerCycInc(
00850                 dstPtr, buf, bufPtr, srcPtr, ker1Array, ker2Array,
00851                 dstWidth, ker1Width, ker2Width, pixOp, redOp);
00852             dstPtr.incY();
00853             srcPtr.incY();
00854             bufPtr++; // the next row starts one element further in the buffer
00855         }
00856         // buffer is cyclic: wrap bufPtr back once it has reached the overflow area
00857         if (bufPtr >= bufOverflow)
00858             bufPtr -= bufCycSize;
00859         // copy overflow of buffer (ker2Width elements) to first line of buffer
00860         for (k=0; k<ker2Width; k++)
00861             buf[k] = bufOverflow[k];
00862     }
00863 
00864     allocator.deallocate(ker1Array, ker1Width);
00865     allocator.deallocate(ker2Array, ker2Width);
00866     allocator.deallocate(buf, bufTotSize);
00867 }

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSep_Min DstDataPtrType    dstPtr,
SrcDataPtrType    srcPtr,
KernelT &    kernel1,
KernelT &    kernel2,
HxSizes    dstSize,
HxSizes    srcSize,
PixOpT &    pixOp,
RedOpT &    redOp,
int    vType
[static]
 

GenConv2dSep : "minimal buffer".

This implementation does the Y (!) direction from src to a buffer, and then the X direction from the buffer to dst.

Processing the image is done in a row-wise manner. Since the Y direction is done first, the buffer needs to contain only one row of results. The size of the buffer is srcWidth.

src has a complete border (in both the X and Y directions). The buffer has a border in the X direction only. dst has no border.

00890 { // NOTE(review): the vType parameter is not referenced anywhere in this body
00891     typedef typename KernelT::ArithType ArithType;
00892     HxPixelAllocator<ArithType> allocator;
00893 
00894     int ker1Width = kernel1.sizes().x();
00895     ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); // released with allocator.deallocate() at the end
00896 
00897     int ker2Width = kernel2.sizes().x();
00898     ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); // released with allocator.deallocate() at the end
00899 
00900     int srcWidth = srcSize.x();
00901     int srcHeight = srcSize.y(); // NOTE(review): not referenced again in this function
00902     int dstWidth = dstSize.x();
00903     int dstHeight = dstSize.y();
00904 
00905     // Allocate buffer of size (srcWidth): a single row of intermediate results
00906 
00907     int bufSize = srcWidth;
00908     ArithType* buf = allocator.allocate(bufSize);
00909 
00910     // now do the image: one dst row per iteration, src and dst advance together
00911 
00912     for (int y=0 ; y<dstHeight ; y++) {
00913         HxFuncGenConv2dSep_Line_XYdirMinInc(
00914             dstPtr, buf, srcPtr, ker1Array, ker2Array, srcWidth, dstWidth,
00915             ker1Width, ker2Width, pixOp, redOp);
00916         srcPtr.incY();
00917         dstPtr.incY();
00918     }
00919 
00920     allocator.deallocate(ker1Array, ker1Width);
00921     allocator.deallocate(ker2Array, ker2Width);
00922     allocator.deallocate(buf, bufSize);
00923 }

template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT>
void HxFuncGenConv2dSepDispatch DstDataPtrType    dstPtr,
SrcDataPtrType    srcPtr,
KernelT &    kernel1,
KernelT &    kernel2,
HxSizes    dstSize,
HxSizes    srcSize,
PixOpT &    pixOp,
RedOpT &    redOp,
int    vType
 

Dispatch function for GenConv2dSep (see Global functions for GenConv2dSep). Dispatch is based on the vType parameter.

Assertions:

Parameters:
dstPtr  Output image: IS = dstSize, IBS = 0
srcPtr  Input image: IS = srcSize, IBS = (ker1_NBW,ker2_NBW), srcPtr is at (IX0,IY0)
kernel1  Input image, IS = ker1Size, IBS = 0
kernel2  Input image, IS = ker2Size, IBS = 0

00948 {
00949     switch (vType) { // select the implementation variant; all variants receive the same arguments
00950     case 0:
00951     case 1:
00952     case 2: // vType 0, 1 and 2 all go to the _Sim variant, which also receives vType
00953         HxFuncGenConv2dSep_Sim(
00954             dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType);
00955         break;
00956     case 3:
00957     case 4: // vType 3 and 4 both go to the _Hor variant, which also receives vType
00958         HxFuncGenConv2dSep_Hor(
00959             dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType);
00960         break;
00961     case 5: // _Ver variant
00962         HxFuncGenConv2dSep_Ver(
00963             dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType);
00964         break;
00965     case 6: // this is the default (set in HxImgFtorGenConv2dSep)
00966         HxFuncGenConv2dSep_VerCyc(
00967             dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType);
00968         break;
00969     case 7: // _Min variant
00970         HxFuncGenConv2dSep_Min(
00971             dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType);
00972         break;
00973     default : // unknown vType: only an error message is written; no variant is called
00974         HxEnvironment::instance()->errorStream()
00975             << "HxFuncGenConv2dSepDispatch: unknown vType " << vType
00976             << STD_ENDL;
00977         HxEnvironment::instance()->flush();
00978     }
00979 }


Generated on Tue Feb 3 14:18:47 2004 for C++Reference by doxygen1.2.12 written by Dimitri van Heesch, © 1997-2001