#include "HxFuncGenConv2dSep.h"
#include "HxEnvironment.h"
Pix_variations | |
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Pix_Xdir (ArithT *buf, SrcDataPtrT srcPtr, ArithT *kernel, int bufIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp, ArithT neutralElement) |
Pix : X direction. More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Pix_Ydir (DstDataPtrT dstPtr, ArithT *bufPtr, ArithT *kernel, int bufWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp, ArithT neutralElement) |
Pix : Y direction. More... | |
Line_variations | |
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_Xdir (ArithT *bufLine, SrcDataPtrT srcPtr, ArithT *kernel, int srcWidth, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : X direction. More... | |
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_XdirInc (ArithT *bufLine, SrcDataPtrT srcPtr, ArithT *kernel, int srcWidth, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : X direction (inc). More... | |
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_XdirVerInc (ArithT *buf, SrcDataPtrT srcPtr, ArithT *kernel1, int srcWidth, int dstWidth, int ker1Width, int ker2Width, PixOpT &pixOp, RedOpT &redOp) |
Line : X direction, "vertical buffer" (inc). More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_YdirNaiInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int dstHeight, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : Y direction, naive (inc). More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_YdirSim (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : Y direction, simple. More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_YdirSimInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : Y direction, simple (inc). More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_YdirHor (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int lineIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : Y direction, "horizontal buffer". More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_YdirHorInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int lineIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : Y direction, "horizontal buffer" (inc). More... | |
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_YdirVerInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *kernel, int dstWidth, int lineIdx, int kerWidth, PixOpT &pixOp, RedOpT &redOp) |
Line : Y direction, "vertical buffer" (inc). More... | |
template<class DstDataPtrT, class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_XYdirVerCycInc (DstDataPtrT dstPtr, ArithT *buf, ArithT *bufPtr, SrcDataPtrT srcPtr, ArithT *kernel1, ArithT *kernel2, int dstWidth, int ker1Width, int ker2Width, PixOpT &pixOp, RedOpT &redOp) |
Line : X and Y direction, "vertical buffer", two-way cyclic (inc). More... | |
template<class DstDataPtrT, class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Line_XYdirMinInc (DstDataPtrT dstPtr, ArithT *buf, SrcDataPtrT srcPtr, ArithT *kernel1, ArithT *kernel2, int srcWidth, int dstWidth, int ker1Width, int ker2Width, PixOpT &pixOp, RedOpT &redOp) |
Line : X and Y direction, "minimal buffer" (inc). More... | |
GenConv2dSep_variations | |
template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Sim (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType) |
GenConv2dSep : simple. More... | |
template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Hor (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType) |
GenConv2dSep : "horizontal buffer". More... | |
template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Ver (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType) |
GenConv2dSep : "vertical buffer". More... | |
template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_VerCyc (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType) |
GenConv2dSep : "vertical buffer", two-way cyclic (localized computation). More... | |
template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSep_Min (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType) |
GenConv2dSep : "minimal buffer". More... | |
Functions | |
template<class KernelT, class ArithType> ArithType * | HxFuncGenConv2dSep_CopyKernel (KernelT &kernel, ArithType) |
Copy (1d) kernel to an array of ArithT elements. More... | |
template<class DstDataPtrType, class SrcDataPtrType, class KernelT, class PixOpT, class RedOpT> void | HxFuncGenConv2dSepDispatch (DstDataPtrType dstPtr, SrcDataPtrType srcPtr, KernelT &kernel1, KernelT &kernel2, HxSizes dstSize, HxSizes srcSize, PixOpT &pixOp, RedOpT &redOp, int vType) |
Dispatch function for GenConv2dSep (see Global functions for GenConv2dSep) Dispatch is based on the vType parameter. More... |
|
Pix : X direction. Does a single genconv between srcPtr and kernel of kerWidth pixels and stores the result at buf[bufIdx]. Memory layout of all srcPtr and kernel is assumed contiguous.
/**
 * Pix : X direction.
 *
 * Does a single generalized convolution between the pixels read from srcPtr
 * and the first kerWidth elements of kernel, and stores the reduced value
 * in buf[bufIdx].
 *
 * @param buf            destination array; only buf[bufIdx] is written
 * @param srcPtr         source pointer (passed by value); advanced locally
 *                       with readIncX() once per kernel element
 * @param kernel         kernel elements, indexed 0 .. kerWidth-1
 * @param bufIdx         index in buf that receives the result
 * @param kerWidth       number of kernel elements to apply
 * @param pixOp          binary pixel operation, called as pixOp.doIt(pix, ker)
 * @param redOp          reduction, called as redOp.doIt(accumulator, value)
 * @param neutralElement start value of the accumulator
 */
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Pix_Xdir(ArithT* buf, SrcDataPtrT srcPtr, ArithT* kernel,
                            int bufIdx, int kerWidth,
                            PixOpT& pixOp, RedOpT& redOp,
                            ArithT neutralElement)
{
    ArithT result(neutralElement);
    for (int k = 0; k < kerWidth; k++)
        redOp.doIt(result, pixOp.doIt(srcPtr.readIncX(), kernel[k]));
    buf[bufIdx] = result;
}
|
Pix : Y direction. Does a single genconv between bufPtr and kernel of kerWidth pixels and stores the result at dstPtr. Memory layout of kernel is assumed to be contiguous. Pixels in buf are assumed to be bufWidth elements apart.
/**
 * Pix : Y direction.
 *
 * Does a single generalized convolution between elements of the buffer and
 * the first kerWidth elements of kernel, and writes the reduced value
 * through dstPtr. Successive buffer elements are taken bufWidth apart,
 * starting at bufPtr[0].
 *
 * @param dstPtr         destination pointer; receives the result via write()
 * @param bufPtr         first buffer element to use
 * @param kernel         kernel elements, indexed 0 .. kerWidth-1
 * @param bufWidth       distance (in elements) between buffer elements used
 * @param kerWidth       number of kernel elements to apply
 * @param pixOp          binary pixel operation, called as pixOp.doIt(pix, ker)
 * @param redOp          reduction, called as redOp.doIt(accumulator, value)
 * @param neutralElement start value of the accumulator
 */
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Pix_Ydir(DstDataPtrT dstPtr, ArithT* bufPtr, ArithT* kernel,
                            int bufWidth, int kerWidth,
                            PixOpT& pixOp, RedOpT& redOp,
                            ArithT neutralElement)
{
    ArithT result(neutralElement);
    int idx = 0;
    for (int k = 0; k < kerWidth; k++) {
        redOp.doIt(result, pixOp.doIt(bufPtr[idx], kernel[k]));
        idx += bufWidth;    // step to the same column on the next buffer line
    }
    dstPtr.write(result);
}
|
Line : X direction. Processes an entire line starting at srcPtr. Calls HxFuncGenConv2dSep_Pix_Xdir "dstWidth" times to do a neighbourhood. The resulting "dstWidth" values are stored on bufLine (contiguous).
/**
 * Line : X direction.
 *
 * Processes a line starting at srcPtr by calling
 * HxFuncGenConv2dSep_Pix_Xdir dstWidth times; the result for position x is
 * stored in bufLine[x]. The accumulator start value is
 * RedOpT::neutralElement().
 *
 * @param bufLine   destination line, written at indices 0 .. dstWidth-1
 * @param srcPtr    source pointer (passed by value); advanced with incX()
 *                  after each position
 * @param kernel    kernel elements for the X direction
 * @param srcWidth  not used by this function
 * @param dstWidth  number of results to produce
 * @param kerWidth  number of kernel elements
 * @param pixOp     binary pixel operation
 * @param redOp     reduction operation
 */
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_Xdir(ArithT* bufLine, SrcDataPtrT srcPtr,
                             ArithT* kernel, int srcWidth, int dstWidth,
                             int kerWidth, PixOpT& pixOp, RedOpT& redOp)
{
    for (int x = 0; x < dstWidth; x++) {
        HxFuncGenConv2dSep_Pix_Xdir(
            bufLine, srcPtr, kernel, x, kerWidth,
            pixOp, redOp, RedOpT::neutralElement());
        srcPtr.incX();
    }
}
|
Line : X direction (inc). Processes an entire line starting at srcPtr (including the "Pix" variation). The resulting "dstWidth" values are stored in bufLine (contiguous). This function does the same operation as HxFuncGenConv2dSep_Line_Xdir but the code of HxFuncGenConv2dSep_Pix_Xdir is inserted in the loop (instead of calling the function like HxFuncGenConv2dSep_Line_Xdir does).
/**
 * Line : X direction (inc).
 *
 * Same operation as HxFuncGenConv2dSep_Line_Xdir, but with the per-pixel
 * loop written out in place instead of calling
 * HxFuncGenConv2dSep_Pix_Xdir. The result for position x is stored in
 * bufLine[x].
 *
 * @param bufLine   destination line, written at indices 0 .. dstWidth-1
 * @param srcPtr    source pointer (passed by value); advanced with incX()
 *                  after each position
 * @param kernel    kernel elements for the X direction
 * @param srcWidth  not used by this function
 * @param dstWidth  number of results to produce
 * @param kerWidth  number of kernel elements
 * @param pixOp     binary pixel operation
 * @param redOp     reduction operation; RedOpT::neutralElement() supplies
 *                  the accumulator start value
 */
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_XdirInc(ArithT* bufLine, SrcDataPtrT srcPtr,
                                ArithT* kernel, int srcWidth, int dstWidth,
                                int kerWidth, PixOpT& pixOp, RedOpT& redOp)
{
    ArithT neutralElement = RedOpT::neutralElement();
    for (int x = 0; x < dstWidth; x++) {
        // Work on a copy so that srcPtr itself moves one pixel per result.
        SrcDataPtrT sPtr(srcPtr);
        ArithT result(neutralElement);
        for (int k = 0; k < kerWidth; k++)
            redOp.doIt(result, pixOp.doIt(sPtr.readIncX(), kernel[k]));
        bufLine[x] = result;
        srcPtr.incX();
    }
}
|
Line : X direction, "vertical buffer" (inc). Processes an entire line starting at srcPtr (including the "Pix" variation). The resulting "dstWidth" values are stored in buf using a stride ker2Width.
/**
 * Line : X direction, "vertical buffer" (inc).
 *
 * Processes a line starting at srcPtr with the per-pixel loop written out
 * in place. The result for position x is stored in buf[x * ker2Width], i.e.
 * consecutive results are ker2Width elements apart.
 *
 * @param buf        destination; written at 0, ker2Width, 2*ker2Width, ...
 * @param srcPtr     source pointer (passed by value); advanced with incX()
 *                   after each position
 * @param kernel1    kernel elements for the X direction
 * @param srcWidth   not used by this function
 * @param dstWidth   number of results to produce
 * @param ker1Width  number of elements in kernel1
 * @param ker2Width  stride between consecutive results in buf
 * @param pixOp      binary pixel operation
 * @param redOp      reduction operation; RedOpT::neutralElement() supplies
 *                   the accumulator start value
 */
template<class SrcDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_XdirVerInc(ArithT* buf, SrcDataPtrT srcPtr,
                                   ArithT* kernel1, int srcWidth, int dstWidth,
                                   int ker1Width, int ker2Width,
                                   PixOpT& pixOp, RedOpT& redOp)
{
    ArithT neutralElement = RedOpT::neutralElement();
    int idx = 0;
    for (int x = 0; x < dstWidth; x++) {
        SrcDataPtrT sPtr(srcPtr);
        ArithT result(neutralElement);
        for (int k = 0; k < ker1Width; k++)
            redOp.doIt(result, pixOp.doIt(sPtr.readIncX(), kernel1[k]));
        buf[idx] = result;
        idx += ker2Width;   // next result goes ker2Width elements further
        srcPtr.incX();
    }
}
|
Line : Y direction, naive (inc).
/**
 * Line : Y direction, naive (inc).
 *
 * Produces dstHeight results for one column. For result y the kernel is
 * applied to buffer elements that are dstWidth apart, starting at
 * buf[y * dstWidth]; the value is written through dstPtr, which is then
 * advanced with incY().
 *
 * @param dstPtr     destination pointer (passed by value)
 * @param buf        first buffer element of the column
 * @param kernel     kernel elements for the Y direction
 * @param dstWidth   distance (in elements) between buffer lines
 * @param dstHeight  number of results to produce
 * @param kerWidth   number of kernel elements
 * @param pixOp      binary pixel operation
 * @param redOp      reduction operation; RedOpT::neutralElement() supplies
 *                   the accumulator start value
 */
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_YdirNaiInc(DstDataPtrT dstPtr, ArithT* buf,
                                   ArithT* kernel, int dstWidth, int dstHeight,
                                   int kerWidth, PixOpT& pixOp, RedOpT& redOp)
{
    ArithT neutralElement = RedOpT::neutralElement();
    for (int y = 0; y < dstHeight; y++) {
        ArithT result(neutralElement);
        int idx = 0;
        for (int k = 0; k < kerWidth; k++) {
            redOp.doIt(result, pixOp.doIt(buf[idx], kernel[k]));
            idx += dstWidth;
        }
        dstPtr.write(result);
        dstPtr.incY();
        buf += dstWidth;    // move the window one buffer line down
    }
}
|
Line : Y direction, simple.
/**
 * Line : Y direction, simple.
 *
 * Produces one destination line of dstWidth results by calling
 * HxFuncGenConv2dSep_Pix_Ydir for every column x, starting at buf[x] and
 * using dstWidth as the distance between buffer lines.
 *
 * @param dstPtr    destination pointer (passed by value); advanced with
 *                  incX() after each column
 * @param buf       first buffer line to use
 * @param kernel    kernel elements for the Y direction
 * @param dstWidth  number of results, and distance between buffer lines
 * @param kerWidth  number of kernel elements
 * @param pixOp     binary pixel operation
 * @param redOp     reduction operation; RedOpT::neutralElement() supplies
 *                  the accumulator start value
 */
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_YdirSim(DstDataPtrT dstPtr, ArithT* buf,
                                ArithT* kernel, int dstWidth, int kerWidth,
                                PixOpT& pixOp, RedOpT& redOp)
{
    for (int x = 0; x < dstWidth; x++) {
        ArithT* bPtr = &buf[x];
        HxFuncGenConv2dSep_Pix_Ydir(
            dstPtr, bPtr, kernel, dstWidth, kerWidth,
            pixOp, redOp, RedOpT::neutralElement());
        dstPtr.incX();
    }
}
|
Line : Y direction, simple (inc).
/**
 * Line : Y direction, simple (inc).
 *
 * Same operation as HxFuncGenConv2dSep_Line_YdirSim, but with the per-pixel
 * loop written out in place. For column x the kernel is applied to
 * buf[x], buf[x + dstWidth], ... and the result is written with
 * dstPtr.writeIncX().
 *
 * @param dstPtr    destination pointer (passed by value)
 * @param buf       first buffer line to use
 * @param kernel    kernel elements for the Y direction
 * @param dstWidth  number of results, and distance between buffer lines
 * @param kerWidth  number of kernel elements
 * @param pixOp     binary pixel operation
 * @param redOp     reduction operation; RedOpT::neutralElement() supplies
 *                  the accumulator start value
 */
template<class DstDataPtrT, class ArithT, class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_YdirSimInc(DstDataPtrT dstPtr, ArithT* buf,
                                   ArithT* kernel, int dstWidth, int kerWidth,
                                   PixOpT& pixOp, RedOpT& redOp)
{
    ArithT neutralElement = RedOpT::neutralElement();
    for (int x = 0; x < dstWidth; x++) {
        ArithT* bufPtr = &buf[x];
        ArithT result(neutralElement);
        int idx = 0;
        for (int k = 0; k < kerWidth; k++) {
            redOp.doIt(result, pixOp.doIt(bufPtr[idx], kernel[k]));
            idx += dstWidth;
        }
        dstPtr.writeIncX(result);
    }
}
|
Line : Y direction, "horizontal buffer".
00261 { 00262 HxPixelAllocator<ArithT> allocator; 00263 00264 // Copy kernel data into cycKer to match the cycle of buf 00265 00266 ArithT* cycKer = allocator.allocate(kerWidth); 00267 for (int k=0 ; k<kerWidth ; k++) { 00268 cycKer[lineIdx] = kernel[k]; 00269 lineIdx = (lineIdx + 1) % kerWidth; 00270 } 00271 00272 for (int x=0 ; x<dstWidth ; x++) { 00273 ArithT* bufPtr = &buf[x]; 00274 HxFuncGenConv2dSep_Pix_Ydir( 00275 dstPtr, bufPtr, cycKer, dstWidth, kerWidth, 00276 pixOp, redOp, RedOpT::neutralElement()); 00277 dstPtr.incX(); 00278 } 00279 00280 allocator.deallocate(cycKer, kerWidth); 00281 } |
|
Line : Y direction, "horizontal buffer" (inc).
00290 { 00291 HxPixelAllocator<ArithT> allocator; 00292 00293 // Copy kernel data into cycKer to match the cycle of buf 00294 00295 ArithT* cycKer = allocator.allocate(kerWidth); 00296 for (int k=0 ; k<kerWidth ; k++) { 00297 cycKer[lineIdx] = kernel[k]; 00298 lineIdx = (lineIdx + 1) % kerWidth; 00299 } 00300 00301 ArithT neutralElement = RedOpT::neutralElement(); 00302 for (int x=0 ; x<dstWidth ; x++) { 00303 ArithT* bufPtr = &buf[x]; 00304 ArithT result(neutralElement); 00305 int idx = 0; 00306 for (int k=0; k<kerWidth; k++) { 00307 redOp.doIt(result, pixOp.doIt(bufPtr[idx], cycKer[k])); 00308 idx += dstWidth; 00309 } 00310 dstPtr.writeIncX(result); 00311 } 00312 00313 allocator.deallocate(cycKer, kerWidth); 00314 } |
|
Line : Y direction, "vertical buffer" (inc).
00323 { 00324 HxPixelAllocator<ArithT> allocator; 00325 00326 // Copy kernel data into cycKer to match the cycle of buf 00327 00328 ArithT* cycKer = allocator.allocate(kerWidth); 00329 for (int k=0 ; k<kerWidth ; k++) { 00330 cycKer[lineIdx] = kernel[k]; 00331 lineIdx = (lineIdx + 1) % kerWidth; 00332 } 00333 00334 ArithT neutralElement = RedOpT::neutralElement(); 00335 int idx = 0; 00336 for (int x=0 ; x<dstWidth ; x++) { 00337 ArithT* bufPtr = &buf[idx]; 00338 ArithT result(neutralElement); 00339 for (int k=0; k<kerWidth; k++) { 00340 redOp.doIt(result, pixOp.doIt(bufPtr[k], cycKer[k])); 00341 } 00342 dstPtr.writeIncX(result); 00343 idx += kerWidth; 00344 } 00345 00346 allocator.deallocate(cycKer, kerWidth); 00347 } |
|
Line : X and Y direction, "vertical buffer", two-way cyclic (inc).
/**
 * Line : X and Y direction, "vertical buffer", two-way cyclic (inc).
 *
 * For each of the dstWidth positions of a line:
 *  - applies kernel1 in the X direction to the source pixels and stores the
 *    value at bufPtr[ker2Width - 1];
 *  - applies kernel2 to bufPtr[0 .. ker2Width-1] and writes the value with
 *    dstPtr.writeIncX();
 *  - advances bufPtr by ker2Width, wrapping back by dstWidth * ker2Width
 *    once it reaches buf + dstWidth * ker2Width.
 *
 * @param dstPtr     destination pointer (passed by value)
 * @param buf        start of the buffer; used only to compute the wrap point
 * @param bufPtr     buffer position for the first column of this line
 * @param srcPtr     source pointer (passed by value); advanced with incX()
 *                   after each position
 * @param kernel1    kernel elements for the X direction
 * @param kernel2    kernel elements for the Y direction
 * @param dstWidth   number of results to produce
 * @param ker1Width  number of elements in kernel1
 * @param ker2Width  number of elements in kernel2
 * @param pixOp      binary pixel operation
 * @param redOp      reduction operation; RedOpT::neutralElement() supplies
 *                   the accumulator start value
 */
template<class DstDataPtrT, class SrcDataPtrT, class ArithT,
         class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_XYdirVerCycInc(DstDataPtrT dstPtr, ArithT* buf,
                                       ArithT* bufPtr, SrcDataPtrT srcPtr,
                                       ArithT* kernel1, ArithT* kernel2,
                                       int dstWidth, int ker1Width,
                                       int ker2Width,
                                       PixOpT& pixOp, RedOpT& redOp)
{
    ArithT neutralElement = RedOpT::neutralElement();
    int lastKerElt = ker2Width - 1;
    int bufCycSize = dstWidth * ker2Width;
    ArithT* bufOverflow = buf + bufCycSize;

    for (int x = 0; x < dstWidth; x++) {
        // first do the X direction (from src to buf) for the last element
        // needed for the kernel in the Y direction (all other elements
        // needed for the Y direction are already computed and stored in buf)
        SrcDataPtrT sPtr(srcPtr);
        ArithT result1(neutralElement);
        for (int k1 = 0; k1 < ker1Width; k1++)
            redOp.doIt(result1, pixOp.doIt(sPtr.readIncX(), kernel1[k1]));
        bufPtr[lastKerElt] = result1;
        srcPtr.incX();

        // now the Y direction from buf to dst
        ArithT result2(neutralElement);
        for (int k2 = 0; k2 < ker2Width; k2++)
            redOp.doIt(result2, pixOp.doIt(bufPtr[k2], kernel2[k2]));
        dstPtr.writeIncX(result2);

        bufPtr += ker2Width;
        if (bufPtr >= bufOverflow)
            bufPtr -= bufCycSize;
    }
}
|
Line : X and Y direction, "minimal buffer" (inc).
/**
 * Line : X and Y direction, "minimal buffer" (inc).
 *
 * Two passes over one line:
 *  1. Y direction: for each of the srcWidth source columns, kernel2 is
 *     applied downwards (read() followed by incY()) and the value is stored
 *     in buf[x].
 *  2. X direction: for each of the dstWidth positions, kernel1 is applied
 *     to buf[x .. x + ker1Width - 1] and the value is written with
 *     dstPtr.writeIncX().
 *
 * @param dstPtr     destination pointer (passed by value)
 * @param buf        scratch line; indices 0 .. srcWidth-1 are overwritten
 * @param srcPtr     source pointer (passed by value); advanced with incX()
 *                   after each column
 * @param kernel1    kernel elements for the X direction
 * @param kernel2    kernel elements for the Y direction
 * @param srcWidth   number of source columns to reduce in the Y direction
 * @param dstWidth   number of results to produce
 * @param ker1Width  number of elements in kernel1
 * @param ker2Width  number of elements in kernel2
 * @param pixOp      binary pixel operation
 * @param redOp      reduction operation; RedOpT::neutralElement() supplies
 *                   the accumulator start value
 */
template<class DstDataPtrT, class SrcDataPtrT, class ArithT,
         class PixOpT, class RedOpT>
void
HxFuncGenConv2dSep_Line_XYdirMinInc(DstDataPtrT dstPtr, ArithT* buf,
                                    SrcDataPtrT srcPtr,
                                    ArithT* kernel1, ArithT* kernel2,
                                    int srcWidth, int dstWidth,
                                    int ker1Width, int ker2Width,
                                    PixOpT& pixOp, RedOpT& redOp)
{
    ArithT neutralElement = RedOpT::neutralElement();

    // first Y dir from src to buf using kernel2

    for (int x = 0; x < srcWidth; x++) {
        SrcDataPtrT sPtr(srcPtr);
        ArithT result(neutralElement);
        for (int k = 0; k < ker2Width; k++) {
            redOp.doIt(result, pixOp.doIt(sPtr.read(), kernel2[k]));
            sPtr.incY();
        }
        buf[x] = result;
        srcPtr.incX();
    }

    // now X dir from buf to dst using kernel1

    for (int x2 = 0; x2 < dstWidth; x2++) {
        ArithT* bufPtr = &buf[x2];
        ArithT result(neutralElement);
        for (int k = 0; k < ker1Width; k++)
            redOp.doIt(result, pixOp.doIt(bufPtr[k], kernel1[k]));
        dstPtr.writeIncX(result);
    }
}
|
Copy (1d) kernel to an array of ArithT elements.
00466 { 00467 HxPixelAllocator<ArithType> allocator; 00468 HxSizes kerSize = kernel.sizes(); 00469 int width = kerSize.x(); 00470 ArithType* kerArray = allocator.allocate(width); 00471 for (int i=0; i<width; i++) 00472 kerArray[i] = kernel(i); 00473 return kerArray; 00474 } |
|
GenConv2dSep : simple. The conceptually simple implementation basically does the X direction from src to a scratch image, and then the Y direction from the scratch image to dst. src has a complete border (so both in the X and Y direction). scratch has a border in the Y direction only. dst has no border. Has two basic versions (designated by vType). The versions do (conceptually) the same algorithm in the X direction. With vType == 0 the Y direction is processed in column order (so first the whole column with X=0 is processed before we move on to the next column). With vType == {1,2} the Y direction is processed in a row-wise manner. That is, the kernel is applied at the first position in all columns before it is applied to the next position (again in all columns). Further variations determine whether the processing of a single kernel is done in a separate function or not.
00510 { 00511 typedef typename KernelT::ArithType ArithType; 00512 HxPixelAllocator<ArithType> allocator; 00513 00514 int ker1Width = kernel1.sizes().x(); 00515 ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); 00516 00517 int ker2Width = kernel2.sizes().x(); 00518 ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); 00519 00520 int srcWidth = srcSize.x(); 00521 int srcHeight = srcSize.y(); 00522 int dstWidth = dstSize.x(); 00523 int dstHeight = dstSize.y(); 00524 00525 // Allocate scratchImage of size (dstWidth,srcHeight) 00526 00527 int scratchSize = dstWidth * srcHeight; 00528 ArithType* scratch = allocator.allocate(scratchSize); 00529 ArithType* sLine; 00530 int x, y; 00531 00532 // do the x direction from src to scratch 00533 00534 for (y=0 ; y<srcHeight ; y++) { 00535 SrcDataPtrType sPtr(srcPtr); 00536 sPtr.incY(y); 00537 sLine = &scratch[y * dstWidth]; 00538 if (vType == 1) 00539 HxFuncGenConv2dSep_Line_Xdir( 00540 sLine, sPtr, ker1Array, srcWidth, dstWidth, ker1Width, 00541 pixOp, redOp); 00542 if ((vType == 0) || (vType == 2)) 00543 HxFuncGenConv2dSep_Line_XdirInc( 00544 sLine, sPtr, ker1Array, srcWidth, dstWidth, ker1Width, 00545 pixOp, redOp); 00546 } 00547 00548 // do the y direction from scratch to dst 00549 00550 if (vType == 0) { 00551 for (x=0 ; x<dstWidth ; x++) { 00552 DstDataPtrType dPtr(dstPtr); 00553 dPtr.incX(x); 00554 sLine = &scratch[x]; 00555 HxFuncGenConv2dSep_Line_YdirNaiInc( 00556 dPtr, sLine, ker2Array, dstWidth, dstHeight, ker2Width, 00557 pixOp, redOp); 00558 } 00559 } else { // vType == 1 or 2 00560 for (y=0 ; y<dstHeight ; y++) { 00561 DstDataPtrType dPtr(dstPtr); 00562 dPtr.incY(y); 00563 sLine = &scratch[y * dstWidth]; 00564 if (vType == 1) 00565 HxFuncGenConv2dSep_Line_YdirSim( 00566 dPtr, sLine, ker2Array, dstWidth, ker2Width, pixOp, redOp); 00567 if (vType == 2) 00568 HxFuncGenConv2dSep_Line_YdirSimInc( 00569 dPtr, sLine, ker2Array, dstWidth, ker2Width, pixOp, redOp); 00570 } 
00571 } 00572 00573 allocator.deallocate(ker1Array, ker1Width); 00574 allocator.deallocate(ker2Array, ker2Width); 00575 allocator.deallocate(scratch, scratchSize); 00576 } |
|
GenConv2dSep : "horizontal buffer". This implementation does the X direction from src to a "horizontal buffer", and then the Y direction from the buffer to dst. Processing the image is done in a row-wise manner. The buffer holds just enough (processed) rows of the image to be able to apply the kernel in the Y direction. So, the buffer is used in a cyclic manner. The buffer is "horizontal" in that the data organization matches that of the images. That is, if pixels are next to each other on a row in src, their processed results are also next to each other in the buffer. The size of the buffer is dstWidth x ker2Width. src has a complete border (so both in the X and Y direction). The buffer has no border. dst has no border. Has two versions (designated by vType) that determine whether the processing of a single kernel is done in a separate function or not.
00607 { 00608 typedef typename KernelT::ArithType ArithType; 00609 HxPixelAllocator<ArithType> allocator; 00610 00611 int ker1Width = kernel1.sizes().x(); 00612 ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); 00613 00614 int ker2Width = kernel2.sizes().x(); 00615 ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); 00616 00617 int srcWidth = srcSize.x(); 00618 int srcHeight = srcSize.y(); 00619 int dstWidth = dstSize.x(); 00620 int dstHeight = dstSize.y(); 00621 00622 // Allocate buffer of size (dstWidth,ker2Width) 00623 00624 int bufSize = dstWidth * ker2Width; 00625 ArithType* buf = allocator.allocate(bufSize); 00626 ArithType* bufLine; 00627 int lineIdx = 0; // (cyclic) line index in buf 00628 00629 // initialize buf with first ker2Width - 1 lines 00630 int y; 00631 for (y=0 ; y<ker2Width-1 ; y++) { 00632 bufLine = &buf[lineIdx * dstWidth]; 00633 if (vType == 3) 00634 HxFuncGenConv2dSep_Line_Xdir( 00635 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width, 00636 pixOp, redOp); 00637 if (vType == 4) 00638 HxFuncGenConv2dSep_Line_XdirInc( 00639 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width, 00640 pixOp, redOp); 00641 lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic 00642 srcPtr.incY(); 00643 } 00644 00645 // now do the image 00646 00647 for (y=0 ; y<dstHeight ; y++) { 00648 00649 // do X direction to next line in the buffer 00650 // the next line is actually the last "element" for ker2 in the Y direction 00651 00652 bufLine = &buf[lineIdx * dstWidth]; 00653 if (vType == 3) 00654 HxFuncGenConv2dSep_Line_Xdir( 00655 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width, 00656 pixOp, redOp); 00657 if (vType == 4) 00658 HxFuncGenConv2dSep_Line_XdirInc( 00659 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, ker1Width, 00660 pixOp, redOp); 00661 lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic 00662 srcPtr.incY(); 00663 00664 // do Y direction from buffer to dstImg 00665 // Since the 
buffer is cyclic, the "ker2" location starts at lineIdx 00666 00667 if (vType == 3) 00668 HxFuncGenConv2dSep_Line_YdirHor( 00669 dstPtr, buf, ker2Array, dstWidth, lineIdx, ker2Width, 00670 pixOp, redOp); 00671 if (vType == 4) 00672 HxFuncGenConv2dSep_Line_YdirHorInc( 00673 dstPtr, buf, ker2Array, dstWidth, lineIdx, ker2Width, 00674 pixOp, redOp); 00675 dstPtr.incY(); 00676 } 00677 00678 allocator.deallocate(ker1Array, ker1Width); 00679 allocator.deallocate(ker2Array, ker2Width); 00680 allocator.deallocate(buf, bufSize); 00681 } |
|
GenConv2dSep : "vertical buffer". This implementation does the X direction from src to a "vertical buffer", and then the Y direction from the buffer to dst. Processing the image is done in a row-wise manner. The buffer holds just enough (processed) rows of the image to be able to apply the kernel in the Y direction. So, the buffer is used in a cyclic manner. The buffer is "vertical" in that the data organization is rotated w.r.t. the images. That is, if pixels are next to each other on a row in src, their processed results are ker2Width apart in the buffer. In other words, the processing of a (horizontal) row in the image is stored in a (vertical) column of the buffer. The size of the buffer is ker2Width x dstWidth. src has a complete border (so both in the X and Y direction). The buffer has no border. dst has no border. Has two versions (designated by vType) that determine whether the processing of a single kernel is done in a separate function or not.
00714 { 00715 typedef typename KernelT::ArithType ArithType; 00716 HxPixelAllocator<ArithType> allocator; 00717 00718 int ker1Width = kernel1.sizes().x(); 00719 ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); 00720 00721 int ker2Width = kernel2.sizes().x(); 00722 ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); 00723 00724 int srcWidth = srcSize.x(); 00725 int srcHeight = srcSize.y(); 00726 int dstWidth = dstSize.x(); 00727 int dstHeight = dstSize.y(); 00728 00729 // Allocate buffer of size (ker2Width,dstWidth) 00730 00731 int bufSize = dstWidth * ker2Width; 00732 ArithType* buf = allocator.allocate(bufSize); 00733 ArithType* bufLine; 00734 int lineIdx = 0; // (cyclic) line index in buf 00735 00736 // initialize buf with first ker2Width - 1 lines 00737 int y; 00738 for (y=0 ; y<ker2Width-1 ; y++) { 00739 bufLine = &buf[lineIdx]; 00740 HxFuncGenConv2dSep_Line_XdirVerInc( 00741 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, 00742 ker1Width, ker2Width, pixOp, redOp); 00743 lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic 00744 srcPtr.incY(); 00745 } 00746 00747 // now do the image 00748 00749 for (y=0 ; y<dstHeight ; y++) { 00750 00751 // do X direction to next line in the buffer 00752 // the next line is actually the last "element" for ker2 in the Y direction 00753 00754 bufLine = &buf[lineIdx]; 00755 HxFuncGenConv2dSep_Line_XdirVerInc( 00756 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, 00757 ker1Width, ker2Width, pixOp, redOp); 00758 lineIdx = (lineIdx + 1) % ker2Width; // buffer is cyclic 00759 srcPtr.incY(); 00760 00761 // do Y direction from buffer to dstImg 00762 // Since the buffer is cyclic, the "ker2" location starts at lineIdx 00763 00764 HxFuncGenConv2dSep_Line_YdirVerInc( 00765 dstPtr, buf, ker2Array, dstWidth, lineIdx, ker2Width, 00766 pixOp, redOp); 00767 dstPtr.incY(); 00768 } 00769 00770 allocator.deallocate(ker1Array, ker1Width); 00771 allocator.deallocate(ker2Array, ker2Width); 00772 
allocator.deallocate(buf, bufSize); 00773 } |
|
GenConv2dSep : "vertical buffer", two-way cyclic (localized computation). This implementation does the X direction from src to a "vertical buffer", and then the Y direction from the buffer to dst. Processing the image is done in a row-wise manner. The buffer holds just enough (processed) rows of the image to be able to apply the kernel in the Y direction. So, the buffer is used in a cyclic manner. The buffer is "vertical" in that the data organization is rotated w.r.t. the images. That is, if pixels are next to each other on a row in src, their processed results are ker2Width apart in the buffer. In other words, the processing of a (horizontal) row in the image is stored in a (vertical) column of the buffer. The size of the buffer is ker2Width x (dstWidth + 1). The 1 is for overflow. In this variation, the buffer is not only cyclic in the "X direction" (that is when storing "row results" in columns) but it is also cyclic in the "Y direction". That is, a "row result" is not always stored at the beginning of a column of the buffer. src has a complete border (so both in the X and Y direction). The buffer has no border. dst has no border.
00808 { 00809 typedef typename KernelT::ArithType ArithType; 00810 HxPixelAllocator<ArithType> allocator; 00811 00812 int ker1Width = kernel1.sizes().x(); 00813 ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); 00814 00815 int ker2Width = kernel2.sizes().x(); 00816 ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); 00817 00818 int srcWidth = srcSize.x(); 00819 int srcHeight = srcSize.y(); 00820 int dstWidth = dstSize.x(); 00821 int dstHeight = dstSize.y(); 00822 00823 // Allocate buffer of size (ker2Width, dstWidth+1) 00824 00825 int bufCycSize = ker2Width * dstWidth; // size of the "cyclic" part 00826 int bufTotSize = bufCycSize + ker2Width; // the total size 00827 ArithType* buf = allocator.allocate(bufTotSize); 00828 ArithType* bufOverflow = buf + bufCycSize; 00829 00830 // initialize buf with first ker2Width - 1 lines 00831 00832 int lineIdx = 0; 00833 int y; 00834 for (y=0 ; y<ker2Width-1 ; y++) { 00835 ArithType *bufLine = &buf[lineIdx]; 00836 HxFuncGenConv2dSep_Line_XdirVerInc( 00837 bufLine, srcPtr, ker1Array, srcWidth, dstWidth, 00838 ker1Width, ker2Width, pixOp, redOp); 00839 lineIdx++; 00840 srcPtr.incY(); 00841 } 00842 00843 // now do the image 00844 00845 ArithType* bufPtr= buf; 00846 for (y=dstHeight ; y>0 ; ) { 00847 int k; 00848 for (k=ker2Width < y ? ker2Width : y ; --k >= 0 ; y--) { 00849 HxFuncGenConv2dSep_Line_XYdirVerCycInc( 00850 dstPtr, buf, bufPtr, srcPtr, ker1Array, ker2Array, 00851 dstWidth, ker1Width, ker2Width, pixOp, redOp); 00852 dstPtr.incY(); 00853 srcPtr.incY(); 00854 bufPtr++; 00855 } 00856 // buffer is cyclic 00857 if (bufPtr >= bufOverflow) 00858 bufPtr -= bufCycSize; 00859 // copy overflow of buffer to first line of buffer 00860 for (k=0; k<ker2Width; k++) 00861 buf[k] = bufOverflow[k]; 00862 } 00863 00864 allocator.deallocate(ker1Array, ker1Width); 00865 allocator.deallocate(ker2Array, ker2Width); 00866 allocator.deallocate(buf, bufTotSize); 00867 } |
|
GenConv2dSep : "minimal buffer". This implementation does the Y (!) direction from src to a buffer, and then the X direction from the buffer to dst. Processing the image is done in a row-wise manner. Since the Y direction is done first, the buffer needs to contain only one row of results. The size of the buffer is srcWidth. src has a complete border (so both in the X and Y direction). the buffer has a border in the X direction only. dst has no border.
00890 { 00891 typedef typename KernelT::ArithType ArithType; 00892 HxPixelAllocator<ArithType> allocator; 00893 00894 int ker1Width = kernel1.sizes().x(); 00895 ArithType* ker1Array = HxFuncGenConv2dSep_CopyKernel(kernel1, ArithType()); 00896 00897 int ker2Width = kernel2.sizes().x(); 00898 ArithType* ker2Array = HxFuncGenConv2dSep_CopyKernel(kernel2, ArithType()); 00899 00900 int srcWidth = srcSize.x(); 00901 int srcHeight = srcSize.y(); 00902 int dstWidth = dstSize.x(); 00903 int dstHeight = dstSize.y(); 00904 00905 // Allocate buffer of size (srcWidth) 00906 00907 int bufSize = srcWidth; 00908 ArithType* buf = allocator.allocate(bufSize); 00909 00910 // now do the image 00911 00912 for (int y=0 ; y<dstHeight ; y++) { 00913 HxFuncGenConv2dSep_Line_XYdirMinInc( 00914 dstPtr, buf, srcPtr, ker1Array, ker2Array, srcWidth, dstWidth, 00915 ker1Width, ker2Width, pixOp, redOp); 00916 srcPtr.incY(); 00917 dstPtr.incY(); 00918 } 00919 00920 allocator.deallocate(ker1Array, ker1Width); 00921 allocator.deallocate(ker2Array, ker2Width); 00922 allocator.deallocate(buf, bufSize); 00923 } |
|
Dispatch function for GenConv2dSep (see Global functions for GenConv2dSep) Dispatch is based on the vType parameter. Assertions:
00948 { 00949 switch (vType) { 00950 case 0: 00951 case 1: 00952 case 2: 00953 HxFuncGenConv2dSep_Sim( 00954 dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType); 00955 break; 00956 case 3: 00957 case 4: 00958 HxFuncGenConv2dSep_Hor( 00959 dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType); 00960 break; 00961 case 5: 00962 HxFuncGenConv2dSep_Ver( 00963 dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType); 00964 break; 00965 case 6: // this is the default (set in HxImgFtorGenConv2dSep) 00966 HxFuncGenConv2dSep_VerCyc( 00967 dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType); 00968 break; 00969 case 7: 00970 HxFuncGenConv2dSep_Min( 00971 dstPtr, srcPtr, kernel1, kernel2, dstSize, srcSize, pixOp, redOp, vType); 00972 break; 00973 default : 00974 HxEnvironment::instance()->errorStream() 00975 << "HxFuncGenConv2dSepDispatch: unknown vType " << vType 00976 << STD_ENDL; 00977 HxEnvironment::instance()->flush(); 00978 } 00979 } |