Impala Documentation

00001 #ifndef Impala_Core_VideoSet_KfrMotionExtractor_h
00002 #define Impala_Core_VideoSet_KfrMotionExtractor_h
00003 
00004 // created by Jun Wu, on Dec 18, 2009
00005 //
00006 // purpose: extract motion feature before and after keyframes.
00007 // Note: 
00008 // 1) we maintaince a window, the center frame is key-frame
00009 // 2) within it, we also have a sliding buffer, to compute the energy distribution function
00010 // 3) considering all siding buffer as a sequence, we use the average as the final output
00011 //      window size: 7, 9, 11, 13, ...
00012 //      default buffer size: = 5
00013 // 4) arguments: requires --srcWindow "size;9;rgb2ooo"
00014 // 5) to represent dominant motion (horizontal,vertical,radial), we use three motion filters
00015 
00016 #include "Core/VideoSet/Reporter.h"
00017 #include "Core/Vector/VectorTem.h"
00018 #include "Core/Feature/FeatureTable.h"
00019 #include "Core/Stream/SeqConvKernel.h"
00020 #include "Core/Array/RGB2Gray.h"
00021 
00022 // comment this line when commit to impala
00023 //#define __BITMAP_FUNC__
00024 
00025 #ifdef __BITMAP_FUNC__
00026 #include "Core/Feature/Bitmap.h"
00027 #endif
00028 
00029 // motion estimation only supports 16x16 block.
00030 // Note: do NOT change marco MVBLOCKSIZE
00031 #define MVBLOCKSIZE   16 
00032 
00033 #define MAX_DISP    10
00034 #define TPL_NUM      3
00035 
00036 #define BYTE unsigned char
00037 #define BOOL bool
00038 #define UINT unsigned int
00039 
00040 
00041 namespace Impala
00042 {
00043 namespace Core
00044 {
00045 namespace VideoSet
00046 {
00047 
00048 class KfrMotionExtractor : public Listener
00049 {
00050 
00051 protected:
00052 
00053     typedef Vector::VectorTem<Real64> VectorReal64;
00054 
00055 public:
00056 
00057     KfrMotionExtractor(Reporter* reporter, 
00058         CmdOptions& options, CString featureName, int binCount)
00059     {
00060         mReporter = reporter;
00061         mFeatureName = featureName;
00062         mBinCount = binCount;
00063         Feature::FeatureDefinition def(GetFeatureName());
00064         mHistogramTable = new Feature::FeatureTable(def, 1000, GetBinCount());
00065 
00066         m_bNormalize = TRUE;
00067         m_verbose = FALSE;
00068         m_verbose2 = FALSE;
00069     }
00070 
00071     KfrMotionExtractor(Reporter* reporter, CmdOptions& options)
00072     {
00073         mReporter = reporter;
00074         mFeatureName = "kfrmotion";
00075         mBinCount = 2 * TPL_NUM;
00076         Feature::FeatureDefinition def(GetFeatureName());
00077         mHistogramTable = new Feature::FeatureTable(def, 1000, GetBinCount());
00078 
00079         m_bNormalize = TRUE;
00080 
00081         m_verbose = FALSE;
00082         m_verbose2 = FALSE;
00083         //m_verbose = TRUE;
00084         //m_verbose2 = TRUE;
00085         
00086     }
00087 
00088     virtual 
00089     ~KfrMotionExtractor()
00090     {
00091     }
00092 
00093     String
00094     GetFeatureName() const
00095     {
00096         return mFeatureName;
00097     }
00098 
00099     virtual void
00100     HandleNewFrame(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00101     {
00102         if (!src)
00103         {
00104             ILOG_ERROR("Not a valid source for fileId " << fileId);
00105             return;
00106         }
00107 
00108         Array::Array2dVec3UInt8* im = Array::ArrayCreate<Array::Array2dVec3UInt8>
00109             (src->FrameWidth(), src->FrameHeight(), 0, 0, src->DataPtr(), true);
00110         if (!im)
00111         {
00112             ILOG_ERROR("Couldn't load image for fileId " << fileId);
00113             return;
00114         }
00115 
00116         if (m_verbose)
00117         {
00118             std::cout << "Current Frame No. = " << src->FrameNr() << std::endl;
00119         }
00120 
00121 #ifdef __BITMAP_FUNC__
00122         Core::Feature::Bitmap bitmap;
00123         bitmap.SaveRgb2BitmapFile("test_rgb_00.bmp",src->DataPtr(), src->FrameWidth(), src->FrameHeight() );
00124 #endif
00125 
00126         /*VectorReal64 histogram = ComputeHistogram(im);
00127         Quid quid = vs->GetQuidFrame(fileId, src->FrameNr());
00128         mHistogramTable->Add(quid, histogram);*/
00129 
00130         double feature[2*TPL_NUM];
00131         memset(feature,0,sizeof(double)*2*TPL_NUM);
00132 
00133         // For temporal filtering use something like this instead:
00134         // (requires --srcWindow "size;5;rgb2ooo")
00135         typedef Stream::RgbDataSrcWindow<Stream::WindowPrepRgb2Ooo> WindowType;
00136         WindowType* window = dynamic_cast<WindowType*>(src);
00137         if (window)
00138         {
00139             if (m_verbose)
00140             {
00141                 std::cout << "window size = " << window->WindowSize() << std::endl;
00142             }
00143 
00144             //Array::Array2dScalarReal64* tGauss = 0;
00145             //double temporalSigma = 0.75;
00146             //tGauss = Array::MakeGaussian1d(temporalSigma, 0, 3.0, 100);
00147             //Array::Array2dVec3Real64* res = 0;
00148             //SeqConvKernel(res, window, tGauss);
00150             //Array::Array2dVec3UInt8* src;
00151             //Array::InvWiccest(res, src, false, false, 0.75, 3, true);
00152 
00153 
00154             // test looping for all frames within the window
00155             //for (int i=0 ; i<window->WindowSize(); i++)
00156             //{
00157             //    Array::Array2dVec3UInt8* cur = Array::ArrayCreate<Array::Array2dVec3UInt8>
00158             //        (window->FrameWidth(), window->FrameHeight(), 0, 0, window->DataPtrWindow(i), true);
00159             //    if (!cur)
00160             //    {
00161             //        ILOG_ERROR("Couldn't load image for fileId " << fileId);
00162             //        return;
00163             //    }
00164 
00165             //    VectorReal64 cur_histogram = ComputeHistogram(cur);
00166             //    Quid quid = vs->GetQuidFrame(fileId, window->FrameNr() - window->WindowSize()/2 + i);
00167             //    mHistogramTable->Add(quid, cur_histogram);
00168             //}
00169             
00170 
00171             int bWidth = window->FrameWidth()/MVBLOCKSIZE;
00172             int bHeight = window->FrameHeight()/MVBLOCKSIZE;
00173             
00174             // Note: we have tested these pairs: (10,7) (7,10) (10,8) (8,10) (22,18) (20,15)
00175             //int bWidth = 10;
00176             //int bHeight = 7;
00177 
00178             for (int t=0;t<TPL_NUM;t++)
00179             {
00180                 m_Weight[t] = new double[bWidth * bHeight];
00181                 memset(m_Weight[t], 0, sizeof(double)* bWidth * bHeight);
00182 
00183                 if (m_verbose2)
00184                 {
00185                     DumpWeightMatrix(m_Weight[t], bWidth, bHeight, t);
00186                 }
00187             }
00188 
00189             m_EnergyBuffer = new BYTE[bWidth * bHeight];
00190             m_EnergyMap = new double[bWidth * bHeight];
00191             m_Buffer = new BYTE[bWidth * bHeight * 2];
00192 
00193             // Initialization
00194             LoadEnergyTemplate(bWidth, bHeight);
00195 
00196             // calculate the mean of weight for all templates
00197             CalcTPLMean(bWidth, bHeight);
00198 
00199             m_MVseq = new BYTE[bWidth * bHeight * 2 * (window->WindowSize()-1)];
00200             memset(m_MVseq, 0, sizeof(BYTE)*bWidth * bHeight * 2 * (window->WindowSize()-1) );
00201             //DumpMVFseq(m_MVseq, bWidth, bHeight, window->WindowSize()-1);
00202 
00203             // compute motion features between adjacent frames
00204             // if window size as 9: 0-1,1-2,2-3,3-4,4-5,5-6,6-7,7-8,8-9
00205             // based on these motion vectors field, we compute the energy distribution
00206             for (int i=1 ; i<window->WindowSize() ; i++)
00207             {
00208                 // previous frame: pre
00209                 Array::Array2dVec3UInt8* pre_rgb = Array::ArrayCreate<Array::Array2dVec3UInt8>
00210                     (window->FrameWidth(), window->FrameHeight(), 0, 0, window->DataPtrWindow(i-1), true);
00211                 if (!pre_rgb)
00212                 {
00213                     ILOG_ERROR("Couldn't load image for fileId " << fileId);
00214                     return;
00215                 }
00216 
00217                 // current frame: cur
00218                 Array::Array2dVec3UInt8* cur_rgb = Array::ArrayCreate<Array::Array2dVec3UInt8>
00219                     (window->FrameWidth(), window->FrameHeight(), 0, 0, window->DataPtrWindow(i), true);
00220                 if (!cur_rgb)
00221                 {
00222                     ILOG_ERROR("Couldn't load image for fileId " << fileId);
00223                     return;
00224                 }
00225 
00226 #ifdef __BITMAP_FUNC__
00227                 //Core::Feature::Bitmap bitmap;
00228                 //bitmap.SaveRgb2BitmapFile("test_rgb_0.bmp",cur_rgb->CPB(), cur_rgb->CW(), cur_rgb->CH() );
00229 #endif
00230                 //Array2dScalarReal64* pre = 0;
00231                 //Array2dScalarReal64* cur = 0;
00232                 Array::Array2dScalarUInt8* pre = 0;
00233                 Array::Array2dScalarUInt8* cur = 0;
00234                 RGB2Gray(pre,pre_rgb);
00235                 RGB2Gray(cur,cur_rgb);
00236 
00237                 // motion vector computing between (pre,cur)
00238                 Int64 body = pre->BW() + pre->BH();
00239 
00240 #ifdef __BITMAP_FUNC__
00241                 //Core::Feature::Bitmap bitmap;
00242                 //bitmap.SaveGray2BitmapFile("test_gray_0.bmp",cur->CPB(), cur->CW(), cur->CH() );
00243 #endif
00244                 CalcMotionVectors((unsigned char*)pre->CPB(),(unsigned char*)cur->CPB(),window->FrameWidth(),window->FrameHeight(),m_MVseq, i-1);
00245 
00246                 if (m_verbose2) DumpMVF(m_MVseq, bWidth, bHeight, i-1);
00247 
00248                 // DO remember: release the memory
00249                 delete pre_rgb;
00250                 delete cur_rgb;
00251                 delete pre;
00252                 delete cur;
00253 
00254             }
00255             //DumpMVFseq(m_MVseq, bWidth, bHeight, window->WindowSize()-1);
00256 
00257             // -----------------------------------------------------
00258             // #frame within the window:  window->WindowSize()
00259             // Motion Vector Field:       window->WindowSize() - 1
00260             // Energy Distribution Curve: window->WindowSize() - 2
00261             // -----------------------------------------------------
00262 
00263             // compute energy based on weighted templates
00264             // using a sliding buffer within a window
00265             int result = ComputeEnergy(window->WindowSize()-1, bWidth, bHeight, m_MVseq, feature);
00266 
00267             if (0 !=  result) return;
00268 
00269             for (int i=0;i<TPL_NUM;i++)
00270                 delete []m_Weight[i];
00271             delete []m_EnergyBuffer;
00272             delete []m_EnergyMap;
00273             delete []m_Buffer;
00274             delete []m_MVseq;
00275      
00276         }
00277         delete im;
00278 
00279         VectorReal64 mvfeature(2*TPL_NUM);
00280         for (int x=0; x<2*TPL_NUM; x++)
00281         {
00282             mvfeature[x] = feature[x];
00283         }
00284         Quid quid = vs->GetQuidFrame(fileId, src->FrameNr());
00285         mHistogramTable->Add(quid, mvfeature);
00286 
00287         ILOG_DEBUG("... done keyframe " << src->FrameNr());
00288 
00289     }
00290 
00291     virtual void
00292     HandleDoneFile(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00293     {
00294         bool binary = true;
00295         Feature::FeatureDefinition def = mHistogramTable->GetFeatureDefinition();
00296         //std::string fName = vs->GetFilePathFeatureData(true, def, fileId, false, -1, true, false);
00297         //std::string fName = vs->GetFilePathFeatureData(def, fileId, false, -1, true, false);
00298         std::string fName = vs->GetFilePathFeatureData("Keyframes", def, fileId, false, -1, true, false);
00299         
00300         if (!fName.empty())
00301             Table::Write(mHistogramTable, fName, vs->GetDatabase(), binary);
00302         mHistogramTable->SetSize(0);
00303     }
00304 
00305     // output all the MVF (Motion Vector Field) in the sequence
00306     void DumpMVFseq(BYTE* mvseq, int bWidth, int bHeight, int N)
00307     {
00308         //memset(mvseq, 0, sizeof(BYTE)*bWidth * bHeight * 2 * (window->WindowSize()-1) );
00309 
00310         for (int w=0; w<N; w++)
00311         {
00312             printf("MVF (%dx%d) for window %d:\n",bWidth,bHeight,w);
00313             for (int j=0; j<bHeight; j++)
00314             {
00315                 for (int i=0; i<bWidth; i++)
00316                 {
00317                     BYTE sx = mvseq[sizeof(BYTE)*bWidth*bHeight*2* w + (j*bWidth + i)*2 +0];
00318                     BYTE sy = mvseq[sizeof(BYTE)*bWidth*bHeight*2* w + (j*bWidth + i)*2 +1];
00319                     printf("(%d,%d) ",sx,sy);
00320                 }
00321                 printf("\n");
00322             }
00323             printf("\n");
00324         }
00325     }
00326 
00327     // output the index-th MVF (Motion Vector Field) in the sequence
00328     void DumpMVF(BYTE* mvseq, int bWidth, int bHeight, int index)
00329     {
00330         printf("MVF (%dx%d) for window %d:\n",bWidth,bHeight,index);
00331         for (int j=0; j<bHeight; j++)
00332         {
00333             for (int i=0; i<bWidth; i++)
00334             {
00335                 BYTE sx = mvseq[sizeof(BYTE)*bWidth*bHeight*2* index + (j*bWidth + i)*2 +0];
00336                 BYTE sy = mvseq[sizeof(BYTE)*bWidth*bHeight*2* index + (j*bWidth + i)*2 +1];
00337                 printf("(%d,%d) ",sx,sy);
00338             }
00339             printf("\n");
00340         }
00341         printf("\n");
00342     }
00343 
00344     void DumpEnergyMap(int bWidth, int bHeight, char* comments="")
00345     {
00346         printf("Energy Map (%dx%d): Window Index -> %s\n", bWidth, bHeight, comments);
00347         for (int j=0; j<bHeight; j++)
00348         {
00349             for (int i=0; i<bWidth; i++)
00350             {
00351                 printf("%.1f ",m_EnergyMap[j*bWidth + i]);
00352             }
00353             printf("\n");
00354         }
00355         printf("\n");
00356     }
00357 
00358     void DumpEnergyBuffer(int bWidth, int bHeight)
00359     {
00360         printf("Energy Buffer (%dx%d):\n",bWidth,bHeight);
00361         for (int j=0; j<bHeight; j++)
00362         {
00363             for (int i=0; i<bWidth; i++)
00364             {
00365                 printf("%d ",m_EnergyBuffer[j*bWidth + i]);
00366             }
00367             printf("\n");
00368         }
00369         printf("\n");
00370     }
00371 
00372     //
00373     // compute the energy distribution map based on sliding buffer,
00374     // CurBufSize: = 5 (default)
00375     // WindowLen > 5: such as 7,9,11,13,... 
00376     int ComputeEnergy(int WindowLen, int bWidth, int bHeight, BYTE* mvseq, double* feature, int CurBufSize = 5)
00377     {
00378         IsInShot = FALSE;
00379 
00380         // ZeroMemory for m_EnergyMap[] and m_EnergyBuffer[]
00381         OnShotStart(bWidth, bHeight);
00382 
00383         for (int t=0; t<2*TPL_NUM; t++)
00384         {
00385             feature[t] = 0.0;
00386         }
00387 
00388         //int CurBufSize = 5;
00389         if (WindowLen < CurBufSize)
00390         {
00391             printf("Error: WindowLen (%d) is smaller than CurBufSize (%d)\n", WindowLen, CurBufSize);
00392             return 1;
00393         }
00394         int num = WindowLen - CurBufSize;
00395         for (int index = 0; index < 1+num; index++)
00396         {
00397             // Just the fill the buffer according to the motion vector information
00398             FillBuffer(CurBufSize, bWidth, bHeight, mvseq + sizeof(BYTE)*bWidth*bHeight*2*index);
00399 
00400             int i=0;
00401             float Temp1Energy[TPL_NUM];
00402             float Temp2Energy[TPL_NUM];
00403             float diffEnergy[TPL_NUM];
00404             memset(diffEnergy,0,TPL_NUM*sizeof(float));
00405 
00406             for (i=0;i<TPL_NUM;i++)
00407             {
00408                 // Use the weight template to calculate the total energy value
00409                 float Energy = 0.0;
00410                 if (m_bNormalize)
00411                 {
00412                     Energy = CalcEnergy(i, bWidth, bHeight);
00413                 } else {
00414                     Energy = CalcEnergy(i, bWidth, bHeight, FALSE);
00415                 }
00416 
00417                 // update Temp1Energy[] and Temp2Energy[] for late compute their diffs
00418                 if (index==0)
00419                     Temp1Energy[i]=Energy;
00420                 else if (index==1)
00421                     Temp2Energy[i]=Energy;
00422                 else //index>1
00423                 {
00424                     Temp1Energy[i] = Temp2Energy[i];
00425                     Temp2Energy[i] = Energy;
00426                 }
00427 
00428                 if (m_verbose) printf("%.5f\t", Energy);
00429 
00430                 //sum up all the values, to compute averages later.
00431                 feature[i] += Energy;
00432             }
00433             //printf("\n");
00434 
00435             // Here there are TPL_NUM pairs of data, we get the diffs of them
00436             // first diff energy has been initialized as zero
00437             if (index>0)
00438             {
00439                 for (i=0;i<TPL_NUM;i++)
00440                 {
00441                     if (m_bNormalize)
00442                     {
00443                         // [-1,1] --> [0,1]
00444                         diffEnergy[i] = fabs(Temp2Energy[i] - Temp1Energy[i]);
00445                         //diffEnergy[i] = Normalization(diffEnergy[i],-1.0,1.0);
00446                     } else {
00447                         diffEnergy[i] = Temp2Energy[i] - Temp1Energy[i];
00448                     }
00449 
00450                     if (m_verbose) printf("%.5f\t", diffEnergy[i]);
00451 
00452                     //sum up all the values, to compute averages later.
00453                     feature[TPL_NUM+i] += diffEnergy[i];
00454 
00455                 }
00456                 if (m_verbose) printf("\n");
00457 
00458             } else {
00459 
00460                 //index==0
00461                 if (m_verbose) printf("diff1\tdiff2\tdiff3\n");
00462 
00463                 for (i=0;i<TPL_NUM;i++)
00464                 {
00465                     //if (m_verbose) printf("%.5f\t", diffEnergy[i]);
00466                     feature[TPL_NUM+i] = 0.0;
00467                 }
00468             }
00469         }
00470         
00471         OnShotEnd(bWidth, bHeight);
00472 
00473         // calculate the averages of the feature within current Window buffer
00474         for (int t=0; t<TPL_NUM; t++)
00475         {
00476             feature[t] /= num+1;
00477             feature[TPL_NUM+t] /= num; // one less than three original values
00478         }
00479 
00480         if (m_verbose)
00481         {
00482             printf("\nThe Average as the final feature:\n");
00483             for (int t=0; t<2*TPL_NUM; t++)
00484             {
00485                 printf("%.5f ",feature[t]);
00486             }
00487             printf("\n\n");
00488         }
00489 
00490         return 0;
00491 
00492     }
00493 
00494     // template: m_Weight[0] - m_Weight[2]:
00495     // 
00496     //Template Weigth Matrix (10x7)_0 is:
00497     // 1  5  9 13 17 21 25 29 33 37
00498     // 1  5  9 13 17 21 25 29 33 37
00499     // 1  5  9 13 17 21 25 29 33 37
00500     // 1  5  9 13 17 21 25 29 33 37
00501     // 1  5  9 13 17 21 25 29 33 37
00502     // 1  5  9 13 17 21 25 29 33 37
00503     // 1  5  9 13 17 21 25 29 33 37
00504     //
00505     //Template Weigth Matrix (10x7)_1 is:
00506     // 1  1  1  1  1  1  1  1  1  1
00507     // 6  6  6  6  6  6  6  6  6  6
00508     //11 11 11 11 11 11 11 11 11 11
00509     //16 16 16 16 16 16 16 16 16 16
00510     //21 21 21 21 21 21 21 21 21 21
00511     //26 26 26 26 26 26 26 26 26 26
00512     //31 31 31 31 31 31 31 31 31 31
00513     //
00514     //Template Weigth Matrix (10x7)_2 is:
00515     // 1  1  1  1  1  1  1  1  1  1
00516     // 1 17 17 17 17 17 17 17 17  1
00517     // 1 17 33 33 33 33 33 33 17  1
00518     // 1 17 33 49 49 49 49 33 17  1
00519     // 1 17 33 33 33 33 33 33 17  1
00520     // 1 17 17 17 17 17 17 17 17  1
00521     // 1  1  1  1  1  1  1  1  1  1
00522 
00523     // Till now, we have verified pairs for (bWidth,bHeight) as follows:
00524     //
00525     //    (10,7) (7,10) (10,8) (8,10) (22,18) (20,15)
00526     //
00527 
00528     // initialize template buffer, and record the (min,max) values
00529     void LoadEnergyTemplate(int bWidth, int bHeight)
00530     {
00531         int i=0,j=0;
00532 
00533         // pan: horizontal template
00534         int base = 1;
00535         int delta = 4;
00536         for (i=0; i<bWidth; i++)
00537             for (j=0; j<bHeight; j++)
00538             {
00539                 m_Weight[0][j*bWidth+i] = base + delta*i;
00540             }
00541         m_TPLMin[0] = base;
00542         m_TPLMax[0] = base + delta*(bWidth-1);
00543 
00544         // tilt: vertical tempate
00545         delta = 5;
00546         for (j=0; j<bHeight; j++)   
00547             for (i=0; i<bWidth; i++)
00548             {
00549                 m_Weight[1][j*bWidth+i] = base + delta*j;
00550             }
00551         m_TPLMin[1] = base;
00552         m_TPLMax[1] = base + delta*(bHeight-1);
00553 
00554         // zoom: radial tempate
00555         delta=16;
00556         int k=0;
00557         int dim = (bWidth<=bHeight) ? bWidth: bHeight;
00558         for (k=0; k<dim/2; k++)
00559         {
00560             // first column: left
00561             i=0+k;
00562             for (j=0+k; j<bHeight-k; j++)
00563             {
00564                 m_Weight[2][j*bWidth+i] = base + delta*k;
00565             }
00566 
00567             // last colum: right
00568             i=bWidth-1-k;
00569             for (j=0+k; j<bHeight-k; j++)
00570             {
00571                 m_Weight[2][j*bWidth+i] = base + delta*k;
00572             }
00573 
00574             // first row: top
00575             j=0+k;
00576             for (i=0+k; i<bWidth-k; i++)
00577             {
00578                 m_Weight[2][j*bWidth+i] = base + delta*k;
00579             }
00580 
00581             // last row: bottom
00582             j=bHeight-1-k;
00583             for (i=0+k; i<bWidth-k; i++)
00584             {
00585                 m_Weight[2][j*bWidth+i] = base + delta*k;
00586             }
00587 
00588             //DumpWeightMatrix(m_Weight[2], bWidth, bHeight, 2);
00589 
00590         }
00591 
00592         // if dim is odd, one row or one colum will NOT be process.
00593         // here, we will do this processing for the left one.
00594         m_TPLMin[2] = base;
00595         m_TPLMax[2] = base + delta*k;
00596         if (dim%2==1)
00597         {
00598             if (bWidth >= bHeight)
00599             {
00600                 // middle row
00601                 j=0+k;
00602                 for (i=0+k; i<bWidth-k; i++)
00603                 {
00604                     m_Weight[2][j*bWidth+i] = base + delta*k;
00605                 }
00606             }
00607             else
00608             {
00609                 // middle column
00610                 i=0+k;
00611                 for (j=0+k; j<bHeight-k; j++)
00612                 {
00613                     m_Weight[2][j*bWidth+i] = base + delta*k;
00614                 }
00615             }
00616 
00617             //DumpWeightMatrix(m_Weight[2], bWidth, bHeight, 2);
00618         }
00619 
00620         // output the weight matrix
00621         if (m_verbose2)
00622         {
00623             for (int t=0;t<TPL_NUM;t++)
00624             {
00625                 DumpWeightMatrix(m_Weight[t], bWidth, bHeight, t);
00626             }
00627         }
00628 
00629     }
00630 
00631     void DumpWeightMatrix(double* weight, int bWidth, int bHeight, int index)
00632     {
00633         // output the weight matrix
00634         printf("Template Weigth Matrix (%dx%d)_%d is:\n",bWidth,bHeight, index);
00635 
00636         int i,j;
00637         for (j=0; j<bHeight; j++)
00638         {
00639             for (i=0; i<bWidth; i++)
00640             {
00641                 printf("%2.d ",(int)weight[j*bWidth+i]);
00642                 //printf("%.2f ",weight[j*bWidth+i]);
00643             }
00644             printf("\n");
00645         }
00646         printf("(Min, Max) = (%.2f, %.2f)\n", m_TPLMin[index], m_TPLMax[index]);
00647         printf("\n");
00648     }
00649 
00650     void CalcTPLMean(int bWidth, int bHeight)
00651     {
00652         for (int indexOfTPL=0;indexOfTPL<TPL_NUM;indexOfTPL++)
00653         {
00654             m_TPLMean[indexOfTPL] = 0;
00655             for (int i = 0; i < bWidth * bHeight; i ++)
00656             m_TPLMean[indexOfTPL] += m_Weight[indexOfTPL][i];
00657         }
00658 
00659         if (m_verbose2)
00660         {
00661             printf("Template Mean Values TPL[0...2]: ");
00662             for (int indexOfTPL=0;indexOfTPL<TPL_NUM;indexOfTPL++)
00663             {
00664                 printf("%.2f ",m_TPLMean[indexOfTPL]);
00665             }
00666             printf("\n\n");
00667         }
00668 
00669     }
00670 
00671     void OnShotStart(int bWidth, int bHeight)
00672     {
00673         if (!IsInShot)
00674         {
00675             for (int i = 0; i < bWidth * bHeight; i++)
00676                 m_EnergyMap[i] = 1.0;
00677             memset(m_EnergyBuffer, 255, bWidth * bHeight);
00678 
00679             IsInShot = TRUE;
00680         }
00681 
00682         //DumpEnergyBuffer(bWidth, bHeight);
00683 
00684     }
00685 
00686     void OnShotEnd(int bWidth, int bHeight)
00687     {
00688         if (IsInShot)
00689         {
00690             IsInShot = FALSE;
00691             memset(m_EnergyBuffer, 0, bWidth * bHeight);
00692         }
00693 
00694     }
00695 
00696 
00697     // fill and update the energy map based on MVF sequence
00698     void FillBuffer(int WindowLen, int bWidth, int bHeight, BYTE* mvseq)
00699     {
00700         double * Add = new double[bWidth * bHeight];
00701         if (IsInShot)
00702         {
00703             UINT BufferLen = bWidth * bHeight;
00704             double * Temp = new double[BufferLen];
00705             if (Temp == NULL || Add == NULL)
00706             {
00707                 if (Temp != NULL) delete[] Temp;
00708                 if (Add != NULL) delete[] Add;
00709                 printf("Not Enough Memory! No Map Generated!");
00710                 return;
00711             }
00712 
00713             // Initialize the map value to 1.0
00714             for (int i = 0; i < BufferLen; i++)
00715             {
00716                 m_EnergyMap[i] = 1.0;
00717                 Add[i] = 0.0;
00718             }
00719 
00720             //DumpEnergyMap(bWidth, bHeight, "Initialization");
00721 
00722             for (int w = 0; w < WindowLen; w ++)
00723             {
00724                 memset(Temp, 0, BufferLen * sizeof(double));
00725 
00726                 // read the motion vectors from the "mvseq" buffer
00727                 memcpy(m_Buffer, mvseq + sizeof(BYTE)*bWidth*bHeight*2* w, BufferLen * 2);
00728 
00729                 // loop for the energy map :(20*15, for example)
00730                 for (int i = 0; i < bWidth; i++)
00731                     for (int j = 0; j < bHeight; j++)
00732                     {
00733                         double CurValue = m_EnergyMap[j * bWidth + i];
00734                         
00735                         int cx, cy;
00736                         long Ptr = 2 * (j * bWidth + i);
00737 
00738                         // (cx,cy) is the motion vector
00739                         cx = (char)m_Buffer[Ptr];
00740                         cy = (char)m_Buffer[Ptr + 1];    
00741                         
00742                         char SignX, SignY;
00743                         SignX = (cx >= 0)? 1:-1;
00744                         SignY = (cy >= 0)? 1:-1;
00745                         
00746                         int BaseX,BaseY;
00747                         BaseX = i + (int)(cx / MVBLOCKSIZE);
00748                         BaseY = j + (int)(cy / MVBLOCKSIZE);
00749                         
00750                         double Factor;
00751                         if (BaseX >= bWidth || BaseY >= bHeight || BaseX < 0 || BaseY < 0) 
00752                         {
00753                             if (abs(cx) >= abs(cy))
00754                             {
00755                                 Factor = (double)(cy) / (double)(cx);
00756                                 cx = ((SignX > 0)?bWidth:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00757                                 cy = Factor * cx + j * MVBLOCKSIZE - Factor * i * MVBLOCKSIZE;
00758                                 if (cy <= -MVBLOCKSIZE || cy >= bHeight * MVBLOCKSIZE)
00759                                 {
00760                                     cy = ((SignY > 0)?bHeight:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00761                                     cx = (cy - j * MVBLOCKSIZE + Factor * i * MVBLOCKSIZE) / Factor;
00762                                 }
00763                             }
00764                             else
00765                             {
00766                                 Factor = (double)(cx) / (double)(cy);
00767                                 cy = ((SignY > 0)?bHeight:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00768                                 cx = Factor * cy + i * MVBLOCKSIZE - Factor * j * MVBLOCKSIZE;
00769                                 if (cx <= - MVBLOCKSIZE || cx >= bWidth * MVBLOCKSIZE)
00770                                 {
00771                                     cx = ((SignX > 0)?bWidth:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00772                                     cy = (cx - i * MVBLOCKSIZE + Factor * j * MVBLOCKSIZE) / Factor;
00773                                 }
00774                             }
00775                             cx -= i * MVBLOCKSIZE;
00776                             cy -= j * MVBLOCKSIZE;
00777                             BaseX = i + (int)(cx / MVBLOCKSIZE);
00778                             BaseY = j + (int)(cy / MVBLOCKSIZE);
00779                         }
00780                         
00781                         double mX, mY;
00782                         mX = (double)(abs(cx % MVBLOCKSIZE))/MVBLOCKSIZE;
00783                         mY = (double)(abs(cy % MVBLOCKSIZE))/MVBLOCKSIZE;
00784                         if (BaseX + SignX >= bWidth || BaseX + SignX < 0) mX = 0.0;
00785                         if (BaseY + SignY >= bHeight || BaseY + SignY < 0) mY = 0.0;
00786                         
00787                         Temp[BaseY * bWidth + BaseX] += (1 - mX) * (1 - mY) * CurValue;
00788                         
00789                         if (BaseX + SignX < bWidth && BaseX + SignX >= 0)
00790                             Temp[BaseY * bWidth + BaseX + SignX] += mX * (1 - mY) * CurValue;
00791                         if (BaseY + SignY < bHeight && BaseY + SignY >= 0)
00792                         {
00793                             Temp[(BaseY + SignY) * bWidth + BaseX] += (1 - mX) * mY * CurValue;
00794                             if (BaseX + SignX < bWidth && BaseX + SignX >= 0)
00795                                 Temp[(BaseY + SignY) * bWidth + BaseX + SignX] += mX * mY * CurValue;
00796                         }
00797                         
00798                     }
00799 
00800                 double * Exch;
00801                 Exch = Temp;
00802                 Temp = m_EnergyMap;
00803                 m_EnergyMap = Exch;
00804 
00805                 for (int k = 0; k < bWidth * bHeight; k ++)
00806                     Add[k] += m_EnergyMap[k];
00807 
00808                 //char strWinLen[256];
00809                 //_itoa(w,strWinLen,10);
00810                 //DumpEnergyMap(bWidth, bHeight, strWinLen);
00811 
00812             }
00813             delete[] Temp;
00814 
00815         }
00816 
00817         // Average the energy map within the window length
00818         for (int i = 0; i < bWidth * bHeight; i ++)
00819         {
00820             m_EnergyMap[i] = Add[i] / WindowLen;
00821             m_EnergyBuffer[i] = (BYTE)(m_EnergyMap[i] * 255);
00822         }
00823         delete[] Add;
00824 
00825         // dump out the final energy map within current sliding window
00826         if (m_verbose2)
00827         {
00828             DumpEnergyMap(bWidth, bHeight, "Average within Window (size=4)");
00829             //DumpEnergyBuffer(bWidth, bHeight);
00830         }
00831 
00832     }
00833 
00834     // based on three different templates
00835     double CalcEnergy(const int indexOfTPL, int bWidth, int bHeight, BOOL bNormalized=TRUE)
00836     {
00837         double Energy = 0;
00838         for (int i = 0; i < bWidth * bHeight; i ++)
00839             Energy += m_EnergyMap[i] * m_Weight[indexOfTPL][i];
00840 
00841         if (bNormalized){
00842             //normalize the energy between [0,1]
00843             double min = m_TPLMin[indexOfTPL]*bWidth*bHeight;
00844             double max = m_TPLMax[indexOfTPL]*bWidth*bHeight;
00845             Energy = Normalization(Energy,min,max);
00846             
00847         } else {
00848             // Note: this case is OK only if all the video files are with the same size.
00849             //       Or, videos with different (bWidth,bHeight)s can NOT be compared each other.
00850             //
00851             // only adjust according to the mean of the template
00852             Energy = Energy - m_TPLMean[indexOfTPL];
00853         }
00854         return Energy;
00855     }
00856 
00857     // [min, max] --> [0,1]
00858     double Normalization(const double x, const double min, const double max)
00859     {
00860         return (x-min)/(max-min);
00861     }
00862 
00863     // the main function for the motion vector computering
00864     // pre, cur: are both gray images
00865     // ------------------------------------------------------
00866     BOOL CalcMotionVectors(unsigned char* pre, unsigned char* cur, int Width, int Height, BYTE* mvseq, int index)
00867     {
00868         // bWidth, bHeight: size of block grid
00869         int bWidth  = Width / MVBLOCKSIZE;
00870         int bHeight = Height / MVBLOCKSIZE;
00871 
00872         int i, j;
00873         int x, y, x_modify, y_modify;
00874         char sx, sy;
00875 
00876         if (pre == NULL || cur == NULL)
00877             return TRUE;
00878 
00879         int count=0;
00880         for (j = 0, y = 0; j < bHeight; j ++, y += 16)
00881         {
00882             for (i = 0, x = 0; i < bWidth; i ++, x += 16)
00883             {
00884                 x_modify = x;
00885                 y_modify = y;
00886                 DiamondSearch16(pre, cur, Width, Height, &x_modify, &y_modify);
00887                 //FullSearch16(pre, cur, Width, Height, &x_modify, &y_modify);
00888 
00889                 sx = x_modify - x;
00890                 sy = y_modify - y;
00891 
00892                 count += abs(sx)+abs(sy);
00893 
00894                 //m_MVseq = new BYTE[bWidth * bHeight * 2 * (window->WindowSize()-1)];
00895                 mvseq[bWidth*bHeight*2*index + 2*(j*bWidth+i) + 0] = sx;
00896                 mvseq[bWidth*bHeight*2*index + 2*(j*bWidth+i) + 1] = sy;
00897 
00898             }
00899         }
00900 
00901         return TRUE;
00902 
00903     }
00904 
00905     // Use diamond search method to find the best matching block postion for this block
00906     // return the value of : &x_modify, &y_modify 
00907     // ------------------------------------------------
00908     int DiamondSearch16(unsigned char *refframe, unsigned char *desframe, 
00909                         int framewidth, int frameheight, int *cx, int *cy)
00910     {
00911         static char FirstDiamondX[] = { 0,  0, -2,  2, -1,  1, -1,  1};
00912         static char FirstDiamondY[] = {-2,  2,  0,  0, -1, -1,  1,  1};
00913         static char FinalDiamondX[] = {-1,  0,  1,  0}; 
00914         static char FinalDiamondY[] = { 0, -1,  0,  1}; 
00915 
00916         static char CornerDiamondX[] = {0,  0,  1,   1,  2}; 
00917         static char CornerDiamondY[] = {2, -2,  1,  -1,  0}; 
00918 
00919         static char EdgeDiamondX[] = { 0,  1,  2}; 
00920         static char EdgeDiamondY[] = { 2,  1,  0}; 
00921 
00922         int minX = 0, minY = 0, minerror;
00923         int curX, curY;
00924         int error, signX, signY;
00925         int sX, sY;
00926 
00927         int i;
00928         unsigned char * block;
00929 
00930         // the postion of current block for the current frame!
00931         // this positon will not change during the search process
00932         block = desframe + * cy * framewidth + * cx; 
00933 
00934         curX = * cx; curY = * cy;
00935         if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
00936         {
00937             * cx = * cy = 0;
00938             return -1;
00939         }
00940 
00941         // Initialize the minerror value
00942         minerror = GetMotionError16(refframe, block, framewidth, curX, curY);
00943 
00944         sX = * cx;
00945         sY = * cy;
00946         for (i = 0; i < 8; i ++)
00947         {
00948             curX = sX + FirstDiamondX[i]; 
00949             curY = sY + FirstDiamondY[i];
00950             if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
00951                 continue;
00952 
00953             error = GetMotionError16(refframe, block, framewidth, curX, curY);
00954             if (error < minerror)
00955             {
00956                 minX = FirstDiamondX[i];
00957                 minY = FirstDiamondY[i];
00958                 minerror = error;
00959             }
00960         }
00961 
00962         while (1)
00963         {
00964             if (!minX && !minY) break;
00965 
00966             sX += minX;
00967             sY += minY;
00968 
00969             if (abs(sX - * cx) > MAX_DISP) break;
00970             if (abs(sY - * cy) > MAX_DISP) break;
00971             
00972             if (!minY)
00973             {
00974                 if (minX > 0) signX = 1;
00975                 else signX = -1;
00976 
00977                 minX = minY = 0;
00978                 for (i = 0; i < 5; i ++)
00979                 {
00980                     curX = sX + CornerDiamondX[i] * signX; 
00981                     curY = sY + CornerDiamondY[i];
00982                     if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
00983                         continue;
00984 
00985                     error = GetMotionError16(refframe, block, framewidth, curX, curY);
00986                     if (error < minerror)
00987                     {
00988                         minX = curX - sX;
00989                         minY = curY - sY;
00990                         minerror = error;
00991                     }
00992                 }
00993                 continue;
00994             }
00995             
00996             if (!minX)
00997             {
00998                 if (minY > 0) signY = 1;
00999                 else signY = -1;
01000 
01001                 minX = minY = 0;
01002                 for (i = 0; i < 5; i ++)
01003                 {
01004                     curX = sX + CornerDiamondY[i]; 
01005                     curY = sY + CornerDiamondX[i] * signY;
01006                     if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
01007                         continue;
01008 
01009                     error = GetMotionError16(refframe, block, framewidth, curX, curY);
01010                     if (error < minerror)
01011                     {
01012                         minX = curX - sX;
01013                         minY = curY - sY;
01014                         minerror = error;
01015                     }
01016                 }
01017                 continue;
01018             }
01019 
01020             if (minX > 0) signX = 1;
01021             else signX = -1;
01022             if (minY > 0) signY = 1;
01023             else signY = -1;
01024             
01025             minX = minY = 0;
01026             for (i = 0; i < 3; i ++)
01027             {
01028                 curX = sX + EdgeDiamondX[i] * signX; 
01029                 curY = sY + EdgeDiamondY[i] * signY;
01030                 if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
01031                     continue;
01032 
01033                 error = GetMotionError16(refframe, block, framewidth, curX, curY);
01034                 if (error < minerror)
01035                 {
01036                     minX = curX - sX;
01037                     minY = curY - sY;
01038                     minerror = error;
01039                 }
01040             }
01041         }
01042 
01043         for (i = 0; i < 4; i ++)
01044         {
01045             curX = sX + FinalDiamondX[i]; 
01046             curY = sY + FinalDiamondY[i];
01047             if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
01048                 continue;
01049 
01050             error = GetMotionError16(refframe, block, framewidth, curX, curY);
01051             if (error < minerror)
01052             {
01053                 minX = FinalDiamondX[i];
01054                 minY = FinalDiamondY[i];
01055                 minerror = error;
01056             }
01057         }
01058 
01059         * cx = sX + minX;
01060         * cy = sY + minY;
01061         return minerror;
01062 
01063     }
01064 
01065     int FullSearch16(unsigned char *refframe, unsigned char *desframe, int framewidth, int frameheight, int *cx, int *cy)
01066     {
01067         int minerror;
01068         int curX, curY;
01069         int error, oX, oY;
01070         int sx, sy;
01071 
01072         int i, j;
01073         unsigned char * block;
01074         block = desframe + * cy * framewidth + * cx;
01075 
01076         // first
01077         oX = curX = * cx; 
01078         oY = curY = * cy;
01079         if (curX < 0 || curX > framewidth - 16|| curY < 0 || curY > frameheight - 16) 
01080         {
01081             * cx = * cy = 0;
01082             return -1;
01083         }
01084         minerror = GetMotionError16(refframe, block, framewidth, curX, curY);
01085 
01086         for (i = 1; i <= MAX_DISP; i ++)
01087         {
01088             sx = sy = -i;
01089             for (j = 0; j < 8 * i; j ++)
01090             {
01091                 curX = oX + sx;
01092                 curY = oY + sy;
01093 
01094                 if (!(curX < 0 || curX > framewidth - 16|| curY < 0 || curY > frameheight - 16))
01095                 {
01096                     error = GetMotionError16(refframe, block, framewidth, curX, curY);
01097                     if (error < minerror)
01098                     {
01099                         minerror = error;
01100                         * cx = curX;
01101                         * cy = curY;
01102                     }
01103                 }
01104 
01105                 if (j < 2 * i) sx++;
01106                 else if (j < 4 * i) sy++;
01107                 else if (j < 6 * i) sx--;
01108                 else sy--;
01109             }
01110         }
01111 
01112         return minerror;
01113 
01114     }
01115 
01116 
01117     // ------------------------------------------------------------------------------------
01118     //    refframe:   the former frame buffer  (with fx,fy )
01119     //    block   :   the current block position in the current frame buffer (not change!)
01120     //  framewidth:   the whole frame width
01121     //    fx ,fy  :   the ref block position       (fx<=Width,fy<=Height)
01122     // ------------------------------------------------------------------------------------
01123     int GetMotionError16(unsigned char * refframe, unsigned char * block, 
01124                          int framewidth, int fx, int fy)
01125     {
01126         int i,j;
01127         int SAD = 0;
01128         BYTE* refBlock = refframe + fy*framewidth + fx;
01129 
01130         // i: fx direction (width)
01131         // j: fy direction (height)
01132         for (j=0;j<MVBLOCKSIZE;j++)
01133             for (i=0;i<MVBLOCKSIZE;i++)
01134             {
01135                 int diff = block[j*framewidth+i] - refBlock[j*framewidth+i];
01136                 diff = abs(diff);
01137                 SAD+=diff;
01138             }
01139         return SAD;
01140     }
01141 
01142 
01143 protected:
01144 
01145     int
01146     GetBinCount() const
01147     {
01148         return mBinCount;
01149     }
01150 
01151     //virtual VectorReal64
01152     //ComputeHistogram(Array::Array2dVec3UInt8* image) = 0;
01153 
01154     // compute rgb histogram
01155     /*VectorReal64
01156     ComputeHistogram(Array::Array2dVec3UInt8* image)
01157     {
01158         UInt8* data = image->CPB(0, 0);
01159         const int nPix = image->CW() * image->CH();
01160 
01161         VectorReal64 histogram(GetBinCount());
01162         Real64* histR = histogram.GetData();
01163         Real64* histG = histR + BINS_PER_CHANNEL;
01164         Real64* histB = histG + BINS_PER_CHANNEL;
01165 
01166         for (int i = 0; i < GetBinCount(); i++)
01167             histR[i] = 0;
01168 
01169         for (int j = 0; j < nPix; j++) 
01170         {
01171             const int r = ComputeBin((int) data[0], 0, 255, BINS_PER_CHANNEL);
01172             histR[r] += 1;
01173 
01174             const int g = ComputeBin((int) data[1], 0, 255, BINS_PER_CHANNEL);
01175             histG[g] += 1;
01176 
01177             const int b = ComputeBin((int) data[2], 0, 255, BINS_PER_CHANNEL);
01178             histB[b] += 1;
01179 
01180             data += 3;
01181         }
01182 
01183         const Real64 factor = nPix;
01184         Vector::DivAssign(histogram, factor);
01185 
01186         return histogram;
01187     }*/
01188 
01189     template <class T>
01190     int
01191     ComputeBin(const T val, const T low, const T high, const int nrBins) const
01192     {
01193         const int bin = (nrBins * (val - low)) / (high - low);
01194         if (bin >= 0 && bin < nrBins)
01195             return bin;
01196 
01197         if (val == high)
01198             return nrBins - 1;
01199 
01200         ILOG_WARN("Unexpected value: " << val << 
01201             " is not in range [" << low << ", " << high << "]");
01202         return (bin < 0) ? 0 : nrBins-1;
01203     }
01204 
01205 private:
01206 
01207     //static const int BINS_PER_CHANNEL = 32;
01208 
01209     Reporter*              mReporter;
01210     String                 mFeatureName;
01211     int                    mBinCount;
01212     Feature::FeatureTable* mHistogramTable;
01213 
01214     // calc the energy map from motion vectors
01215     double*                m_Weight[TPL_NUM];
01216     double                 m_TPLMean[TPL_NUM];
01217     double                 m_TPLMin[TPL_NUM];
01218     double                 m_TPLMax[TPL_NUM];
01219     double*                m_EnergyMap;
01220 
01221     BOOL                   m_bNormalize;
01222 
01223     BOOL                   IsInShot;
01224     BYTE*                  m_MVseq;   // store all the motion vectors field within the window
01225     BYTE*                  m_Buffer;
01226     BYTE*                  m_EnergyBuffer;
01227 
01228     BOOL                   m_verbose;
01229     BOOL                   m_verbose2; // for dedug only; more detailed info
01230 
01231     ILOG_VAR_DECL;
01232 };
01233 
01234 ILOG_VAR_INIT(KfrMotionExtractor, Impala.Core.VideoSet);
01235 
01236 } // namespace VideoSet
01237 } // namespace Core
01238 } // namespace Impala
01239 
01240 #endif
KfrMotionExtractor.h