Impala Documentation

00001 #ifndef Impala_Core_VideoSet_KfrMotionExtractor_h
00002 #define Impala_Core_VideoSet_KfrMotionExtractor_h
00003 
00004 // created by Jun Wu, on Dec 18, 2009
00005 //
00006 // purpose: extract motion feature before and after keyframes.
00007 // Note: 
00008 // 1) we maintaince a window, the center frame is key-frame
00009 // 2) within it, we also have a sliding buffer, to compute the energy distribution function
00010 // 3) considering all siding buffer as a sequence, we use the average as the final output
00011 //      window size: 7, 9, 11, 13, ...
00012 //      default buffer size: = 5
00013 // 4) arguments: requires --srcWindow "size;9;rgb2ooo"
00014 // 5) to represent dominant motion (horizontal,vertical,radial), we use three motion filters
00015 
00016 #include "Core/VideoSet/Reporter.h"
00017 #include "Core/Vector/VectorTem.h"
00018 #include "Persistency/FeatureTableRepository.h"
00019 #include "Core/Stream/SeqConvKernel.h"
00020 #include "Core/Array/RGB2Gray.h"
00021 
00022 // comment this line when commit to impala
00023 //#define __BITMAP_FUNC__
00024 
00025 #ifdef __BITMAP_FUNC__
00026 #include "Core/Feature/Bitmap.h"
00027 #endif
00028 
00029 // motion estimation only supports 16x16 block.
00030 // Note: do NOT change marco MVBLOCKSIZE
00031 #define MVBLOCKSIZE   16 
00032 
00033 #define MAX_DISP    10
00034 #define TPL_NUM      3
00035 
00036 #ifndef BYTE
00037 #define BYTE unsigned char
00038 #endif
00039 
00040 #ifndef BOOL
00041 #define BOOL bool
00042 #endif
00043 
00044 #ifndef UINT
00045 #define UINT unsigned int
00046 #endif
00047 
00048 
00049 namespace Impala
00050 {
00051 namespace Core
00052 {
00053 namespace VideoSet
00054 {
00055 
00056 class KfrMotionExtractor : public Listener
00057 {
00058 
00059 protected:
00060 
00061     typedef Vector::VectorTem<Real64> VectorReal64;
00062 
00063 public:
00064 
00065     KfrMotionExtractor(Reporter* reporter, 
00066         CmdOptions& options, CString featureName, int binCount)
00067     {
00068         mReporter = reporter;
00069         mFeatureName = featureName;
00070         mBinCount = binCount;
00071         Feature::FeatureDefinition def(GetFeatureName());
00072         mHistogramTable = new Feature::FeatureTable(def, 1000, GetBinCount());
00073 
00074         m_bNormalize = TRUE;
00075         m_verbose = FALSE;
00076         m_verbose2 = FALSE;
00077     }
00078 
00079     KfrMotionExtractor(Reporter* reporter, CmdOptions& options)
00080     {
00081         mReporter = reporter;
00082         mFeatureName = "kfrmotion";
00083         mBinCount = 2 * TPL_NUM;
00084         Feature::FeatureDefinition def(GetFeatureName());
00085         mHistogramTable = new Feature::FeatureTable(def, 1000, GetBinCount());
00086 
00087         m_bNormalize = TRUE;
00088 
00089         m_verbose = FALSE;
00090         m_verbose2 = FALSE;
00091         //m_verbose = TRUE;
00092         //m_verbose2 = TRUE;
00093         
00094     }
00095 
00096     virtual 
00097     ~KfrMotionExtractor()
00098     {
00099     }
00100 
00101     String
00102     GetFeatureName() const
00103     {
00104         return mFeatureName;
00105     }
00106 
00107     virtual void
00108     HandleNewFrame(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00109     {
00110         if (!src)
00111         {
00112             ILOG_ERROR("Not a valid source for fileId " << fileId);
00113             return;
00114         }
00115 
00116         Array::Array2dVec3UInt8* im = Array::ArrayCreate<Array::Array2dVec3UInt8>
00117             (src->FrameWidth(), src->FrameHeight(), 0, 0, src->DataPtr(), true);
00118         if (!im)
00119         {
00120             ILOG_ERROR("Couldn't load image for fileId " << fileId);
00121             return;
00122         }
00123 
00124         if (m_verbose)
00125         {
00126             std::cout << "Current Frame No. = " << src->FrameNr() << std::endl;
00127         }
00128 
00129 #ifdef __BITMAP_FUNC__
00130         Core::Feature::Bitmap bitmap;
00131         bitmap.SaveRgb2BitmapFile("test_rgb_00.bmp",src->DataPtr(), src->FrameWidth(), src->FrameHeight() );
00132 #endif
00133 
00134         /*VectorReal64 histogram = ComputeHistogram(im);
00135         Quid quid = vs->GetQuidFrame(fileId, src->FrameNr());
00136         mHistogramTable->Add(quid, histogram);*/
00137 
00138         double feature[2*TPL_NUM];
00139         memset(feature,0,sizeof(double)*2*TPL_NUM);
00140 
00141         // For temporal filtering use something like this instead:
00142         // (requires --srcWindow "size;5;rgb2ooo")
00143         typedef Stream::RgbDataSrcWindow<Stream::WindowPrepRgb2Ooo> WindowType;
00144         //WindowType* window = dynamic_cast<WindowType*>(src);
00145         WindowType* window = (WindowType*)src;
00146         if (window)
00147         {
00148             if (m_verbose)
00149             {
00150                 std::cout << "window size = " << window->WindowSize() << std::endl;
00151             }
00152 
00153             //Array::Array2dScalarReal64* tGauss = 0;
00154             //double temporalSigma = 0.75;
00155             //tGauss = Array::MakeGaussian1d(temporalSigma, 0, 3.0, 100);
00156             //Array::Array2dVec3Real64* res = 0;
00157             //SeqConvKernel(res, window, tGauss);
00159             //Array::Array2dVec3UInt8* src;
00160             //Array::InvWiccest(res, src, false, false, 0.75, 3, true);
00161 
00162 
00163             // test looping for all frames within the window
00164             //for (int i=0 ; i<window->WindowSize(); i++)
00165             //{
00166             //    Array::Array2dVec3UInt8* cur = Array::ArrayCreate<Array::Array2dVec3UInt8>
00167             //        (window->FrameWidth(), window->FrameHeight(), 0, 0, window->DataPtrWindow(i), true);
00168             //    if (!cur)
00169             //    {
00170             //        ILOG_ERROR("Couldn't load image for fileId " << fileId);
00171             //        return;
00172             //    }
00173 
00174             //    VectorReal64 cur_histogram = ComputeHistogram(cur);
00175             //    Quid quid = vs->GetQuidFrame(fileId, window->FrameNr() - window->WindowSize()/2 + i);
00176             //    mHistogramTable->Add(quid, cur_histogram);
00177             //}
00178             
00179 
00180             int bWidth = window->FrameWidth()/MVBLOCKSIZE;
00181             int bHeight = window->FrameHeight()/MVBLOCKSIZE;
00182             
00183             // Note: we have tested these pairs: (10,7) (7,10) (10,8) (8,10) (22,18) (20,15)
00184             //int bWidth = 10;
00185             //int bHeight = 7;
00186 
00187             for (int t=0;t<TPL_NUM;t++)
00188             {
00189                 m_Weight[t] = new double[bWidth * bHeight];
00190                 memset(m_Weight[t], 0, sizeof(double)* bWidth * bHeight);
00191 
00192                 if (m_verbose2)
00193                 {
00194                     DumpWeightMatrix(m_Weight[t], bWidth, bHeight, t);
00195                 }
00196             }
00197 
00198             m_EnergyBuffer = new BYTE[bWidth * bHeight];
00199             m_EnergyMap = new double[bWidth * bHeight];
00200             m_Buffer = new BYTE[bWidth * bHeight * 2];
00201 
00202             // Initialization
00203             LoadEnergyTemplate(bWidth, bHeight);
00204 
00205             // calculate the mean of weight for all templates
00206             CalcTPLMean(bWidth, bHeight);
00207 
00208             m_MVseq = new BYTE[bWidth * bHeight * 2 * (window->WindowSize()-1)];
00209             memset(m_MVseq, 0, sizeof(BYTE)*bWidth * bHeight * 2 * (window->WindowSize()-1) );
00210             //DumpMVFseq(m_MVseq, bWidth, bHeight, window->WindowSize()-1);
00211 
00212             // compute motion features between adjacent frames
00213             // if window size as 9: 0-1,1-2,2-3,3-4,4-5,5-6,6-7,7-8,8-9
00214             // based on these motion vectors field, we compute the energy distribution
00215             for (int i=1 ; i<window->WindowSize() ; i++)
00216             {
00217                 // previous frame: pre
00218                 Array::Array2dVec3UInt8* pre_rgb = Array::ArrayCreate<Array::Array2dVec3UInt8>
00219                     (window->FrameWidth(), window->FrameHeight(), 0, 0, window->DataPtrWindow(i-1), true);
00220                 if (!pre_rgb)
00221                 {
00222                     ILOG_ERROR("Couldn't load image for fileId " << fileId);
00223                     return;
00224                 }
00225 
00226                 // current frame: cur
00227                 Array::Array2dVec3UInt8* cur_rgb = Array::ArrayCreate<Array::Array2dVec3UInt8>
00228                     (window->FrameWidth(), window->FrameHeight(), 0, 0, window->DataPtrWindow(i), true);
00229                 if (!cur_rgb)
00230                 {
00231                     ILOG_ERROR("Couldn't load image for fileId " << fileId);
00232                     return;
00233                 }
00234 
00235 #ifdef __BITMAP_FUNC__
00236                 //Core::Feature::Bitmap bitmap;
00237                 //bitmap.SaveRgb2BitmapFile("test_rgb_0.bmp",cur_rgb->CPB(), cur_rgb->CW(), cur_rgb->CH() );
00238 #endif
00239                 //Array2dScalarReal64* pre = 0;
00240                 //Array2dScalarReal64* cur = 0;
00241                 Array::Array2dScalarUInt8* pre = 0;
00242                 Array::Array2dScalarUInt8* cur = 0;
00243                 RGB2Gray(pre,pre_rgb);
00244                 RGB2Gray(cur,cur_rgb);
00245 
00246                 // motion vector computing between (pre,cur)
00247                 Int64 body = pre->BW() + pre->BH();
00248 
00249 #ifdef __BITMAP_FUNC__
00250                 //Core::Feature::Bitmap bitmap;
00251                 //bitmap.SaveGray2BitmapFile("test_gray_0.bmp",cur->CPB(), cur->CW(), cur->CH() );
00252 #endif
00253                 CalcMotionVectors((unsigned char*)pre->CPB(),(unsigned char*)cur->CPB(),window->FrameWidth(),window->FrameHeight(),m_MVseq, i-1);
00254 
00255                 if (m_verbose2) DumpMVF(m_MVseq, bWidth, bHeight, i-1);
00256 
00257                 // DO remember: release the memory
00258                 delete pre_rgb;
00259                 delete cur_rgb;
00260                 delete pre;
00261                 delete cur;
00262 
00263             }
00264             //DumpMVFseq(m_MVseq, bWidth, bHeight, window->WindowSize()-1);
00265 
00266             // -----------------------------------------------------
00267             // #frame within the window:  window->WindowSize()
00268             // Motion Vector Field:       window->WindowSize() - 1
00269             // Energy Distribution Curve: window->WindowSize() - 2
00270             // -----------------------------------------------------
00271 
00272             // compute energy based on weighted templates
00273             // using a sliding buffer within a window
00274             int result = ComputeEnergy(window->WindowSize()-1, bWidth, bHeight, m_MVseq, feature);
00275 
00276             if (0 !=  result) return;
00277 
00278             for (int i=0;i<TPL_NUM;i++)
00279                 delete []m_Weight[i];
00280             delete []m_EnergyBuffer;
00281             delete []m_EnergyMap;
00282             delete []m_Buffer;
00283             delete []m_MVseq;
00284      
00285         }
00286         delete im;
00287 
00288         VectorReal64 mvfeature(2*TPL_NUM);
00289         for (int x=0; x<2*TPL_NUM; x++)
00290         {
00291             mvfeature[x] = feature[x];
00292         }
00293         Quid quid = vs->GetQuidFrame(fileId, src->FrameNr());
00294         mHistogramTable->Add(quid, mvfeature);
00295 
00296         ILOG_DEBUG("... done keyframe " << src->FrameNr());
00297 
00298     }
00299 
00300     virtual void
00301     HandleDoneFile(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00302     {
00303         bool binary = true;
00304         Feature::FeatureDefinition def = mHistogramTable->GetFeatureDefinition();
00305         typedef Persistency::FeatureLocator FeatureLocator;
00306         FeatureLocator loc(vs->GetLocator(), false, false, "Keyframes",
00307                            def.AsString(), vs->GetContainerFile(fileId));
00308         Persistency::FeatureTableRepository().Add(loc, mHistogramTable);
00309         mHistogramTable->SetSize(0);
00310     }
00311 
00312     // output all the MVF (Motion Vector Field) in the sequence
00313     void DumpMVFseq(BYTE* mvseq, int bWidth, int bHeight, int N)
00314     {
00315         //memset(mvseq, 0, sizeof(BYTE)*bWidth * bHeight * 2 * (window->WindowSize()-1) );
00316 
00317         for (int w=0; w<N; w++)
00318         {
00319             printf("MVF (%dx%d) for window %d:\n",bWidth,bHeight,w);
00320             for (int j=0; j<bHeight; j++)
00321             {
00322                 for (int i=0; i<bWidth; i++)
00323                 {
00324                     BYTE sx = mvseq[sizeof(BYTE)*bWidth*bHeight*2* w + (j*bWidth + i)*2 +0];
00325                     BYTE sy = mvseq[sizeof(BYTE)*bWidth*bHeight*2* w + (j*bWidth + i)*2 +1];
00326                     printf("(%d,%d) ",sx,sy);
00327                 }
00328                 printf("\n");
00329             }
00330             printf("\n");
00331         }
00332     }
00333 
00334     // output the index-th MVF (Motion Vector Field) in the sequence
00335     void DumpMVF(BYTE* mvseq, int bWidth, int bHeight, int index)
00336     {
00337         printf("MVF (%dx%d) for window %d:\n",bWidth,bHeight,index);
00338         for (int j=0; j<bHeight; j++)
00339         {
00340             for (int i=0; i<bWidth; i++)
00341             {
00342                 BYTE sx = mvseq[sizeof(BYTE)*bWidth*bHeight*2* index + (j*bWidth + i)*2 +0];
00343                 BYTE sy = mvseq[sizeof(BYTE)*bWidth*bHeight*2* index + (j*bWidth + i)*2 +1];
00344                 printf("(%d,%d) ",sx,sy);
00345             }
00346             printf("\n");
00347         }
00348         printf("\n");
00349     }
00350 
00351     void DumpEnergyMap(int bWidth, int bHeight, char* comments="")
00352     {
00353         printf("Energy Map (%dx%d): Window Index -> %s\n", bWidth, bHeight, comments);
00354         for (int j=0; j<bHeight; j++)
00355         {
00356             for (int i=0; i<bWidth; i++)
00357             {
00358                 printf("%.1f ",m_EnergyMap[j*bWidth + i]);
00359             }
00360             printf("\n");
00361         }
00362         printf("\n");
00363     }
00364 
00365     void DumpEnergyBuffer(int bWidth, int bHeight)
00366     {
00367         printf("Energy Buffer (%dx%d):\n",bWidth,bHeight);
00368         for (int j=0; j<bHeight; j++)
00369         {
00370             for (int i=0; i<bWidth; i++)
00371             {
00372                 printf("%d ",m_EnergyBuffer[j*bWidth + i]);
00373             }
00374             printf("\n");
00375         }
00376         printf("\n");
00377     }
00378 
00379     //
00380     // compute the energy distribution map based on sliding buffer,
00381     // CurBufSize: = 5 (default)
00382     // WindowLen > 5: such as 7,9,11,13,... 
00383     int ComputeEnergy(int WindowLen, int bWidth, int bHeight, BYTE* mvseq, double* feature, int CurBufSize = 5)
00384     {
00385         IsInShot = FALSE;
00386 
00387         // ZeroMemory for m_EnergyMap[] and m_EnergyBuffer[]
00388         OnShotStart(bWidth, bHeight);
00389 
00390         for (int t=0; t<2*TPL_NUM; t++)
00391         {
00392             feature[t] = 0.0;
00393         }
00394 
00395         //int CurBufSize = 5;
00396         if (WindowLen < CurBufSize)
00397         {
00398             printf("Error: WindowLen (%d) is smaller than CurBufSize (%d)\n", WindowLen, CurBufSize);
00399             return 1;
00400         }
00401         int num = WindowLen - CurBufSize;
00402         for (int index = 0; index < 1+num; index++)
00403         {
00404             // Just the fill the buffer according to the motion vector information
00405             FillBuffer(CurBufSize, bWidth, bHeight, mvseq + sizeof(BYTE)*bWidth*bHeight*2*index);
00406 
00407             int i=0;
00408             float Temp1Energy[TPL_NUM];
00409             float Temp2Energy[TPL_NUM];
00410             float diffEnergy[TPL_NUM];
00411             memset(diffEnergy,0,TPL_NUM*sizeof(float));
00412 
00413             for (i=0;i<TPL_NUM;i++)
00414             {
00415                 // Use the weight template to calculate the total energy value
00416                 float Energy = 0.0;
00417                 if (m_bNormalize)
00418                 {
00419                     Energy = CalcEnergy(i, bWidth, bHeight);
00420                 } else {
00421                     Energy = CalcEnergy(i, bWidth, bHeight, FALSE);
00422                 }
00423 
00424                 // update Temp1Energy[] and Temp2Energy[] for late compute their diffs
00425                 if (index==0)
00426                     Temp1Energy[i]=Energy;
00427                 else if (index==1)
00428                     Temp2Energy[i]=Energy;
00429                 else //index>1
00430                 {
00431                     Temp1Energy[i] = Temp2Energy[i];
00432                     Temp2Energy[i] = Energy;
00433                 }
00434 
00435                 if (m_verbose) printf("%.5f\t", Energy);
00436 
00437                 //sum up all the values, to compute averages later.
00438                 feature[i] += Energy;
00439             }
00440             //printf("\n");
00441 
00442             // Here there are TPL_NUM pairs of data, we get the diffs of them
00443             // first diff energy has been initialized as zero
00444             if (index>0)
00445             {
00446                 for (i=0;i<TPL_NUM;i++)
00447                 {
00448                     if (m_bNormalize)
00449                     {
00450                         // [-1,1] --> [0,1]
00451                         diffEnergy[i] = fabs(Temp2Energy[i] - Temp1Energy[i]);
00452                         //diffEnergy[i] = Normalization(diffEnergy[i],-1.0,1.0);
00453                     } else {
00454                         diffEnergy[i] = Temp2Energy[i] - Temp1Energy[i];
00455                     }
00456 
00457                     if (m_verbose) printf("%.5f\t", diffEnergy[i]);
00458 
00459                     //sum up all the values, to compute averages later.
00460                     feature[TPL_NUM+i] += diffEnergy[i];
00461 
00462                 }
00463                 if (m_verbose) printf("\n");
00464 
00465             } else {
00466 
00467                 //index==0
00468                 if (m_verbose) printf("diff1\tdiff2\tdiff3\n");
00469 
00470                 for (i=0;i<TPL_NUM;i++)
00471                 {
00472                     //if (m_verbose) printf("%.5f\t", diffEnergy[i]);
00473                     feature[TPL_NUM+i] = 0.0;
00474                 }
00475             }
00476         }
00477         
00478         OnShotEnd(bWidth, bHeight);
00479 
00480         // calculate the averages of the feature within current Window buffer
00481         for (int t=0; t<TPL_NUM; t++)
00482         {
00483             feature[t] /= num+1;
00484             feature[TPL_NUM+t] /= num; // one less than three original values
00485         }
00486 
00487         if (m_verbose)
00488         {
00489             printf("\nThe Average as the final feature:\n");
00490             for (int t=0; t<2*TPL_NUM; t++)
00491             {
00492                 printf("%.5f ",feature[t]);
00493             }
00494             printf("\n\n");
00495         }
00496 
00497         return 0;
00498 
00499     }
00500 
00501     // template: m_Weight[0] - m_Weight[2]:
00502     // 
00503     //Template Weigth Matrix (10x7)_0 is:
00504     // 1  5  9 13 17 21 25 29 33 37
00505     // 1  5  9 13 17 21 25 29 33 37
00506     // 1  5  9 13 17 21 25 29 33 37
00507     // 1  5  9 13 17 21 25 29 33 37
00508     // 1  5  9 13 17 21 25 29 33 37
00509     // 1  5  9 13 17 21 25 29 33 37
00510     // 1  5  9 13 17 21 25 29 33 37
00511     //
00512     //Template Weigth Matrix (10x7)_1 is:
00513     // 1  1  1  1  1  1  1  1  1  1
00514     // 6  6  6  6  6  6  6  6  6  6
00515     //11 11 11 11 11 11 11 11 11 11
00516     //16 16 16 16 16 16 16 16 16 16
00517     //21 21 21 21 21 21 21 21 21 21
00518     //26 26 26 26 26 26 26 26 26 26
00519     //31 31 31 31 31 31 31 31 31 31
00520     //
00521     //Template Weigth Matrix (10x7)_2 is:
00522     // 1  1  1  1  1  1  1  1  1  1
00523     // 1 17 17 17 17 17 17 17 17  1
00524     // 1 17 33 33 33 33 33 33 17  1
00525     // 1 17 33 49 49 49 49 33 17  1
00526     // 1 17 33 33 33 33 33 33 17  1
00527     // 1 17 17 17 17 17 17 17 17  1
00528     // 1  1  1  1  1  1  1  1  1  1
00529 
00530     // Till now, we have verified pairs for (bWidth,bHeight) as follows:
00531     //
00532     //    (10,7) (7,10) (10,8) (8,10) (22,18) (20,15)
00533     //
00534 
00535     // initialize template buffer, and record the (min,max) values
00536     void LoadEnergyTemplate(int bWidth, int bHeight)
00537     {
00538         int i=0,j=0;
00539 
00540         // pan: horizontal template
00541         int base = 1;
00542         int delta = 4;
00543         for (i=0; i<bWidth; i++)
00544             for (j=0; j<bHeight; j++)
00545             {
00546                 m_Weight[0][j*bWidth+i] = base + delta*i;
00547             }
00548         m_TPLMin[0] = base;
00549         m_TPLMax[0] = base + delta*(bWidth-1);
00550 
00551         // tilt: vertical tempate
00552         delta = 5;
00553         for (j=0; j<bHeight; j++)   
00554             for (i=0; i<bWidth; i++)
00555             {
00556                 m_Weight[1][j*bWidth+i] = base + delta*j;
00557             }
00558         m_TPLMin[1] = base;
00559         m_TPLMax[1] = base + delta*(bHeight-1);
00560 
00561         // zoom: radial tempate
00562         delta=16;
00563         int k=0;
00564         int dim = (bWidth<=bHeight) ? bWidth: bHeight;
00565         for (k=0; k<dim/2; k++)
00566         {
00567             // first column: left
00568             i=0+k;
00569             for (j=0+k; j<bHeight-k; j++)
00570             {
00571                 m_Weight[2][j*bWidth+i] = base + delta*k;
00572             }
00573 
00574             // last colum: right
00575             i=bWidth-1-k;
00576             for (j=0+k; j<bHeight-k; j++)
00577             {
00578                 m_Weight[2][j*bWidth+i] = base + delta*k;
00579             }
00580 
00581             // first row: top
00582             j=0+k;
00583             for (i=0+k; i<bWidth-k; i++)
00584             {
00585                 m_Weight[2][j*bWidth+i] = base + delta*k;
00586             }
00587 
00588             // last row: bottom
00589             j=bHeight-1-k;
00590             for (i=0+k; i<bWidth-k; i++)
00591             {
00592                 m_Weight[2][j*bWidth+i] = base + delta*k;
00593             }
00594 
00595             //DumpWeightMatrix(m_Weight[2], bWidth, bHeight, 2);
00596 
00597         }
00598 
00599         // if dim is odd, one row or one colum will NOT be process.
00600         // here, we will do this processing for the left one.
00601         m_TPLMin[2] = base;
00602         m_TPLMax[2] = base + delta*k;
00603         if (dim%2==1)
00604         {
00605             if (bWidth >= bHeight)
00606             {
00607                 // middle row
00608                 j=0+k;
00609                 for (i=0+k; i<bWidth-k; i++)
00610                 {
00611                     m_Weight[2][j*bWidth+i] = base + delta*k;
00612                 }
00613             }
00614             else
00615             {
00616                 // middle column
00617                 i=0+k;
00618                 for (j=0+k; j<bHeight-k; j++)
00619                 {
00620                     m_Weight[2][j*bWidth+i] = base + delta*k;
00621                 }
00622             }
00623 
00624             //DumpWeightMatrix(m_Weight[2], bWidth, bHeight, 2);
00625         }
00626 
00627         // output the weight matrix
00628         if (m_verbose2)
00629         {
00630             for (int t=0;t<TPL_NUM;t++)
00631             {
00632                 DumpWeightMatrix(m_Weight[t], bWidth, bHeight, t);
00633             }
00634         }
00635 
00636     }
00637 
00638     void DumpWeightMatrix(double* weight, int bWidth, int bHeight, int index)
00639     {
00640         // output the weight matrix
00641         printf("Template Weigth Matrix (%dx%d)_%d is:\n",bWidth,bHeight, index);
00642 
00643         int i,j;
00644         for (j=0; j<bHeight; j++)
00645         {
00646             for (i=0; i<bWidth; i++)
00647             {
00648                 printf("%2.d ",(int)weight[j*bWidth+i]);
00649                 //printf("%.2f ",weight[j*bWidth+i]);
00650             }
00651             printf("\n");
00652         }
00653         printf("(Min, Max) = (%.2f, %.2f)\n", m_TPLMin[index], m_TPLMax[index]);
00654         printf("\n");
00655     }
00656 
00657     void CalcTPLMean(int bWidth, int bHeight)
00658     {
00659         for (int indexOfTPL=0;indexOfTPL<TPL_NUM;indexOfTPL++)
00660         {
00661             m_TPLMean[indexOfTPL] = 0;
00662             for (int i = 0; i < bWidth * bHeight; i ++)
00663             m_TPLMean[indexOfTPL] += m_Weight[indexOfTPL][i];
00664         }
00665 
00666         if (m_verbose2)
00667         {
00668             printf("Template Mean Values TPL[0...2]: ");
00669             for (int indexOfTPL=0;indexOfTPL<TPL_NUM;indexOfTPL++)
00670             {
00671                 printf("%.2f ",m_TPLMean[indexOfTPL]);
00672             }
00673             printf("\n\n");
00674         }
00675 
00676     }
00677 
00678     void OnShotStart(int bWidth, int bHeight)
00679     {
00680         if (!IsInShot)
00681         {
00682             for (int i = 0; i < bWidth * bHeight; i++)
00683                 m_EnergyMap[i] = 1.0;
00684             memset(m_EnergyBuffer, 255, bWidth * bHeight);
00685 
00686             IsInShot = TRUE;
00687         }
00688 
00689         //DumpEnergyBuffer(bWidth, bHeight);
00690 
00691     }
00692 
00693     void OnShotEnd(int bWidth, int bHeight)
00694     {
00695         if (IsInShot)
00696         {
00697             IsInShot = FALSE;
00698             memset(m_EnergyBuffer, 0, bWidth * bHeight);
00699         }
00700 
00701     }
00702 
00703 
00704     // fill and update the energy map based on MVF sequence
00705     void FillBuffer(int WindowLen, int bWidth, int bHeight, BYTE* mvseq)
00706     {
00707         double * Add = new double[bWidth * bHeight];
00708         if (IsInShot)
00709         {
00710             UINT BufferLen = bWidth * bHeight;
00711             double * Temp = new double[BufferLen];
00712             if (Temp == NULL || Add == NULL)
00713             {
00714                 if (Temp != NULL) delete[] Temp;
00715                 if (Add != NULL) delete[] Add;
00716                 printf("Not Enough Memory! No Map Generated!");
00717                 return;
00718             }
00719 
00720             // Initialize the map value to 1.0
00721             for (int i = 0; i < BufferLen; i++)
00722             {
00723                 m_EnergyMap[i] = 1.0;
00724                 Add[i] = 0.0;
00725             }
00726 
00727             //DumpEnergyMap(bWidth, bHeight, "Initialization");
00728 
00729             for (int w = 0; w < WindowLen; w ++)
00730             {
00731                 memset(Temp, 0, BufferLen * sizeof(double));
00732 
00733                 // read the motion vectors from the "mvseq" buffer
00734                 memcpy(m_Buffer, mvseq + sizeof(BYTE)*bWidth*bHeight*2* w, BufferLen * 2);
00735 
00736                 // loop for the energy map :(20*15, for example)
00737                 for (int i = 0; i < bWidth; i++)
00738                     for (int j = 0; j < bHeight; j++)
00739                     {
00740                         double CurValue = m_EnergyMap[j * bWidth + i];
00741                         
00742                         int cx, cy;
00743                         long Ptr = 2 * (j * bWidth + i);
00744 
00745                         // (cx,cy) is the motion vector
00746                         cx = (char)m_Buffer[Ptr];
00747                         cy = (char)m_Buffer[Ptr + 1];    
00748                         
00749                         char SignX, SignY;
00750                         SignX = (cx >= 0)? 1:-1;
00751                         SignY = (cy >= 0)? 1:-1;
00752                         
00753                         int BaseX,BaseY;
00754                         BaseX = i + (int)(cx / MVBLOCKSIZE);
00755                         BaseY = j + (int)(cy / MVBLOCKSIZE);
00756                         
00757                         double Factor;
00758                         if (BaseX >= bWidth || BaseY >= bHeight || BaseX < 0 || BaseY < 0) 
00759                         {
00760                             if (abs(cx) >= abs(cy))
00761                             {
00762                                 Factor = (double)(cy) / (double)(cx);
00763                                 cx = ((SignX > 0)?bWidth:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00764                                 cy = Factor * cx + j * MVBLOCKSIZE - Factor * i * MVBLOCKSIZE;
00765                                 if (cy <= -MVBLOCKSIZE || cy >= bHeight * MVBLOCKSIZE)
00766                                 {
00767                                     cy = ((SignY > 0)?bHeight:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00768                                     cx = (cy - j * MVBLOCKSIZE + Factor * i * MVBLOCKSIZE) / Factor;
00769                                 }
00770                             }
00771                             else
00772                             {
00773                                 Factor = (double)(cx) / (double)(cy);
00774                                 cy = ((SignY > 0)?bHeight:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00775                                 cx = Factor * cy + i * MVBLOCKSIZE - Factor * j * MVBLOCKSIZE;
00776                                 if (cx <= - MVBLOCKSIZE || cx >= bWidth * MVBLOCKSIZE)
00777                                 {
00778                                     cx = ((SignX > 0)?bWidth:0) * MVBLOCKSIZE - MVBLOCKSIZE / 2;
00779                                     cy = (cx - i * MVBLOCKSIZE + Factor * j * MVBLOCKSIZE) / Factor;
00780                                 }
00781                             }
00782                             cx -= i * MVBLOCKSIZE;
00783                             cy -= j * MVBLOCKSIZE;
00784                             BaseX = i + (int)(cx / MVBLOCKSIZE);
00785                             BaseY = j + (int)(cy / MVBLOCKSIZE);
00786                         }
00787                         
00788                         double mX, mY;
00789                         mX = (double)(abs(cx % MVBLOCKSIZE))/MVBLOCKSIZE;
00790                         mY = (double)(abs(cy % MVBLOCKSIZE))/MVBLOCKSIZE;
00791                         if (BaseX + SignX >= bWidth || BaseX + SignX < 0) mX = 0.0;
00792                         if (BaseY + SignY >= bHeight || BaseY + SignY < 0) mY = 0.0;
00793                         
00794                         Temp[BaseY * bWidth + BaseX] += (1 - mX) * (1 - mY) * CurValue;
00795                         
00796                         if (BaseX + SignX < bWidth && BaseX + SignX >= 0)
00797                             Temp[BaseY * bWidth + BaseX + SignX] += mX * (1 - mY) * CurValue;
00798                         if (BaseY + SignY < bHeight && BaseY + SignY >= 0)
00799                         {
00800                             Temp[(BaseY + SignY) * bWidth + BaseX] += (1 - mX) * mY * CurValue;
00801                             if (BaseX + SignX < bWidth && BaseX + SignX >= 0)
00802                                 Temp[(BaseY + SignY) * bWidth + BaseX + SignX] += mX * mY * CurValue;
00803                         }
00804                         
00805                     }
00806 
00807                 double * Exch;
00808                 Exch = Temp;
00809                 Temp = m_EnergyMap;
00810                 m_EnergyMap = Exch;
00811 
00812                 for (int k = 0; k < bWidth * bHeight; k ++)
00813                     Add[k] += m_EnergyMap[k];
00814 
00815                 //char strWinLen[256];
00816                 //_itoa(w,strWinLen,10);
00817                 //DumpEnergyMap(bWidth, bHeight, strWinLen);
00818 
00819             }
00820             delete[] Temp;
00821 
00822         }
00823 
00824         // Average the energy map within the window length
00825         for (int i = 0; i < bWidth * bHeight; i ++)
00826         {
00827             m_EnergyMap[i] = Add[i] / WindowLen;
00828             m_EnergyBuffer[i] = (BYTE)(m_EnergyMap[i] * 255);
00829         }
00830         delete[] Add;
00831 
00832         // dump out the final energy map within current sliding window
00833         if (m_verbose2)
00834         {
00835             DumpEnergyMap(bWidth, bHeight, "Average within Window (size=4)");
00836             //DumpEnergyBuffer(bWidth, bHeight);
00837         }
00838 
00839     }
00840 
00841     // based on three different templates
00842     double CalcEnergy(const int indexOfTPL, int bWidth, int bHeight, BOOL bNormalized=TRUE)
00843     {
00844         double Energy = 0;
00845         for (int i = 0; i < bWidth * bHeight; i ++)
00846             Energy += m_EnergyMap[i] * m_Weight[indexOfTPL][i];
00847 
00848         if (bNormalized){
00849             //normalize the energy between [0,1]
00850             double min = m_TPLMin[indexOfTPL]*bWidth*bHeight;
00851             double max = m_TPLMax[indexOfTPL]*bWidth*bHeight;
00852             Energy = Normalization(Energy,min,max);
00853             
00854         } else {
00855             // Note: this case is OK only if all the video files are with the same size.
00856             //       Or, videos with different (bWidth,bHeight)s can NOT be compared each other.
00857             //
00858             // only adjust according to the mean of the template
00859             Energy = Energy - m_TPLMean[indexOfTPL];
00860         }
00861         return Energy;
00862     }
00863 
00864     // [min, max] --> [0,1]
00865     double Normalization(const double x, const double min, const double max)
00866     {
00867         return (x-min)/(max-min);
00868     }
00869 
00870     // the main function for the motion vector computering
00871     // pre, cur: are both gray images
00872     // ------------------------------------------------------
00873     BOOL CalcMotionVectors(unsigned char* pre, unsigned char* cur, int Width, int Height, BYTE* mvseq, int index)
00874     {
00875         // bWidth, bHeight: size of block grid
00876         int bWidth  = Width / MVBLOCKSIZE;
00877         int bHeight = Height / MVBLOCKSIZE;
00878 
00879         int i, j;
00880         int x, y, x_modify, y_modify;
00881         char sx, sy;
00882 
00883         if (pre == NULL || cur == NULL)
00884             return TRUE;
00885 
00886         int count=0;
00887         for (j = 0, y = 0; j < bHeight; j ++, y += 16)
00888         {
00889             for (i = 0, x = 0; i < bWidth; i ++, x += 16)
00890             {
00891                 x_modify = x;
00892                 y_modify = y;
00893                 DiamondSearch16(pre, cur, Width, Height, &x_modify, &y_modify);
00894                 //FullSearch16(pre, cur, Width, Height, &x_modify, &y_modify);
00895 
00896                 sx = x_modify - x;
00897                 sy = y_modify - y;
00898 
00899                 count += abs(sx)+abs(sy);
00900 
00901                 //m_MVseq = new BYTE[bWidth * bHeight * 2 * (window->WindowSize()-1)];
00902                 mvseq[bWidth*bHeight*2*index + 2*(j*bWidth+i) + 0] = sx;
00903                 mvseq[bWidth*bHeight*2*index + 2*(j*bWidth+i) + 1] = sy;
00904 
00905             }
00906         }
00907 
00908         return TRUE;
00909 
00910     }
00911 
00912     // Use diamond search method to find the best matching block postion for this block
00913     // return the value of : &x_modify, &y_modify 
00914     // ------------------------------------------------
00915     int DiamondSearch16(unsigned char *refframe, unsigned char *desframe, 
00916                         int framewidth, int frameheight, int *cx, int *cy)
00917     {
00918         static char FirstDiamondX[] = { 0,  0, -2,  2, -1,  1, -1,  1};
00919         static char FirstDiamondY[] = {-2,  2,  0,  0, -1, -1,  1,  1};
00920         static char FinalDiamondX[] = {-1,  0,  1,  0}; 
00921         static char FinalDiamondY[] = { 0, -1,  0,  1}; 
00922 
00923         static char CornerDiamondX[] = {0,  0,  1,   1,  2}; 
00924         static char CornerDiamondY[] = {2, -2,  1,  -1,  0}; 
00925 
00926         static char EdgeDiamondX[] = { 0,  1,  2}; 
00927         static char EdgeDiamondY[] = { 2,  1,  0}; 
00928 
00929         int minX = 0, minY = 0, minerror;
00930         int curX, curY;
00931         int error, signX, signY;
00932         int sX, sY;
00933 
00934         int i;
00935         unsigned char * block;
00936 
00937         // the postion of current block for the current frame!
00938         // this positon will not change during the search process
00939         block = desframe + * cy * framewidth + * cx; 
00940 
00941         curX = * cx; curY = * cy;
00942         if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
00943         {
00944             * cx = * cy = 0;
00945             return -1;
00946         }
00947 
00948         // Initialize the minerror value
00949         minerror = GetMotionError16(refframe, block, framewidth, curX, curY);
00950 
00951         sX = * cx;
00952         sY = * cy;
00953         for (i = 0; i < 8; i ++)
00954         {
00955             curX = sX + FirstDiamondX[i]; 
00956             curY = sY + FirstDiamondY[i];
00957             if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
00958                 continue;
00959 
00960             error = GetMotionError16(refframe, block, framewidth, curX, curY);
00961             if (error < minerror)
00962             {
00963                 minX = FirstDiamondX[i];
00964                 minY = FirstDiamondY[i];
00965                 minerror = error;
00966             }
00967         }
00968 
00969         while (1)
00970         {
00971             if (!minX && !minY) break;
00972 
00973             sX += minX;
00974             sY += minY;
00975 
00976             if (abs(sX - * cx) > MAX_DISP) break;
00977             if (abs(sY - * cy) > MAX_DISP) break;
00978             
00979             if (!minY)
00980             {
00981                 if (minX > 0) signX = 1;
00982                 else signX = -1;
00983 
00984                 minX = minY = 0;
00985                 for (i = 0; i < 5; i ++)
00986                 {
00987                     curX = sX + CornerDiamondX[i] * signX; 
00988                     curY = sY + CornerDiamondY[i];
00989                     if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
00990                         continue;
00991 
00992                     error = GetMotionError16(refframe, block, framewidth, curX, curY);
00993                     if (error < minerror)
00994                     {
00995                         minX = curX - sX;
00996                         minY = curY - sY;
00997                         minerror = error;
00998                     }
00999                 }
01000                 continue;
01001             }
01002             
01003             if (!minX)
01004             {
01005                 if (minY > 0) signY = 1;
01006                 else signY = -1;
01007 
01008                 minX = minY = 0;
01009                 for (i = 0; i < 5; i ++)
01010                 {
01011                     curX = sX + CornerDiamondY[i]; 
01012                     curY = sY + CornerDiamondX[i] * signY;
01013                     if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
01014                         continue;
01015 
01016                     error = GetMotionError16(refframe, block, framewidth, curX, curY);
01017                     if (error < minerror)
01018                     {
01019                         minX = curX - sX;
01020                         minY = curY - sY;
01021                         minerror = error;
01022                     }
01023                 }
01024                 continue;
01025             }
01026 
01027             if (minX > 0) signX = 1;
01028             else signX = -1;
01029             if (minY > 0) signY = 1;
01030             else signY = -1;
01031             
01032             minX = minY = 0;
01033             for (i = 0; i < 3; i ++)
01034             {
01035                 curX = sX + EdgeDiamondX[i] * signX; 
01036                 curY = sY + EdgeDiamondY[i] * signY;
01037                 if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
01038                     continue;
01039 
01040                 error = GetMotionError16(refframe, block, framewidth, curX, curY);
01041                 if (error < minerror)
01042                 {
01043                     minX = curX - sX;
01044                     minY = curY - sY;
01045                     minerror = error;
01046                 }
01047             }
01048         }
01049 
01050         for (i = 0; i < 4; i ++)
01051         {
01052             curX = sX + FinalDiamondX[i]; 
01053             curY = sY + FinalDiamondY[i];
01054             if (curX < 0 || curX > framewidth - 16 || curY < 0 || curY > frameheight - 16) 
01055                 continue;
01056 
01057             error = GetMotionError16(refframe, block, framewidth, curX, curY);
01058             if (error < minerror)
01059             {
01060                 minX = FinalDiamondX[i];
01061                 minY = FinalDiamondY[i];
01062                 minerror = error;
01063             }
01064         }
01065 
01066         * cx = sX + minX;
01067         * cy = sY + minY;
01068         return minerror;
01069 
01070     }
01071 
01072     int FullSearch16(unsigned char *refframe, unsigned char *desframe, int framewidth, int frameheight, int *cx, int *cy)
01073     {
01074         int minerror;
01075         int curX, curY;
01076         int error, oX, oY;
01077         int sx, sy;
01078 
01079         int i, j;
01080         unsigned char * block;
01081         block = desframe + * cy * framewidth + * cx;
01082 
01083         // first
01084         oX = curX = * cx; 
01085         oY = curY = * cy;
01086         if (curX < 0 || curX > framewidth - 16|| curY < 0 || curY > frameheight - 16) 
01087         {
01088             * cx = * cy = 0;
01089             return -1;
01090         }
01091         minerror = GetMotionError16(refframe, block, framewidth, curX, curY);
01092 
01093         for (i = 1; i <= MAX_DISP; i ++)
01094         {
01095             sx = sy = -i;
01096             for (j = 0; j < 8 * i; j ++)
01097             {
01098                 curX = oX + sx;
01099                 curY = oY + sy;
01100 
01101                 if (!(curX < 0 || curX > framewidth - 16|| curY < 0 || curY > frameheight - 16))
01102                 {
01103                     error = GetMotionError16(refframe, block, framewidth, curX, curY);
01104                     if (error < minerror)
01105                     {
01106                         minerror = error;
01107                         * cx = curX;
01108                         * cy = curY;
01109                     }
01110                 }
01111 
01112                 if (j < 2 * i) sx++;
01113                 else if (j < 4 * i) sy++;
01114                 else if (j < 6 * i) sx--;
01115                 else sy--;
01116             }
01117         }
01118 
01119         return minerror;
01120 
01121     }
01122 
01123 
01124     // ------------------------------------------------------------------------------------
01125     //    refframe:   the former frame buffer  (with fx,fy )
01126     //    block   :   the current block position in the current frame buffer (not change!)
01127     //  framewidth:   the whole frame width
01128     //    fx ,fy  :   the ref block position       (fx<=Width,fy<=Height)
01129     // ------------------------------------------------------------------------------------
01130     int GetMotionError16(unsigned char * refframe, unsigned char * block, 
01131                          int framewidth, int fx, int fy)
01132     {
01133         int i,j;
01134         int SAD = 0;
01135         BYTE* refBlock = refframe + fy*framewidth + fx;
01136 
01137         // i: fx direction (width)
01138         // j: fy direction (height)
01139         for (j=0;j<MVBLOCKSIZE;j++)
01140             for (i=0;i<MVBLOCKSIZE;i++)
01141             {
01142                 int diff = block[j*framewidth+i] - refBlock[j*framewidth+i];
01143                 diff = abs(diff);
01144                 SAD+=diff;
01145             }
01146         return SAD;
01147     }
01148 
01149 
01150 protected:
01151 
01152     int
01153     GetBinCount() const
01154     {
01155         return mBinCount;
01156     }
01157 
01158     //virtual VectorReal64
01159     //ComputeHistogram(Array::Array2dVec3UInt8* image) = 0;
01160 
01161     // compute rgb histogram
01162     /*VectorReal64
01163     ComputeHistogram(Array::Array2dVec3UInt8* image)
01164     {
01165         UInt8* data = image->CPB(0, 0);
01166         const int nPix = image->CW() * image->CH();
01167 
01168         VectorReal64 histogram(GetBinCount());
01169         Real64* histR = histogram.GetData();
01170         Real64* histG = histR + BINS_PER_CHANNEL;
01171         Real64* histB = histG + BINS_PER_CHANNEL;
01172 
01173         for (int i = 0; i < GetBinCount(); i++)
01174             histR[i] = 0;
01175 
01176         for (int j = 0; j < nPix; j++) 
01177         {
01178             const int r = ComputeBin((int) data[0], 0, 255, BINS_PER_CHANNEL);
01179             histR[r] += 1;
01180 
01181             const int g = ComputeBin((int) data[1], 0, 255, BINS_PER_CHANNEL);
01182             histG[g] += 1;
01183 
01184             const int b = ComputeBin((int) data[2], 0, 255, BINS_PER_CHANNEL);
01185             histB[b] += 1;
01186 
01187             data += 3;
01188         }
01189 
01190         const Real64 factor = nPix;
01191         Vector::DivAssign(histogram, factor);
01192 
01193         return histogram;
01194     }*/
01195 
01196     template <class T>
01197     int
01198     ComputeBin(const T val, const T low, const T high, const int nrBins) const
01199     {
01200         const int bin = (nrBins * (val - low)) / (high - low);
01201         if (bin >= 0 && bin < nrBins)
01202             return bin;
01203 
01204         if (val == high)
01205             return nrBins - 1;
01206 
01207         ILOG_WARN("Unexpected value: " << val << 
01208             " is not in range [" << low << ", " << high << "]");
01209         return (bin < 0) ? 0 : nrBins-1;
01210     }
01211 
01212 private:
01213 
01214     //static const int BINS_PER_CHANNEL = 32;
01215 
01216     Reporter*              mReporter;
01217     String                 mFeatureName;
01218     int                    mBinCount;
01219     Feature::FeatureTable* mHistogramTable;
01220 
01221     // calc the energy map from motion vectors
01222     double*                m_Weight[TPL_NUM];
01223     double                 m_TPLMean[TPL_NUM];
01224     double                 m_TPLMin[TPL_NUM];
01225     double                 m_TPLMax[TPL_NUM];
01226     double*                m_EnergyMap;
01227 
01228     BOOL                   m_bNormalize;
01229 
01230     BOOL                   IsInShot;
01231     BYTE*                  m_MVseq;   // store all the motion vectors field within the window
01232     BYTE*                  m_Buffer;
01233     BYTE*                  m_EnergyBuffer;
01234 
01235     BOOL                   m_verbose;
01236     BOOL                   m_verbose2; // for dedug only; more detailed info
01237 
01238     ILOG_VAR_DECL;
01239 };
01240 
01241 ILOG_VAR_INIT(KfrMotionExtractor, Impala.Core.VideoSet);
01242 
01243 } // namespace VideoSet
01244 } // namespace Core
01245 } // namespace Impala
01246 
01247 #endif
KfrMotionExtractor.h