Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

ShotSegmenter.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_VideoSet_ShotSegmenter_h
00002 #define Impala_Core_VideoSet_ShotSegmenter_h
00003 
00004 #include "Persistency/SimilarityTableSetRepository.h"
00005 #include "Core/Histogram/MakeHistogram1d.h"
00006 #include "Core/Array/SetVal.h"
00007 #include "Core/Array/ProjectRange.h"
00008 #include "Core/Array/MakeGaussian1d.h"
00009 #include "Core/Array/RecGauss.h"
00010 #include "Core/Geometry/RectanglePyramid.h"
00011 #include "Core/Vector/ElemMin.h"
00012 #include "Core/Vector/HistogramIntersection.h"
00013 #include "Core/VideoSet/Reporter.h"
00014 #include "Core/VideoSet/Mpeg7DocWrite.h"
00015 
00016 
00017 namespace Impala
00018 {
00019 namespace Core
00020 {
00021 namespace VideoSet
00022 {
00023 
00024 
00025 class ShotSegmenter : public Listener
00026 {
00027 public:
00028 
00029     typedef Core::Histogram::Histogram1dTem<Real64> HistType;
00030     typedef Array::Array2dScalarReal64 ArrayType;
00031     typedef Array::Array2dScalarReal64 Array2dScalarReal64;
00032 
00033     ShotSegmenter(Reporter* reporter, CmdOptions& options)
00034     {
00035         mBlurSigma   = 1.0;
00036         mRadius      = 6;
00037         mBinCount    = 32;
00038         mProbThresh  = options.GetDouble("SSProbThresh");
00039         mDoMpeg7     = ! options.GetBool("SSNoMpeg7");
00040         mDoSim       = ! options.GetBool("SSNoSim");
00041         mDoNrkf      = ! options.GetBool("SSNoNrkf");
00042 
00043         mWindowSize  = 2 * mRadius;
00044         mPyramid = Geometry::RectanglePyramid(0, 1, 2, 1, -1, -1);
00045         mHistPerFrame = 3 * mPyramid.TotalNrRects();
00046         // Prepare the 1-d gaussian filter to be used for scaling in time
00047         mGauss = Array::MakeGaussian1d(18, 0, 0, mWindowSize*2+1,
00048                                        mWindowSize*2+1);
00049         mWeights = Array::ArrayCreate<Array2dScalarReal64>(mWindowSize,
00050                                                            mWindowSize);
00051         mFeatures = new Vector::VectorTem<Real64>(mWindowSize);
00052         mHistograms = new HistType*[mWindowSize*mHistPerFrame];
00053         for (int i=0 ; i<mWindowSize*mHistPerFrame ; i++)
00054             mHistograms[i] = new HistType(0, 256, mBinCount, 0);
00055 
00056         mScores = 0;
00057         mPredictions = 0;
00058     }
00059 
00060     virtual
00061     ~ShotSegmenter()
00062     {
00063         delete mGauss;
00064         delete mWeights;
00065         delete mFeatures;
00066 
00067         for (int i=0 ; i<mWindowSize*mHistPerFrame ; i++)
00068             delete mHistograms[i];
00069         delete[] mHistograms;
00070 
00071         CleanUpScoresAndPredictions();
00072     }
00073 
00074     void
00075     HandleNewFile(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00076     {
00077         if (src->LastFrame() < 0)
00078         {
00079             ILOG_ERROR("HandleNewFile: src->LastFrame() = " << src->LastFrame()
00080                        << " (fileId = " << fileId << ")");
00081             return;
00082         }
00083         int lastFrame = src->LastFrame();
00084 
00085         // Reset data for the next file
00086         SetVal(mWeights, 0);
00087 
00088         CleanUpScoresAndPredictions();
00089 
00090         mPredictions = Array::ArrayCreate<Array2dScalarReal64>(lastFrame,1,0,0);
00091         SetVal(mPredictions, 0);
00092 
00093         mScores = Array::ArrayCreate<Array2dScalarReal64>(lastFrame,1,0,0);
00094         SetVal(mScores, 0);
00095 
00096         mPyramid.SetImageSize(src->FrameWidth(), src->FrameHeight());
00097     }
00098 
00099     void
00100     HandleNewFrame(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00101     {
00102         int processFrame = src->FrameNr();
00103         int scoreFrame = processFrame - mRadius;
00104         int decisionFrame = processFrame - mWindowSize;
00105         int predictFrame = (decisionFrame > 0) ? decisionFrame : 0;
00106 
00107         UpdateHistograms(src);
00108 
00109         if (processFrame >= mWindowSize - 1)
00110         {
00111             ComputeSSWeights();
00112             Real64 score = CalculateScores();
00113             mScores->SetValue(score, scoreFrame, 0);
00114         }
00115 
00116         Predict(predictFrame);
00117     }
00118 
00119     void
00120     HandleDoneFile(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00121     {
00122         WriteMp7ShotSeg(vs, fileId, src);
00123         WriteShotSimilarity(vs, fileId, src);
00124     }
00125 
00126 
00127 protected:
00128 
00129     Mpeg7Doc* 
00130     ConstructMpeg7Doc(CString id, CString fileName, int realId, 
00131                       int nrOfFrames, int frameRateNum, int frameRateDen) const
00132     {
00133         int fractions = 0;
00134         Mpeg7Doc* mp7 = new Mpeg7Doc(id, fileName, nrOfFrames, fractions, true, 
00135                                      "", "", frameRateNum, frameRateDen);
00136 
00137         int firstFrameInShot = 0;
00138         int frameAtSemiCut = -1;
00139         int lastFrame = nrOfFrames - 1;
00140         int shotNr = 0;
00141 
00142         for (int i = 0; i <= lastFrame; i++)
00143         {
00144             // If the probability is >mProbThresh then there was a cut 
00145             // between previous and current frame
00146             if ((mPredictions->Value(i, 0) > mProbThresh) ||
00147                 (i == lastFrame))
00148             {
00149                 String shotName = "shot" + MakeString(realId + 1) + "_"
00150                     + MakeString(shotNr + 1);
00151                 std::vector<String> keyNames;
00152                 keyNames.push_back(shotName + "_RKF");
00153                 if (frameAtSemiCut != -1)
00154                     keyNames.push_back(shotName + "_NRKF");
00155                 std::vector<int> keyFrames;
00156                 if (frameAtSemiCut == -1)
00157                 {
00158                     keyFrames.push_back((firstFrameInShot + i) / 2);
00159                 }
00160                 else
00161                 {
00162                     keyFrames.push_back((firstFrameInShot + frameAtSemiCut) / 2);
00163                     keyFrames.push_back((frameAtSemiCut + i) / 2);
00164                 }
00165                 mp7->AddShot(shotName, firstFrameInShot, i, keyNames, keyFrames);
00166                 firstFrameInShot = i + 1;
00167                 frameAtSemiCut = -1;
00168                 shotNr++;
00169             }
00170             else if (mDoNrkf &&
00171                      (mPredictions->Value(i, 0) > 0.8 * mProbThresh) &&
00172                      (i - firstFrameInShot > 25))
00173             {
00174                 frameAtSemiCut = i;
00175             }
00176         }
00177         return mp7;
00178     }
00179 
00180 
00181 private:
00182 
00183     void
00184     BlurImage(Array2dScalarReal64*& channel, double sigma)
00185     {
00186         Array2dScalarReal64* blurred = 0;
00187         RecGauss(blurred, channel, sigma, sigma, 0, 0, 1);
00188         delete channel;
00189         channel = blurred;
00190     }
00191 
00192     void
00193     UpdateHistograms(Stream::RgbDataSrc* src)
00194     {
00195         typedef Array::Array2dVec3Real64 Array2dVec3Real64;
00196         typedef Array::Array2dVec3UInt8 Array2dVec3UInt8;
00197 
00198         HistType** tmpHist;
00199         tmpHist = new HistType*[mHistPerFrame];
00200         // Back up the first frame's histogram pointers
00201         for (int i=0; i<mHistPerFrame ;i++)
00202             tmpHist[i] = mHistograms[i];
00203         // Shift the rest of the histograms
00204         for (int i=0 ; i<(mWindowSize-1)*mHistPerFrame ; i++)
00205             mHistograms[i] = mHistograms[i+mHistPerFrame];
00206 
00207         for (int i=(mWindowSize-1)*mHistPerFrame ; i<mWindowSize*mHistPerFrame ;
00208              i++)
00209         {
00210             mHistograms[i] = tmpHist[i - (mWindowSize - 1) * mHistPerFrame];
00211         }
00212         delete [] tmpHist;
00213 
00214         Array2dVec3Real64* srcWrap =
00215             Array::ArrayCreate<Array2dVec3Real64>(src->FrameWidth(),
00216                                                   src->FrameHeight(), 0, 0);
00217 
00218         Array::MakeFromData2<Array2dVec3Real64,Array2dVec3UInt8>(srcWrap,
00219                                                                  src->DataPtr());
00220 
00221         Array::Array2dScalarReal64* srcO1 = 0;
00222         Array::Array2dScalarReal64* srcO2 = 0;
00223         Array::Array2dScalarReal64* srcO3 = 0;
00224 
00225         Array::ProjectRange(srcO1, srcWrap, 1);
00226         Array::ProjectRange(srcO2, srcWrap, 2);
00227         Array::ProjectRange(srcO3, srcWrap, 3);
00228     
00229         BlurImage(srcO1, mBlurSigma);
00230         BlurImage(srcO2, mBlurSigma);
00231         BlurImage(srcO3, mBlurSigma);
00232 
00233         int k = (mWindowSize - 1) * mHistPerFrame - 1;
00234         for (int i=0 ; i<mPyramid.NrLevels() ; i++)
00235         {
00236             for (int j=0 ; j<mPyramid.NrRects(i) ; j++)
00237             {
00238                 MakeHistogram1d(mHistograms[++k], srcO1, mPyramid.Rect(i, j));
00239                 mHistograms[k]->Normalize();
00240                 
00241                 MakeHistogram1d(mHistograms[++k], srcO2, mPyramid.Rect(i, j));
00242                 mHistograms[k]->Normalize();
00243 
00244                 MakeHistogram1d(mHistograms[++k], srcO3, mPyramid.Rect(i, j));
00245                 mHistograms[k]->Normalize();
00246             }
00247         }
00248 
00249         delete srcWrap;
00250         delete srcO1;
00251         delete srcO2;
00252         delete srcO3;
00253     }
00254 
00255     void
00256     ComputeSSWeights()
00257     {   
00258         for (int i=0 ; i<mWindowSize ; i++)
00259         {
00260             for (int j=0 ; j<mWindowSize ; j++)
00261             {
00262                 Real64 weight=0;
00263                 for (int k=0 ; k<mHistPerFrame ; k++)
00264                 {
00265                     weight += Core::Vector::HistogramIntersection
00266                         (*mHistograms[i*mHistPerFrame+k],
00267                          *mHistograms[j*mHistPerFrame+k]);
00268                 }
00269                 mWeights->SetValue(weight*mGauss->Value(mWindowSize+i-j,0),i,j);
00270             }
00271         }
00272     }
00273 
00274     Real64
00275     CalculateScores()
00276     {
00277         int center = mRadius;
00278         Real64 disSimA = 0;
00279         Real64 disSimB = 0;
00280         Real64 simA = 0;
00281         Real64 simB = 0;
00282 
00283         for (int i=0 ; i<mRadius ; i++)
00284         {
00285             for (int j=0 ; j<mRadius ; j++)
00286             {
00287                 simA += mWeights->Value(i, j);
00288                 disSimA += mWeights->Value(i, j + center);
00289             }
00290         }
00291 
00292         for (int i=center ; i<mWindowSize ; i++)
00293         {
00294             for (int j=0 ; j<mRadius ; j++)
00295             {
00296                 simB += mWeights->Value(i, j + center);
00297                 disSimB += mWeights->Value(i, j);
00298             }
00299             
00300         }
00301 
00302         if ((simA * simB == 0) || (disSimA * disSimB == 0))
00303             return 4;
00304         Real64 cutAB = disSimA + disSimB;
00305         Real64 s = cutAB / simA + cutAB / simB;
00306         return s;
00307     }
00308     
00309     void
00310     Predict(int predictFrame)
00311     {
00312         int predictBase = predictFrame - mRadius + 1; 
00313         int size = mWindowSize;
00314         if (predictBase < 0)
00315         {
00316             mPredictions->SetValue(0.0, predictFrame, 0);
00317             return;
00318         }
00319         
00320         // Update the feature vector
00321         for (int i=0 ; i<size ; i++)
00322         {
00323             (*mFeatures)[i] = mScores->Value(predictBase + i, 0);
00324         }
00325 
00326         Real64 minScore = Vector::ElemMin(*mFeatures);
00327         if (mFeatures->Elem(mRadius-1) == minScore)
00328         {
00329             mPredictions->SetValue((4.0-minScore) / 2.0, predictFrame, 0);
00330         }
00331         else
00332         {
00333             mPredictions->SetValue(0.0, predictFrame, 0);
00334         }
00335     }
00336 
00337     void
00338     WriteMp7ShotSeg(VideoSet* vs, int fileId, Stream::RgbDataSrc* src) const
00339     {
00340         if (!mDoMpeg7)
00341             return;
00342         if ((!src) || (src->FrameRateNum() < 0) || (src->FrameRateDen() < 0))
00343         {
00344             ILOG_ERROR("WriteMp7ShotSeg: need src with known frame rate");
00345             return;
00346         }
00347 
00348         String fileName = vs->GetFile(fileId);
00349         String id = FileNameBase(fileName);
00350         int realId = vs->GetFileId(fileId);
00351 
00352         Mpeg7Doc* mp7 = 
00353             ConstructMpeg7Doc(id, fileName, realId, src->LastFrame() + 1, 
00354                               src->FrameRateNum(), src->FrameRateDen());
00355 
00356         Mpeg7DocWrite(mp7, vs, fileId);
00357         delete mp7;
00358     }
00359 
00360     void
00361     WriteShotSimilarity(VideoSet* vs, int fileId, Stream::RgbDataSrc* src)
00362     {
00363         if (!mDoSim)
00364             return;
00365         int lastFrame = src->LastFrame();
00366         std::vector<String> names;
00367         names.push_back("ShotBoundary");
00368         Table::SimilarityTableSet simSet(names, lastFrame + 1);
00369         Table::QuidTable* qTable = simSet.GetQuidTable();
00370         int idx = 0;
00371         Table::SimilarityTableSet::SimTableType* sTable = simSet.GetSimTable(idx);
00372         for (int i=0 ; i<=lastFrame ; i++)
00373         {
00374             Quid q = vs->GetQuidFrame(fileId, i);
00375             qTable->Add(q);
00376             Real64 v = mPredictions->Value(i,0) / 2; // to get between 0 and 1
00377             sTable->Add(v);
00378         }
00379         simSet.ComputeRanks(true);
00380         Persistency::SimilarityTableSetLocator loc
00381             (vs->GetLocator(), false, "Frames", "streamConcepts.txt", "no_model",
00382              "direct", vs->GetAsPath(fileId));
00383         Persistency::SimilarityTableSetRepository().Add(loc, &simSet);
00384     }
00385 
00386     void
00387     CleanUpScoresAndPredictions()
00388     {
00389         if (mScores)
00390         {
00391             delete mScores;
00392             mScores = 0;
00393         }
00394         if (mPredictions)
00395         {
00396             delete mPredictions;
00397             mPredictions = 0;
00398         }
00399     }
00400 
00401     double mBlurSigma;
00402     int    mRadius;
00403     int    mBinCount;
00404     double mProbThresh;
00405     bool   mDoMpeg7;
00406     bool   mDoSim;
00407     bool   mDoNrkf;
00408     int    mWindowSize;
00409     int    mHistPerFrame;
00410 
00411     Geometry::RectanglePyramid mPyramid;
00412     
00413     Array2dScalarReal64* mGauss;
00414     Array2dScalarReal64* mWeights;
00415     Array2dScalarReal64* mScores;
00416     Array2dScalarReal64* mPredictions;
00417 
00418     Vector::VectorTem<Real64>* mFeatures;
00419 
00420     HistType** mHistograms;
00421 
00422     ILOG_VAR_DEC;
00423 };
00424 
00425 ILOG_VAR_INIT(ShotSegmenter, Impala.Core.VideoSet);
00426 
00427 } // namespace VideoSet
00428 } // namespace Core
00429 } // namespace Impala
00430 
00431 #endif

Generated on Thu Jan 13 09:04:47 2011 for ImpalaSrc by  doxygen 1.5.1