Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

Mpeg7Doc.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_VideoSet_Mpeg7Doc_h
00002 #define Impala_Core_VideoSet_Mpeg7Doc_h
00003 
00004 #include "Util/Database.h"
00005 #include "Util/XmlDoc.h"
00006 #include "Util/QuickSort.h"
00007 #include "Util/StringParser.h"
00008 #include "Persistency/XmlFileReader.h"
00009 #include "Core/Geometry/OverlapsAnywhere1d.h"
00010 
00011 namespace Impala
00012 {
00013 namespace Core
00014 {
00015 namespace VideoSet
00016 {
00017 
00018 
00019 class Mpeg7Doc : public Util::XmlDoc
00020 {
00021 
00022 public:
00023 
00024     // ioBuf will be deleted by this constructor
00025     Mpeg7Doc(String fileName, Util::IOBuffer* ioBuf, int frameRateNum,
00026              int frameRateDen)
00027     {
00028         mAnnoIsConcept = false;
00029         mFractions = 0;
00030         mFrameRateNum = frameRateNum;
00031         mFrameRateDen = frameRateDen;
00032         ReadData(fileName, ioBuf); // ReadData deletes ioBuf
00033     }
00034 
00035     Mpeg7Doc(String id, String fileName, int nrFrames, int fractions,
00036              bool withKeyframes, String conceptSet, String annoFileName,
00037              int frameRateNum, int frameRateDen)
00038     {
00039         mId = id;
00040         mVideoFileName = fileName;
00041         mNrFrames = nrFrames;
00042         mFractions = fractions;
00043         mOriginalHadKeyframes = withKeyframes;
00044         mConceptSet = conceptSet;
00045         mAnnoFileName = annoFileName;
00046         mAnnoIsConcept = false;
00047         mFrameRateNum = frameRateNum;
00048         mFrameRateDen = frameRateDen;
00049         CheckKnownFraction();
00050     }
00051 
00052     virtual
00053     ~Mpeg7Doc()
00054     {
00055     }
00056 
00057     void
00058     AddShot(String name, int startFrame, int endFrame)
00059     {
00060         mShotName.push_back(name);
00061         mStartFrame.push_back(startFrame);
00062         mEndFrame.push_back(endFrame);
00063     }
00064 
00065     void
00066     AddShot(String name, int startFrame, int endFrame,
00067             std::vector<String> keyframeNames, std::vector<int> keyframeFrames)
00068     {
00069         mShotName.push_back(name);
00070         mStartFrame.push_back(startFrame);
00071         mEndFrame.push_back(endFrame);
00072         mKeyframeName.push_back(keyframeNames);
00073         mKeyframeFrame.push_back(keyframeFrames);
00074     }
00075 
00076     void
00077     AddAnnotation(String annotation, String shotName, int startFrame,
00078                   int endFrame, double relevance, double confidence)
00079     {
00080         mAnnotation.push_back(annotation);
00081         mShotName.push_back(shotName);
00082         mStartFrame.push_back(startFrame);
00083         mEndFrame.push_back(endFrame);
00084         mRelevance.push_back(relevance);
00085         mConfidence.push_back(confidence);
00086     }
00087 
00088     // To allow identification of segment to be replaced, by any frame
00089     // within it, rather than identification by startFrame only
00090     int
00091     ReplaceAnnotationEx(String annotation, String shotName,
00092                         int aFrame, int startFrame, int endFrame,
00093                         double relevance, double confidence)
00094     {
00095         int idx = FindShot(aFrame);
00096         if (idx == -1)
00097         {
00098             mAnnotation.push_back(annotation);
00099             mShotName.push_back(shotName);
00100             mStartFrame.push_back(startFrame);
00101             mEndFrame.push_back(endFrame);
00102             mRelevance.push_back(relevance);
00103             mConfidence.push_back(confidence);
00104         }
00105         else
00106         {   // assume only one kind of annotation
00107             mShotName[idx] = shotName;
00108             mStartFrame[idx] = startFrame;
00109             mEndFrame[idx] = endFrame;
00110             mRelevance[idx] = relevance;
00111             mConfidence[idx] = confidence;
00112         }
00113         return idx;
00114     }
00115 
00116     int
00117     ReplaceAnnotation(String annotation, String shotName,
00118                       int startFrame, int endFrame, double relevance,
00119                       double confidence)
00120     {
00121         return ReplaceAnnotationEx(annotation, shotName, startFrame,
00122                                    startFrame, endFrame,relevance, confidence);
00123     }
00124 
00125     void
00126     SetAnnoIsConcept()
00127     {
00128         mAnnoIsConcept = true;
00129     }
00130 
00131     bool
00132     GetAnnoIsConcept()
00133     {
00134         return mAnnoIsConcept;
00135     }
00136 
00137     // inquiry
00138 
00139     String
00140     GetId()
00141     {
00142         return mId;
00143     }
00144 
00145     String
00146     GetVideoFileName()
00147     {
00148         return mVideoFileName;
00149     }
00150 
00151     int
00152     GetNrFrames()
00153     {
00154         return mNrFrames;
00155     }
00156 
00157     int
00158     GetFractions()
00159     {
00160         return mFractions;
00161     }
00162 
00163     int
00164     NrShots() const
00165     {
00166         return mShotName.size();
00167     }
00168 
00169     String
00170     ShotName(int shot)
00171     {
00172         return mShotName[shot];
00173     }
00174 
00175     int
00176     StartFrame(int shot)
00177     {
00178         return mStartFrame[shot];
00179     }
00180 
00181     int
00182     EndFrame(int shot)
00183     {
00184         return mEndFrame[shot];
00185     }
00186 
00187     int
00188     NrKeyframes(int shot) const
00189     {
00190         return mKeyframeName[shot].size();
00191     }
00192 
00193     String
00194     KeyframeName(int shot, int keyframe) const
00195     {
00196         return mKeyframeName[shot][keyframe];
00197     }
00198 
00199     int
00200     KeyframeFrame(int shot, int keyframe) const
00201     {
00202         return mKeyframeFrame[shot][keyframe];
00203     }
00204 
00205     double
00206     Relevance(int shot)
00207     {
00208         return mRelevance[shot];
00209     }
00210 
00211     double
00212     FrameRelevance(int frameNr)
00213     {
00214         int shotIdx = FindShot(frameNr);
00215         return shotIdx != -1 ? Relevance(shotIdx) : 0.0;
00216     }
00217 
00218     double
00219     Confidence(int shot)
00220     {
00221         return mConfidence[shot];
00222     }
00223 
00224     int
00225     FindShot(int frameNr)
00226     {
00227         for (int i=0 ; i<mStartFrame.size() ; i++)
00228         {
00229             int sF = mStartFrame[i];
00230             int eF = mEndFrame[i];
00231             if (eF > 13000)
00232                 int g = 10;
00233             if ((mStartFrame[i] <= frameNr) && (frameNr <= mEndFrame[i]))
00234                 return i;
00235         }
00236         return -1;
00237     }
00238 
00239     std::vector<int>
00240     FindShots(int startFrame, int endFrame) const
00241     {
00242         std::vector<int> res;
00243         for (int i=0 ; i<mStartFrame.size() ; i++)
00244         {
00245             if (Geometry::OverlapsAnywhere1d(startFrame, endFrame,
00246                                              mStartFrame[i], mEndFrame[i]))
00247             {
00248                 res.push_back(i);
00249             }
00250         }
00251         return res;
00252     }
00253 
00254     // Annotation / Concept part
00255 
00256     bool
00257     HasAnnotations()
00258     {
00259         return !mAnnotation.empty();
00260     }
00261 
00262     void
00263     SetConceptSet(String conceptSet)
00264     {
00265         mConceptSet = conceptSet;
00266     }
00267 
00268     String
00269     GetConceptSet()
00270     {
00271         return mConceptSet;
00272     }
00273 
00274     String
00275     GetAnnoFileName()
00276     {
00277         return mAnnoFileName;
00278     }
00279 
00280     String
00281     Annotation(int shot)
00282     {
00283         return mAnnotation[shot];
00284     }
00285 
00286     /*
00287 
00288 Impala annotation tool uses the following conventions:
00289 
00290 positive: relevance="1" confidence="1"
00291 negative: relevance="0" confidence="1"
00292 skip: relevance="0.5" confidence="1"
00293 
00294 Trec2007 annotations:
00295 
00296 positive: relevance="1" confidence="1"
00297 positive skip: relevance="1" confidence="0.5"
00298 negative: relevance="0" confidence="1"
00299 negative skip: relevance="0" confidence="0.5"
00300 
00301     */
00302 
00303     bool
00304     IsPositiveAnno(int shot)
00305     {
00306         return (Relevance(shot) == 1.0) && (Confidence(shot) > 0.5);
00307     }
00308 
00309     bool
00310     IsNegativeAnno(int shot)
00311     {
00312         return (Relevance(shot) == 0.0) && (Confidence(shot) > 0.5);
00313     }
00314 
00315     // Feature part
00316 
00317     bool
00318     HasFeatures()
00319     {
00320         return mFeatureName.size() != 0;
00321     }
00322 
00323     String
00324     GetFeature()
00325     {
00326         return mFeatureName[0];
00327     }
00328 
00329     void
00330     AddFeature(String featureName, String shotName, int startFrame,
00331                int endFrame, std::vector<double> featureValues)
00332     {
00333         mFeatureName.push_back(featureName);
00334         mShotName.push_back(shotName);
00335         mStartFrame.push_back(startFrame);
00336         mEndFrame.push_back(endFrame);
00337         mFeatureValues.push_back(featureValues);
00338     }
00339 
00340     // Misc
00341 
00342     void
00343     Dump(int from = 0, int to = -1)
00344     {
00345         if (to == -1 || to > NrShots())
00346             to = NrShots();
00347         if (to < from)
00348             to = from;
00349         std::cout << "Dumping mpeg7 from " << from << " to " << to
00350                   << " (size=" << NrShots() << ")" << std::endl;
00351         for (int i=from ; i<to ; i++)
00352         {
00353             std::cout << "segment " << ShotName(i) << " from " << StartFrame(i)
00354                       << " to " << EndFrame(i) << ": ";
00355             if (HasAnnotations())
00356             {
00357                 std::cout << "relevance=" << Relevance(i) << ", confidence="
00358                           << Confidence(i) << ", anno=" << Annotation(i);
00359             }
00360             if (mOriginalHadKeyframes)
00361             {
00362                 for (int j=0 ; j<NrKeyframes(i) ; j++)
00363                 {
00364                     std::cout << "keyname=" << KeyframeName(i, j)
00365                               << ",frame=" << KeyframeFrame(i, j) << " ";
00366                 }
00367             }
00368             if (HasFeatures())
00369             {
00370                 std::cout << "feature=" << mFeatureName[i] + ", values=";
00371                 for (int j=0 ; j<mFeatureValues[i].size() ; j++)
00372                 {
00373                     std::cout << mFeatureValues[i][j] << " ";
00374                 }
00375             }
00376             std::cout << std::endl;
00377         }
00378     }
00379 
00380     int
00381     Diff(Mpeg7Doc* arg)
00382     {
00383         if (NrShots() != arg->NrShots())
00384         {
00385             ILOG_ERROR("Diff: nrShots differs: " << NrShots() << " vs " <<
00386                        arg->NrShots());
00387             return 1;
00388         }
00389         int nDiff = 0;
00390         for (int i=0 ; i<NrShots() ; i++)
00391         {
00392             if (ShotName(i) != arg->ShotName(i))
00393             {
00394                 ILOG_DEBUG("ShotName " << i << " differs " << ShotName(i) <<
00395                            " vs " << arg->ShotName(i));
00396                 nDiff++;
00397             }
00398             else if (StartFrame(i) != arg->StartFrame(i))
00399             {
00400                 ILOG_DEBUG("Start " << i << " differs " << StartFrame(i) <<
00401                            " vs " << arg->StartFrame(i));
00402                 nDiff++;
00403             }
00404             else if (EndFrame(i) != arg->EndFrame(i))
00405             {
00406                 ILOG_DEBUG("End " << i << " differs " << EndFrame(i) <<
00407                            " vs " << arg->EndFrame(i));
00408                 nDiff++;
00409             }
00410             else if (HasAnnotations())
00411             {
00412                 if (Relevance(i) != arg->Relevance(i))
00413                 {
00414                     ILOG_DEBUG("Relevance " << i << " differs " << Relevance(i)
00415                                << " vs " << arg->Relevance(i));
00416                     nDiff++;
00417                 }
00418                 else if (Confidence(i) != arg->Confidence(i))
00419                 {
00420                     ILOG_DEBUG("Confidence " << i << " differs " << Confidence(i)
00421                                << " vs " << arg->Confidence(i));
00422                     nDiff++;
00423                 }
00424             }
00425             else if (mOriginalHadKeyframes)
00426             {
00427                 for (int j=0 ; j<NrKeyframes(i) ; j++)
00428                 {
00429                     if (KeyframeName(i, j) != arg->KeyframeName(i, j))
00430                     {
00431                         ILOG_DEBUG("KeyframeName " << i << "," << j <<
00432                                    " differs " << KeyframeName(i, j) <<
00433                                    " vs " << arg->KeyframeName(i, j));
00434                         nDiff++;
00435                         break;
00436                     }
00437                     else if (KeyframeFrame(i, j) != arg->KeyframeFrame(i, j))
00438                     {
00439                         ILOG_DEBUG("KeyframeFrame " << i << "," << j <<
00440                                    " differs " << KeyframeFrame(i, j) <<
00441                                    " vs " << arg->KeyframeFrame(i, j));
00442                         nDiff++;
00443                         break;
00444                     }
00445                 }
00446             }
00447             else if (HasFeatures())
00448             {
00449                 if (mFeatureName[i] != arg->mFeatureName[i])
00450                 {
00451                     ILOG_DEBUG("FeatureName " << i << " differs " <<
00452                                mFeatureName[i] << " vs " << arg->mFeatureName[i]);
00453                     nDiff++;
00454                 }
00455                 else for (int j=0 ; j<mFeatureValues[i].size() ; j++)
00456                 {
00457                     if (mFeatureValues[i][j] != arg->mFeatureValues[i][j])
00458                     {
00459                         ILOG_DEBUG("FeatureValue " << i << "," << j <<
00460                                    " differs " << mFeatureValues[i][j] <<
00461                                    " vs " << mFeatureValues[i][j]);
00462                         nDiff++;
00463                         break;
00464                     }
00465                 }
00466             }
00467         }
00468         if (nDiff > 0)
00469             ILOG_ERROR("Found " << nDiff << " differences");
00470         return nDiff;
00471     }
00472 
00473     // I/O
00474 
00475     /*
00476     void
00477     WriteVxs(String fileName)
00478     {
00479         File f(fileName, "w");
00480         if (! f.Valid())
00481             return;
00482         fprintf(f.Fp(), "#@ \"start\" \"end\" \"id\" \"keystr\"\n");
00483         fprintf(f.Fp(), "#= \"int\" \"int\" \"string\" \"string\"\n");
00484         for (int i=0 ; i<NrShots() ; i++)
00485         {
00486             int dummy = 0;
00487             fprintf(f.Fp(), " %d %d \"%s\" \"", mStartFrame[i], mEndFrame[i],
00488                     mShotName[i].c_str());
00489             for (int k=0 ; k<mKeyframeName[i].size() ; k++)
00490                 fprintf(f.Fp(), " %s %d", mKeyframeName[i][k].c_str(), 
00491                         mKeyframeFrame[i][k]);
00492             fprintf(f.Fp(), "\"\n");
00493         }
00494     }
00495     */
00496 
00497     void
00498     WriteTo(std::ostream& os)
00499     {
00500         for (int i=0 ; i<NrShots() ; i++)
00501         {
00502             os << mShotName[i] << " " << mStartFrame[i] << " " << mEndFrame[i];
00503             if (mOriginalHadKeyframes)
00504                 for (int k=0 ; k<mKeyframeName[i].size() ; k++)
00505                     os << " " << mKeyframeName[i][k] << " "
00506                        << mKeyframeFrame[i][k];
00507             if (HasAnnotations())
00508                 os << " " << mAnnotation[i];
00509             os << std::endl;
00510         }
00511     }
00512 
00513     void
00514     ExportXml(Util::IOBuffer* ioBuf)
00515     {
00516         ioBuf->Puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
00517         ioBuf->Puts("<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:mpeg7=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">");
00518         ioBuf->Puts("<Description xsi:type=\"ContentEntityType\">");
00519         ioBuf->Puts("<MultimediaContent xsi:type=\"VideoType\">");
00520         ioBuf->Puts("");
00521         ioBuf->Puts("<Video id=\"" + String2Xml(mId) + "\">");
00522         ioBuf->Puts("<MediaLocator>");
00523         ioBuf->Puts("<MediaUri>" + String2Xml(mVideoFileName) + "</MediaUri>");
00524         ioBuf->Puts("</MediaLocator>");
00525         ExportMediaTime(ioBuf, 0, mNrFrames);
00526         if (mStartFrame.size() > 0)
00527         {
00528             if (HasAnnotations())
00529                 ioBuf->Puts("<TemporalDecomposition gap=\"true\" overlap=\"true\">");
00530             else
00531                 ioBuf->Puts("<TemporalDecomposition gap=\"false\" overlap=\"false\">");
00532             int nrFrames = mStartFrame.size();
00533             int* frames = new int[nrFrames];
00534             int* order = new int[nrFrames];
00535             for (int i=0 ; i<nrFrames ; i++)
00536             {
00537                 frames[i] = mStartFrame[i];
00538                 order[i] = i;
00539             }
00540             Util::QuickSortCo(frames, order, 0, nrFrames - 1);
00541             for (int i=0 ; i<mStartFrame.size() ; i++)
00542                 //ExportShot(ioBuf, i);
00543                 ExportShot(ioBuf, order[i]);
00544             delete frames;
00545             delete order;
00546             ioBuf->Puts("</TemporalDecomposition>");
00547         }
00548         ioBuf->Puts("</Video>");
00549         ioBuf->Puts("</MultimediaContent>");
00550         ioBuf->Puts("</Description>");
00551         ioBuf->Puts("</Mpeg7>");
00552     }
00553 
00554 private:
00555 
00556     /* file excerpt:
00557 
00558 <?xml version="1.0" encoding="UTF-8"?>
00559 <Mpeg7 xmlns="urn:mpeg:mpeg7:schema:2001" xmlns:mpeg7="urn:mpeg:mpeg7:schema:2001" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
00560 <Description xsi:type="ContentEntityType">
00561 <MultimediaContent xsi:type="VideoType">
00562 
00563 <!-- Shots with keyframes look like this: -->
00564 
00565 <Video id="TRECVID2006_1">
00566   <MediaLocator>
00567     <MediaUri>20051102_142800_LBC_NAHAR_ARB.mpg</MediaUri>
00568   </MediaLocator>
00569   <MediaTime>
00570     <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00571     <MediaDuration>PT01H02M19S28086N30000F</MediaDuration>
00572   </MediaTime>
00573   <TemporalDecomposition gap="false" overlap="false">
00574 
00575     <VideoSegment id="shot1_1">
00576       <MediaTime>
00577         <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00578         <MediaDuration>PT00H00M04S2122N30000F</MediaDuration>
00579       </MediaTime>
00580       <TemporalDecomposition>
00581         <VideoSegment id="shot1_1_RKF">
00582           <MediaTime>
00583             <MediaTimePoint>T00:00:02:60F30000</MediaTimePoint>
00584           </MediaTime>
00585         </VideoSegment>
00586       </TemporalDecomposition>
00587     </VideoSegment>
00588 
00589     <VideoSegment id="shot1_5">
00590       <MediaTime>
00591         <MediaTimePoint>T00:00:16:27507F30000</MediaTimePoint>
00592         <MediaDuration>PT00H00M02S19079N30000F</MediaDuration>
00593       </MediaTime>
00594       <TemporalDecomposition>
00595         <VideoSegment id="shot1_5_NRKF_1">
00596           <MediaTime>
00597             <MediaTimePoint>T00:00:17:17527F30000</MediaTimePoint>
00598           </MediaTime>
00599         </VideoSegment>
00600         <VideoSegment id="shot1_5_RKF">
00601           <MediaTime>
00602             <MediaTimePoint>T00:00:18:6546F30000</MediaTimePoint>
00603           </MediaTime>
00604         </VideoSegment>
00605         <VideoSegment id="shot1_5_NRKF_2">
00606           <MediaTime>
00607             <MediaTimePoint>T00:00:18:27567F30000</MediaTimePoint>
00608           </MediaTime>
00609         </VideoSegment>
00610       </TemporalDecomposition>
00611     </VideoSegment>
00612 
00613   </TemporalDecomposition>
00614 </Video>
00615 
00616 <!-- Annotations look like this: -->
00617 
00618 <Video id="20051102_142800_LBC_NAHAR_ARB.mpg">
00619   <MediaLocator>
00620     <MediaUri>20051102_142800_LBC_NAHAR_ARB.mpg</MediaUri>
00621   </MediaLocator>
00622   <MediaTime>
00623     <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00624     <MediaDuration>PT01H02M19S28086N30000F</MediaDuration>
00625   </MediaTime>
00626   <TemporalDecomposition gap="true" overlap="true">
00627 
00628     <VideoSegment id="shot1_1_RKF">
00629       <MediaTime>
00630         <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00631         <MediaDuration>PT00H00M00S1001N30000F</MediaDuration>
00632       </MediaTime>
00633       <TextAnnotation relevance="1" confidence="1">
00634         <KeywordAnnotation>
00635           <Keyword>Airplane</Keyword>
00636         </KeywordAnnotation>
00637       </TextAnnotation>
00638     </VideoSegment>
00639 
00640   </TemporalDecomposition>
00641 </Video>
00642 
00643 <!-- Note that the old version can also be read: -->
00644 
00645       <TextAnnotation relevance="1" confidence="1">
00646         <FreeTextAnnotation>desert</FreeTextAnnotation>
00647       </TextAnnotation>
00648 
00649 <!-- Features look like this: -->
00650 
00651 <Video id="20051102_142800_LBC_NAHAR_ARB.mpg">
00652   <MediaLocator>
00653     <MediaUri>20051102_142800_LBC_NAHAR_ARB.mpg</MediaUri>
00654   </MediaLocator>
00655   <MediaTime>
00656     <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00657     <MediaDuration>PT01H02M19S28086N30000F</MediaDuration>
00658   </MediaTime>
00659   <TemporalDecomposition gap="true" overlap="true">
00660 
00661     <VideoSegment id="shot1_1_RKF">
00662       <MediaTime>
00663         <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00664         <MediaDuration>PT00H00M00S1001N30000F</MediaDuration>
00665       </MediaTime>
00666       <DescriptorUnit xsi:type="DescriptorCollectionType">
00667         <Descriptor xsi:type="VissemVisualFeatures">
00668           <Coefficients>
00669             <!--numbers separated by spaces-->
00670           </Coefficients>
00671         </Descriptor>
00672       </DescriptorUnit>
00673     </VideoSegment>
00674 
00675   </TemporalDecomposition>
00676 </Video>
00677 
00678 <!-- common part for the end -->
00679 
00680 </MultimediaContent>
00681 </Description>
00682 </Mpeg7>
00683 
00684     */
00685 
00686     String
00687     String2(int nr)
00688     {
00689         return MakeString(nr, 2, '0');
00690     }
00691 
00692 /* Note on time to frame conversion:
00693 
00694 The key element in conversion is the mediaTimeUnit (which is often
00695 not present in the mpeg7 file itself but can be retrieved from the video).
00696 This class encountered the following units:
00697 
00698 PT1N25F       = PAL  = 25    fps
00699 PT1001N30000F = NTSC = 29.97 fps = 30000 / 1001
00700 
00701 */
00702 
00703     bool
00704     CheckKnownFraction()
00705     {
00706         if ((mFrameRateNum != 0) && (mFrameRateDen != 0))
00707         {
00708             mFractions = mFrameRateNum;
00709             return true;
00710         }
00711 
00712         if (mFractions == 25)
00713         {
00714             mFrameRateNum = 25;
00715             mFrameRateDen = 1;
00716             return true;
00717         }
00718         if (mFractions == 30000)
00719         {
00720             mFrameRateNum = 30000;
00721             mFrameRateDen = 1001;
00722             return true;
00723         }
00724 
00725         ILOG_ERROR("Unknown fraction " << mFractions << " in " <<
00726                    mVideoFileName);
00727         mFrameRateNum = mFractions;
00728         mFrameRateDen = 1;
00729         return false;
00730     }
00731 
00732     long
00733     Frame2Fraction(long frame)
00734     {
00735         CheckKnownFraction();
00736         return frame * mFrameRateDen;
00737     }
00738 
00739     long
00740     Fraction2Frame(long fractionCount)
00741     {
00742         CheckKnownFraction();
00743         return fractionCount / mFrameRateDen;
00744     }
00745  
00746     String
00747     Frame2TimePoint(int frame)
00748     {
00749         // produce something like T00:28:21:5985F30000
00750         long totalFractions = Frame2Fraction(frame);
00751         long nrFractions = totalFractions % mFractions;
00752         long totalSeconds = (totalFractions - nrFractions) / mFractions;
00753         long hours = totalSeconds / 3600;
00754         totalSeconds %= 3600;
00755         long minutes = totalSeconds / 60;
00756         long seconds = totalSeconds % 60;
00757         return "T" + String2(hours) + ":" + String2(minutes) + ":" +
00758             String2(seconds) + ":" + MakeString(nrFractions) + "F" +
00759             String2(mFractions);
00760     }
00761 
00762     String
00763     Frame2Duration(int frame)
00764     {
00765         // produce something like PT00H00M04S2122N30000F
00766         long totalFractions = Frame2Fraction(frame);
00767         long nrFractions = totalFractions % mFractions;
00768         long totalSeconds = (totalFractions - nrFractions) / mFractions;
00769         long hours = totalSeconds / 3600;
00770         totalSeconds %= 3600;
00771         long minutes = totalSeconds / 60;
00772         long seconds = totalSeconds % 60;
00773         return "PT" + String2(hours) + "H" + String2(minutes) + "M" +
00774             String2(seconds) + "S" + String2(nrFractions) + "N" +
00775             String2(mFractions) + "F";
00776     }
00777 
00778     void
00779     ExportShot(Util::IOBuffer* ioBuf, int idx)
00780     {
00781         ioBuf->Puts("<VideoSegment id=\"" + mShotName[idx] + "\">");
00782         ExportMediaTime(ioBuf, mStartFrame[idx],
00783                         mEndFrame[idx] - mStartFrame[idx] + 1);
00784         if (HasAnnotations())
00785         {
00786             String r = "relevance=\"" + MakeString(mRelevance[idx]) + "\"";
00787             String c = "confidence=\"" + MakeString(mConfidence[idx]) + "\"";
00788             ioBuf->Puts("<TextAnnotation " + r + " " + c + ">");
00789             ioBuf->Puts("<KeywordAnnotation>");
00790             ioBuf->Puts("<Keyword>" + mAnnotation[idx] + "</Keyword>");
00791             ioBuf->Puts("</KeywordAnnotation>");
00792             ioBuf->Puts("</TextAnnotation>"); 
00793            
00794         }
00795         if (mOriginalHadKeyframes)
00796         {
00797             ioBuf->Puts("<TemporalDecomposition>");
00798             std::vector<String> names = mKeyframeName[idx];
00799             std::vector<int> frames = mKeyframeFrame[idx];
00800             for (int i=0 ; i<names.size() ; i++)
00801             {
00802                 ioBuf->Puts("<VideoSegment id=\"" + names[i] + "\">");
00803                 ExportMediaTime(ioBuf, frames[i], -1);
00804                 ioBuf->Puts("</VideoSegment>");
00805             }
00806             ioBuf->Puts("</TemporalDecomposition>");
00807         }
00808         if (HasFeatures())
00809         {
00810             ioBuf->Puts("<DescriptorUnit xsi:type=\"DescriptorCollectionType\">");
00811             ioBuf->Puts("<Descriptor xsi:type=\"" + mFeatureName[idx] + "\">");
00812             ioBuf->Puts("<Coefficients>");
00813             std::vector<double> v = mFeatureValues[idx];
00814             int nr = 0;
00815             String s;
00816             for (int i=0 ; i<v.size() ; i++)
00817             {
00818                 s += MakeString(v[i]) + " ";
00819                 if (++nr == 10)
00820                 {
00821                     ioBuf->Puts(s);
00822                     s = "";
00823                     nr = 0;
00824                 }
00825             }
00826             if (nr != 0)
00827                 ioBuf->Puts(s);
00828             ioBuf->Puts("</Coefficients>");
00829             ioBuf->Puts("</Descriptor>");
00830             ioBuf->Puts("</DescriptorUnit>");
00831             
00832         }
00833         ioBuf->Puts("</VideoSegment>");
00834     }
00835 
00836     void
00837     ExportMediaTime(Util::IOBuffer* ioBuf, int point, int duration)
00838     {
00839         ioBuf->Puts("<MediaTime>");
00840         ioBuf->Puts("<MediaTimePoint>" + Frame2TimePoint(point) +
00841                     "</MediaTimePoint>");
00842         if (duration != -1)
00843             ioBuf->Puts("<MediaDuration>" + Frame2Duration(duration) +
00844                         "</MediaDuration>");
00845         ioBuf->Puts("</MediaTime>");
00846     }
00847 
00848     // ioBuf will be delete as soon as possible by ReadData
00849     void
00850     ReadData(String fileName, Util::IOBuffer* ioBuf)
00851     {
00852         Persistency::XmlFileReader reader;
00853         DOMDocument* doc = reader.Read(fileName, ioBuf);
00854         delete ioBuf;
00855         DOMNode* mpeg7 = GetChildNode(doc, "Mpeg7", true);
00856         DOMNode* desc = GetChildNode(mpeg7, "Description", true);
00857         DOMNode* mmContent = GetChildNode(desc, "MultimediaContent", true);
00858         DOMNode* video = GetChildNode(mmContent, "Video", true);
00859         mId = Xml2String(GetAttributeValue(video, "id"));
00860         ILOG_DEBUG("video id = " << mId);
00861         DOMNode* mediaLoc = GetChildNode(video, "MediaLocator", true);
00862         DOMNode* mediaUri = GetChildNode(mediaLoc, "MediaUri", true);
00863         mVideoFileName = Xml2String(GetElementValue(mediaUri));
00864         ILOG_DEBUG("filename = " << mVideoFileName);
00865         DOMNode* mediaTime = GetChildNode(video, "MediaTime", true);
00866         mFractions = GetFractions(mediaTime);
00867         ILOG_DEBUG("fractions = " << mFractions);
00868         CheckKnownFraction();
00869         mNrFrames = GetMediaDuration(mediaTime);
00870         ILOG_DEBUG("nrFrames = " << mNrFrames);
00871         DOMNode* decomp = GetChildNode(video, "TemporalDecomposition", true);
00872         std::vector<DOMNode*> segments = GetChildNodes(decomp, "VideoSegment");
00873         ILOG_DEBUG("nr VideoSegments = " << segments.size());
00874         for (int i=0 ; i<segments.size() ; i++)
00875             AddShot(segments[i]);
00876     }
00877 
00878     void
00879     AddShot(DOMNode* segment)
00880     {
00881         String id = GetAttributeValue(segment, "id");
00882         ILOG_DEBUG("shot id = " << id);
00883         mShotName.push_back(id);
00884         DOMNode* textAnno = GetChildNode(segment, "TextAnnotation", false);
00885         if (textAnno)
00886         {
00887             String rel = GetAttributeValue(textAnno, "relevance");
00888             mRelevance.push_back(atof(rel));
00889             String conf = GetAttributeValue(textAnno, "confidence");
00890             mConfidence.push_back(atof(conf));
00891             String anno;
00892             DOMNode* freeText = GetChildNode(textAnno, "FreeTextAnnotation",
00893                                              false);
00894             if (freeText)
00895             {
00896                 anno = GetElementValue(freeText);
00897             }
00898             else
00899             {
00900                 DOMNode* keyAnno = GetChildNode(textAnno, "KeywordAnnotation",
00901                                                 true);
00902                 DOMNode* keyWord = GetChildNode(keyAnno, "Keyword", true);
00903                 anno = GetElementValue(keyWord);
00904             }
00905             mAnnotation.push_back(anno);
00906             mAnnoFileName = anno;
00907         }
00908         else
00909         {
00910             mAnnoFileName = "";
00911         }
00912         DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00913         long frame = GetMediaTimePoint(mediaTime);
00914         long duration = GetMediaDuration(mediaTime);
00915         mStartFrame.push_back(frame);
00916         mEndFrame.push_back(frame + duration - 1);
00917 
00918         std::vector<String> keyframeNames;
00919         std::vector<int> keyframeFrames;
00920         DOMNode* td = GetChildNode(segment, "TemporalDecomposition", false);
00921         if (td)
00922         {
00923             mOriginalHadKeyframes = true;
00924             GetTemporalDecomposition(td, keyframeNames, keyframeFrames);
00925         }
00926         else
00927         {
00928             mOriginalHadKeyframes = false;
00929             keyframeNames.push_back(id + "_RKF");
00930             keyframeFrames.push_back(frame + duration/2);
00931         }
00932         mKeyframeName.push_back(keyframeNames);
00933         mKeyframeFrame.push_back(keyframeFrames);
00934     }
00935 
00936     long
00937     GetMediaTimePoint(DOMNode* mediaTime)
00938     {
00939         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00940         String s = GetElementValue(timePoint);
00941         // e.g. s = T00:28:21:5985F30000
00942         Util::StringParser p(s);
00943         p.Eat('T'); // T is begin of time point
00944         long hours = p.GetInt(':');
00945         long minutes = p.GetInt(':');
00946         long seconds = p.GetInt(':');
00947         p.AdvanceP(1); // skip ':', since it is not whitespace for next
00948         long fractions = p.GetInt('F');
00949         p.AdvanceP(1);
00950         long nrFractions = p.GetInt();
00951         long frame;
00952         frame = (hours*3600 + minutes*60 + seconds)*nrFractions + fractions;
00953         return Fraction2Frame(frame);
00954     }
00955 
00956     long
00957     GetMediaDuration(DOMNode* mediaTime)
00958     {
00959         DOMNode* duration = GetChildNode(mediaTime, "MediaDuration", true);
00960         String s = GetElementValue(duration);
00961         // e.g. s = PT00H00M04S2122N30000F
00962         Util::StringParser p(s);
00963         p.Eat('T'); // T is begin of duration
00964         long total = 0;
00965         if (p.Contains("H"))
00966         {
00967             total += p.GetInt('H') * 3600;
00968             p.AdvanceP(1);
00969         }
00970         if (p.Contains("M"))
00971         {
00972             total += p.GetInt('M') * 60;
00973             p.AdvanceP(1);
00974         }
00975         if (p.Contains("S"))
00976         {
00977             total += p.GetInt('S');
00978             p.AdvanceP(1);
00979         }
00980         long fractions = p.GetInt('N');
00981         p.AdvanceP(1);
00982         long nrFractions = p.GetInt('F');
00983         long dur = total*nrFractions + fractions;
00984         return Fraction2Frame(dur);
00985     }
00986 
00987     int
00988     GetFractions(DOMNode* mediaTime)
00989     {
00990         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00991         String s = GetElementValue(timePoint);
00992         // e.g. s = T00:28:21:5985F30000
00993         Util::StringParser p(s);
00994         p.Eat('F');
00995         return p.GetInt();
00996     }
00997 
00998     void
00999     GetTemporalDecomposition(DOMNode* td, std::vector<String>& names,
01000                              std::vector<int>& frames)
01001     {
01002         std::vector<DOMNode*> segments = GetChildNodes(td, "VideoSegment");
01003         for (int i=0 ; i<segments.size() ; i++)
01004         {
01005             DOMNode* segment = segments[i];
01006             String id = GetAttributeValue(segment, "id");
01007             names.push_back(id);
01008             DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
01009             long frame = GetMediaTimePoint(mediaTime);
01010             frames.push_back(frame);
01011         }
01012     }
01013 
01014     String
01015     String2Xml(CString src)
01016     {
01017         return StringReplaceAll(src, "&", "&amp;", false);
01018     }
01019 
01020     String
01021     Xml2String(CString src)
01022     {
01023         return StringReplaceAll(src, "&amp;", "&", false);
01024     }
01025  
01026     // data
01027 
01028     String mId; // e.g. TRECVID2006_1
01029     String mVideoFileName; // when read from XML: text of Video/MediaLocator/MediaUri
01030     int    mNrFrames; // when read from XML: result of GetMediaDuration on the video's MediaTime
01031     int    mFractions; // when read from XML: number after 'F' in the video's MediaTimePoint
01032     long   mFrameRateNum; // constructor argument; presumably frame rate numerator -- confirm
01033     long   mFrameRateDen; // constructor argument; presumably frame rate denominator -- confirm
01034     bool   mOriginalHadKeyframes; // AddShot(DOMNode*) rewrites this per shot, so the last shot read wins
01035     String mConceptSet;
01036     String mAnnoFileName; // AddShot(DOMNode*) sets this to the shot's annotation text, or "" if none
01037     bool   mAnnoIsConcept; // both constructors start this at false
01038 
01039     std::vector<String>                mShotName; // one entry per shot
01040     std::vector<int>                   mStartFrame; // one entry per shot
01041     std::vector<int>                   mEndFrame; // one entry per shot; AddShot(DOMNode*) stores start + duration - 1
01042     std::vector< std::vector<String> > mKeyframeName; // per shot: names of its keyframes
01043     std::vector< std::vector<int> >    mKeyframeFrame; // per shot: frame numbers of its keyframes
01044     std::vector<String>                mAnnotation; // AddShot(DOMNode*) appends only for shots with a TextAnnotation
01045     std::vector<double>                mRelevance; // appended together with mAnnotation
01046     std::vector<double>                mConfidence; // appended together with mAnnotation
01047     std::vector<String>                mFeatureName; // NOTE(review): not filled by the shot-reading code; confirm usage
01048     std::vector< std::vector<double> > mFeatureValues; // NOTE(review): presumably parallel to mFeatureName -- confirm
01049 
01050     ILOG_VAR_DEC; // logging macro; paired with ILOG_VAR_INIT(Mpeg7Doc, ...) after the class
01051 
01052 };
01053 
01054 ILOG_VAR_INIT(Mpeg7Doc, Impala.Core.VideoSet);
01055 
01056 } // namespace VideoSet
01057 } // namespace Core
01058 } // namespace Impala
01059 
01060 #endif

Generated on Thu Jan 13 09:04:47 2011 for ImpalaSrc by  doxygen 1.5.1