Impala Documentation

00001 #ifndef Impala_Core_VideoSet_Mpeg7Doc_h
00002 #define Impala_Core_VideoSet_Mpeg7Doc_h
00003 
00004 #include "Util/Database.h"
00005 #include "Util/XmlDoc.h"
00006 #include "Util/QuickSort.h"
00007 #include "Util/StringParser.h"
00008 #include "Persistency/XmlFileReader.h"
00009 #include "Core/Geometry/OverlapsAnywhere1d.h"
00010 
00011 namespace Impala
00012 {
00013 namespace Core
00014 {
00015 namespace VideoSet
00016 {
00017 
00018 
00019 class Mpeg7Doc : public Util::XmlDoc
00020 {
00021 
00022 public:
00023 
00024 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00025     Mpeg7Doc(String fileName, Util::Database* db)
00026     {
00027         mAnnoIsConcept = false;
00028         Util::IOBuffer* ioBuf = db->GetIOBuffer(fileName, true, true, "");
00029         ReadData(fileName, ioBuf); // ReadData deletes ioBuf
00030     }
00031 #endif // REPOSITORY_USED
00032 
00033     // ioBuf will be deleted by this constructor
00034     Mpeg7Doc(String fileName, Util::IOBuffer* ioBuf)
00035     {
00036         mAnnoIsConcept = false;
00037         ReadData(fileName, ioBuf); // ReadData deletes ioBuf
00038     }
00039 
00040     Mpeg7Doc(String id, String fileName, int nrFrames, int fractions,
00041              bool withKeyframes, String conceptSet, String annoFileName)
00042     {
00043         mId = id;
00044         mVideoFileName = fileName;
00045         mNrFrames = nrFrames;
00046         mFractions = fractions;
00047         mOriginalHadKeyframes = withKeyframes;
00048         mConceptSet = conceptSet;
00049         mAnnoFileName = annoFileName;
00050         mAnnoIsConcept = false;
00051     }
00052 
00053     virtual
00054     ~Mpeg7Doc()
00055     {
00056     }
00057 
00058     void
00059     AddShot(String name, int startFrame, int endFrame)
00060     {
00061         mShotName.push_back(name);
00062         mStartFrame.push_back(startFrame);
00063         mEndFrame.push_back(endFrame);
00064     }
00065 
00066     void
00067     AddShot(String name, int startFrame, int endFrame,
00068             std::vector<String> keyframeNames, std::vector<int> keyframeFrames)
00069     {
00070         mShotName.push_back(name);
00071         mStartFrame.push_back(startFrame);
00072         mEndFrame.push_back(endFrame);
00073         mKeyframeName.push_back(keyframeNames);
00074         mKeyframeFrame.push_back(keyframeFrames);
00075     }
00076 
00077     void
00078     AddAnnotation(String annotation, String shotName, int startFrame,
00079                   int endFrame, double relevance, double confidence)
00080     {
00081         mAnnotation.push_back(annotation);
00082         mShotName.push_back(shotName);
00083         mStartFrame.push_back(startFrame);
00084         mEndFrame.push_back(endFrame);
00085         mRelevance.push_back(relevance);
00086         mConfidence.push_back(confidence);
00087     }
00088 
00089     // To allow identification of segment to be replaced, by any frame
00090     // within it, rather than identification by startFrame only
00091     int
00092     ReplaceAnnotationEx(String annotation, String shotName,
00093                         int aFrame, int startFrame, int endFrame,
00094                         double relevance, double confidence)
00095     {
00096         int idx = FindShot(aFrame);
00097         if (idx == -1)
00098         {
00099             mAnnotation.push_back(annotation);
00100             mShotName.push_back(shotName);
00101             mStartFrame.push_back(startFrame);
00102             mEndFrame.push_back(endFrame);
00103             mRelevance.push_back(relevance);
00104             mConfidence.push_back(confidence);
00105         }
00106         else
00107         {   // assume only one kind of annotation
00108             mShotName[idx] = shotName;
00109             mStartFrame[idx] = startFrame;
00110             mEndFrame[idx] = endFrame;
00111             mRelevance[idx] = relevance;
00112             mConfidence[idx] = confidence;
00113         }
00114         return idx;
00115     }
00116 
00117     int
00118     ReplaceAnnotation(String annotation, String shotName,
00119                       int startFrame, int endFrame, double relevance,
00120                       double confidence)
00121     {
00122         return ReplaceAnnotationEx(annotation, shotName, startFrame,
00123                                    startFrame, endFrame,relevance, confidence);
00124     }
00125 
00126     void
00127     SetAnnoIsConcept()
00128     {
00129         mAnnoIsConcept = true;
00130     }
00131 
00132     bool
00133     GetAnnoIsConcept()
00134     {
00135         return mAnnoIsConcept;
00136     }
00137 
00138     // inquiry
00139 
00140     String
00141     GetId()
00142     {
00143         return mId;
00144     }
00145 
00146     String
00147     GetVideoFileName()
00148     {
00149         return mVideoFileName;
00150     }
00151 
00152     int
00153     GetNrFrames()
00154     {
00155         return mNrFrames;
00156     }
00157 
00158     int
00159     GetFractions()
00160     {
00161         return mFractions;
00162     }
00163 
00164     int
00165     NrShots()
00166     {
00167         return mShotName.size();
00168     }
00169 
00170     String
00171     ShotName(int shot)
00172     {
00173         return mShotName[shot];
00174     }
00175 
00176     int
00177     StartFrame(int shot)
00178     {
00179         return mStartFrame[shot];
00180     }
00181 
00182     int
00183     EndFrame(int shot)
00184     {
00185         return mEndFrame[shot];
00186     }
00187 
00188     int
00189     NrKeyframes(int shot)
00190     {
00191         return mKeyframeName[shot].size();
00192     }
00193 
00194     String
00195     KeyframeName(int shot, int keyframe)
00196     {
00197         return mKeyframeName[shot][keyframe];
00198     }
00199 
00200     int
00201     KeyframeFrame(int shot, int keyframe)
00202     {
00203         return mKeyframeFrame[shot][keyframe];
00204     }
00205 
00206     double
00207     Relevance(int shot)
00208     {
00209         return mRelevance[shot];
00210     }
00211 
00212     double
00213     FrameRelevance(int frameNr)
00214     {
00215         int shotIdx = FindShot(frameNr);
00216         return shotIdx != -1 ? Relevance(shotIdx) : 0.0;
00217     }
00218 
00219     double
00220     Confidence(int shot)
00221     {
00222         return mConfidence[shot];
00223     }
00224 
00225     int
00226     FindShot(int frameNr)
00227     {
00228         for (int i=0 ; i<mStartFrame.size() ; i++)
00229         {
00230             int sF = mStartFrame[i];
00231             int eF = mEndFrame[i];
00232             if (eF > 13000)
00233                 int g = 10;
00234             if ((mStartFrame[i] <= frameNr) && (frameNr <= mEndFrame[i]))
00235                 return i;
00236         }
00237         return -1;
00238     }
00239 
00240     std::vector<int>
00241     FindShots(int startFrame, int endFrame)
00242     {
00243         std::vector<int> res;
00244         for (int i=0 ; i<mStartFrame.size() ; i++)
00245         {
00246             if (Geometry::OverlapsAnywhere1d(startFrame, endFrame,
00247                                              mStartFrame[i], mEndFrame[i]))
00248             {
00249                 res.push_back(i);
00250             }
00251         }
00252         return res;
00253     }
00254 
00255     // Annotation / Concept part
00256 
00257     bool
00258     HasAnnotations()
00259     {
00260         return !mAnnotation.empty();
00261     }
00262 
00263     void
00264     SetConceptSet(String conceptSet)
00265     {
00266         mConceptSet = conceptSet;
00267     }
00268 
00269     String
00270     GetConceptSet()
00271     {
00272         return mConceptSet;
00273     }
00274 
00275     String
00276     GetAnnoFileName()
00277     {
00278         return mAnnoFileName;
00279     }
00280 
00281     String
00282     Annotation(int shot)
00283     {
00284         return mAnnotation[shot];
00285     }
00286 
00287     /*
00288 
00289 Impala annotation tool uses the following conventions:
00290 
00291 positive: relevance="1" confidence="1"
00292 negative: relevance="0" confidence="1"
00293 skip: relevance="0.5" confidence="1"
00294 
00295 Trec2007 annotations:
00296 
00297 positive: relevance="1" confidence="1"
00298 positive skip: relevance="1" confidence="0.5"
00299 negative: relevance="0" confidence="1"
00300 negative skip: relevance="0" confidence="0.5"
00301 
00302     */
00303 
00304     bool
00305     IsPositiveAnno(int shot)
00306     {
00307         return (Relevance(shot) == 1.0) && (Confidence(shot) > 0.5);
00308     }
00309 
00310     bool
00311     IsNegativeAnno(int shot)
00312     {
00313         return (Relevance(shot) == 0.0) && (Confidence(shot) > 0.5);
00314     }
00315 
00316     // Feature part
00317 
00318     bool
00319     HasFeatures()
00320     {
00321         return mFeatureName.size() != 0;
00322     }
00323 
00324     String
00325     GetFeature()
00326     {
00327         return mFeatureName[0];
00328     }
00329 
00330     void
00331     AddFeature(String featureName, String shotName, int startFrame,
00332                int endFrame, std::vector<double> featureValues)
00333     {
00334         mFeatureName.push_back(featureName);
00335         mShotName.push_back(shotName);
00336         mStartFrame.push_back(startFrame);
00337         mEndFrame.push_back(endFrame);
00338         mFeatureValues.push_back(featureValues);
00339     }
00340 
00341     // I/O
00342 
00343     /*
00344     void
00345     WriteVxs(String fileName)
00346     {
00347         File f(fileName, "w");
00348         if (! f.Valid())
00349             return;
00350         fprintf(f.Fp(), "#@ \"start\" \"end\" \"id\" \"keystr\"\n");
00351         fprintf(f.Fp(), "#= \"int\" \"int\" \"string\" \"string\"\n");
00352         for (int i=0 ; i<NrShots() ; i++)
00353         {
00354             int dummy = 0;
00355             fprintf(f.Fp(), " %d %d \"%s\" \"", mStartFrame[i], mEndFrame[i],
00356                     mShotName[i].c_str());
00357             for (int k=0 ; k<mKeyframeName[i].size() ; k++)
00358                 fprintf(f.Fp(), " %s %d", mKeyframeName[i][k].c_str(), 
00359                         mKeyframeFrame[i][k]);
00360             fprintf(f.Fp(), "\"\n");
00361         }
00362     }
00363     */
00364 
00365     void
00366     WriteTo(std::ostream& os)
00367     {
00368         for (int i=0 ; i<NrShots() ; i++)
00369         {
00370             os << mShotName[i] << " " << mStartFrame[i] << " " << mEndFrame[i];
00371             if (mOriginalHadKeyframes)
00372                 for (int k=0 ; k<mKeyframeName[i].size() ; k++)
00373                     os << " " << mKeyframeName[i][k] << " "
00374                        << mKeyframeFrame[i][k];
00375             if (HasAnnotations())
00376                 os << " " << mAnnotation[i];
00377             os << std::endl;
00378         }
00379     }
00380 
00381     void
00382     ExportXml(Util::IOBuffer* ioBuf)
00383     {
00384         ioBuf->Puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
00385         ioBuf->Puts("<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:mpeg7=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">");
00386         ioBuf->Puts("<Description xsi:type=\"ContentEntityType\">");
00387         ioBuf->Puts("<MultimediaContent xsi:type=\"VideoType\">");
00388         ioBuf->Puts("");
00389         ioBuf->Puts("<Video id=\"" + String2Xml(mId) + "\">");
00390         ioBuf->Puts("<MediaLocator>");
00391         ioBuf->Puts("<MediaUri>" + String2Xml(mVideoFileName) + "</MediaUri>");
00392         ioBuf->Puts("</MediaLocator>");
00393         ExportMediaTime(ioBuf, 0, mNrFrames);
00394         if (mStartFrame.size() > 0)
00395         {
00396             if (HasAnnotations())
00397                 ioBuf->Puts("<TemporalDecomposition gap=\"true\" overlap=\"true\">");
00398             else
00399                 ioBuf->Puts("<TemporalDecomposition gap=\"false\" overlap=\"false\">");
00400             int nrFrames = mStartFrame.size();
00401             int* frames = new int[nrFrames];
00402             int* order = new int[nrFrames];
00403             for (int i=0 ; i<nrFrames ; i++)
00404             {
00405                 frames[i] = mStartFrame[i];
00406                 order[i] = i;
00407             }
00408             Util::QuickSortCo(frames, order, 0, nrFrames - 1);
00409             for (int i=0 ; i<mStartFrame.size() ; i++)
00410                 //ExportShot(ioBuf, i);
00411                 ExportShot(ioBuf, order[i]);
00412             delete frames;
00413             delete order;
00414             ioBuf->Puts("</TemporalDecomposition>");
00415         }
00416         ioBuf->Puts("</Video>");
00417         ioBuf->Puts("</MultimediaContent>");
00418         ioBuf->Puts("</Description>");
00419         ioBuf->Puts("</Mpeg7>");
00420     }
00421 
00422 private:
00423 
00424     /* file excerpt:
00425 
00426 <?xml version="1.0" encoding="UTF-8"?>
00427 <Mpeg7 xmlns="urn:mpeg:mpeg7:schema:2001" xmlns:mpeg7="urn:mpeg:mpeg7:schema:2001" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
00428 <Description xsi:type="ContentEntityType">
00429 <MultimediaContent xsi:type="VideoType">
00430 
00431 <!-- Shots with keyframes look like this: -->
00432 
00433 <Video id="TRECVID2006_1">
00434   <MediaLocator>
00435     <MediaUri>20051102_142800_LBC_NAHAR_ARB.mpg</MediaUri>
00436   </MediaLocator>
00437   <MediaTime>
00438     <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00439     <MediaDuration>PT01H02M19S28086N30000F</MediaDuration>
00440   </MediaTime>
00441   <TemporalDecomposition gap="false" overlap="false">
00442 
00443     <VideoSegment id="shot1_1">
00444       <MediaTime>
00445         <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00446         <MediaDuration>PT00H00M04S2122N30000F</MediaDuration>
00447       </MediaTime>
00448       <TemporalDecomposition>
00449         <VideoSegment id="shot1_1_RKF">
00450           <MediaTime>
00451             <MediaTimePoint>T00:00:02:60F30000</MediaTimePoint>
00452           </MediaTime>
00453         </VideoSegment>
00454       </TemporalDecomposition>
00455     </VideoSegment>
00456 
00457     <VideoSegment id="shot1_5">
00458       <MediaTime>
00459         <MediaTimePoint>T00:00:16:27507F30000</MediaTimePoint>
00460         <MediaDuration>PT00H00M02S19079N30000F</MediaDuration>
00461       </MediaTime>
00462       <TemporalDecomposition>
00463         <VideoSegment id="shot1_5_NRKF_1">
00464           <MediaTime>
00465             <MediaTimePoint>T00:00:17:17527F30000</MediaTimePoint>
00466           </MediaTime>
00467         </VideoSegment>
00468         <VideoSegment id="shot1_5_RKF">
00469           <MediaTime>
00470             <MediaTimePoint>T00:00:18:6546F30000</MediaTimePoint>
00471           </MediaTime>
00472         </VideoSegment>
00473         <VideoSegment id="shot1_5_NRKF_2">
00474           <MediaTime>
00475             <MediaTimePoint>T00:00:18:27567F30000</MediaTimePoint>
00476           </MediaTime>
00477         </VideoSegment>
00478       </TemporalDecomposition>
00479     </VideoSegment>
00480 
00481   </TemporalDecomposition>
00482 </Video>
00483 
00484 <!-- Annotations look like this: -->
00485 
00486 <Video id="20051102_142800_LBC_NAHAR_ARB.mpg">
00487   <MediaLocator>
00488     <MediaUri>20051102_142800_LBC_NAHAR_ARB.mpg</MediaUri>
00489   </MediaLocator>
00490   <MediaTime>
00491     <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00492     <MediaDuration>PT01H02M19S28086N30000F</MediaDuration>
00493   </MediaTime>
00494   <TemporalDecomposition gap="true" overlap="true">
00495 
00496     <VideoSegment id="shot1_1_RKF">
00497       <MediaTime>
00498         <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00499         <MediaDuration>PT00H00M00S1001N30000F</MediaDuration>
00500       </MediaTime>
00501       <TextAnnotation relevance="1" confidence="1">
00502         <KeywordAnnotation>
00503           <Keyword>Airplane</Keyword>
00504         </KeywordAnnotation>
00505       </TextAnnotation>
00506     </VideoSegment>
00507 
00508   </TemporalDecomposition>
00509 </Video>
00510 
00511 <!-- Note that the old version can also be read: -->
00512 
00513       <TextAnnotation relevance="1" confidence="1">
00514         <FreeTextAnnotation>desert</FreeTextAnnotation>
00515       </TextAnnotation>
00516 
00517 <!-- Features look like this: -->
00518 
00519 <Video id="20051102_142800_LBC_NAHAR_ARB.mpg">
00520   <MediaLocator>
00521     <MediaUri>20051102_142800_LBC_NAHAR_ARB.mpg</MediaUri>
00522   </MediaLocator>
00523   <MediaTime>
00524     <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00525     <MediaDuration>PT01H02M19S28086N30000F</MediaDuration>
00526   </MediaTime>
00527   <TemporalDecomposition gap="true" overlap="true">
00528 
00529     <VideoSegment id="shot1_1_RKF">
00530       <MediaTime>
00531         <MediaTimePoint>T00:00:00:0F30000</MediaTimePoint>
00532         <MediaDuration>PT00H00M00S1001N30000F</MediaDuration>
00533       </MediaTime>
00534       <DescriptorUnit xsi:type="DescriptorCollectionType">
00535         <Descriptor xsi:type="VissemVisualFeatures">
00536           <Coefficients>
00537             <!--numbers separated by spaces-->
00538           </Coefficients>
00539         </Descriptor>
00540       </DescriptorUnit>
00541     </VideoSegment>
00542 
00543   </TemporalDecomposition>
00544 </Video>
00545 
00546 <!-- common part for the end -->
00547 
00548 </MultimediaContent>
00549 </Description>
00550 </Mpeg7>
00551 
00552     */
00553 
00554     String
00555     String2(int nr)
00556     {
00557         return MakeString(nr, 2, '0');
00558     }
00559 
00560     String
00561     Frame2TimePoint(int frame)
00562     {
00563         // produce something like T00:28:21:5985F30000
00564         long totalFractions = frame;
00565         if (mFractions == 30000) // probably NTSC with time unit PT1001N30000F
00566             totalFractions *= 1001;
00567         long nrFractions = totalFractions % mFractions;
00568         long totalSeconds = (totalFractions - nrFractions) / mFractions;
00569         long hours = totalSeconds / 3600;
00570         totalSeconds %= 3600;
00571         long minutes = totalSeconds / 60;
00572         long seconds = totalSeconds % 60;
00573         return "T" + String2(hours) + ":" + String2(minutes) + ":" +
00574             String2(seconds) + ":" + MakeString(nrFractions) + "F" +
00575             String2(mFractions);
00576     }
00577 
00578     String
00579     Frame2Duration(int frame)
00580     {
00581         // produce something like PT00H00M04S2122N30000F
00582         long totalFractions = frame;
00583         if (mFractions == 30000) // probably NTSC with time unit PT1001N30000F
00584             totalFractions *= 1001;
00585         long nrFractions = totalFractions % mFractions;
00586         long totalSeconds = (totalFractions - nrFractions) / mFractions;
00587         long hours = totalSeconds / 3600;
00588         totalSeconds %= 3600;
00589         long minutes = totalSeconds / 60;
00590         long seconds = totalSeconds % 60;
00591         return "PT" + String2(hours) + "H" + String2(minutes) + "M" +
00592             String2(seconds) + "S" + String2(nrFractions) + "N" +
00593             String2(mFractions) + "F";
00594     }
00595 
00596     void
00597     ExportShot(Util::IOBuffer* ioBuf, int idx)
00598     {
00599         ioBuf->Puts("<VideoSegment id=\"" + mShotName[idx] + "\">");
00600         ExportMediaTime(ioBuf, mStartFrame[idx],
00601                         mEndFrame[idx] - mStartFrame[idx] + 1);
00602         if (HasAnnotations())
00603         {
00604             String r = "relevance=\"" + MakeString(mRelevance[idx]) + "\"";
00605             String c = "confidence=\"" + MakeString(mConfidence[idx]) + "\"";
00606             ioBuf->Puts("<TextAnnotation " + r + " " + c + ">");
00607             ioBuf->Puts("<KeywordAnnotation>");
00608             ioBuf->Puts("<Keyword>" + mAnnotation[idx] + "</Keyword>");
00609             ioBuf->Puts("</KeywordAnnotation>");
00610             ioBuf->Puts("</TextAnnotation>"); 
00611            
00612         }
00613         if (mOriginalHadKeyframes)
00614         {
00615             ioBuf->Puts("<TemporalDecomposition>");
00616             std::vector<String> names = mKeyframeName[idx];
00617             std::vector<int> frames = mKeyframeFrame[idx];
00618             for (int i=0 ; i<names.size() ; i++)
00619             {
00620                 ioBuf->Puts("<VideoSegment id=\"" + names[i] + "\">");
00621                 ExportMediaTime(ioBuf, frames[i], -1);
00622                 ioBuf->Puts("</VideoSegment>");
00623             }
00624             ioBuf->Puts("</TemporalDecomposition>");
00625         }
00626         if (HasFeatures())
00627         {
00628             ioBuf->Puts("<DescriptorUnit xsi:type=\"DescriptorCollectionType\">");
00629             ioBuf->Puts("<Descriptor xsi:type=\"" + mFeatureName[idx] + "\">");
00630             ioBuf->Puts("<Coefficients>");
00631             std::vector<double> v = mFeatureValues[idx];
00632             int nr = 0;
00633             String s;
00634             for (int i=0 ; i<v.size() ; i++)
00635             {
00636                 s += MakeString(v[i]) + " ";
00637                 if (++nr == 10)
00638                 {
00639                     ioBuf->Puts(s);
00640                     s = "";
00641                     nr = 0;
00642                 }
00643             }
00644             if (nr != 0)
00645                 ioBuf->Puts(s);
00646             ioBuf->Puts("</Coefficients>");
00647             ioBuf->Puts("</Descriptor>");
00648             ioBuf->Puts("</DescriptorUnit>");
00649             
00650         }
00651         ioBuf->Puts("</VideoSegment>");
00652     }
00653 
00654     void
00655     ExportMediaTime(Util::IOBuffer* ioBuf, int point, int duration)
00656     {
00657         ioBuf->Puts("<MediaTime>");
00658         ioBuf->Puts("<MediaTimePoint>" + Frame2TimePoint(point) +
00659                     "</MediaTimePoint>");
00660         if (duration != -1)
00661             ioBuf->Puts("<MediaDuration>" + Frame2Duration(duration) +
00662                         "</MediaDuration>");
00663         ioBuf->Puts("</MediaTime>");
00664     }
00665 
00666     // ioBuf will be delete as soon as possible by ReadData
00667     void
00668     ReadData(String fileName, Util::IOBuffer* ioBuf)
00669     {
00670         Persistency::XmlFileReader reader;
00671         DOMDocument* doc = reader.Read(fileName, ioBuf);
00672         delete ioBuf;
00673         DOMNode* mpeg7 = GetChildNode(doc, "Mpeg7", true);
00674         DOMNode* desc = GetChildNode(mpeg7, "Description", true);
00675         DOMNode* mmContent = GetChildNode(desc, "MultimediaContent", true);
00676         DOMNode* video = GetChildNode(mmContent, "Video", true);
00677         mId = Xml2String(GetAttributeValue(video, "id"));
00678         ILOG_DEBUG("video id = " << mId);
00679         DOMNode* mediaLoc = GetChildNode(video, "MediaLocator", true);
00680         DOMNode* mediaUri = GetChildNode(mediaLoc, "MediaUri", true);
00681         mVideoFileName = Xml2String(GetElementValue(mediaUri));
00682         ILOG_DEBUG("filename = " << mVideoFileName);
00683         DOMNode* mediaTime = GetChildNode(video, "MediaTime", true);
00684         mNrFrames = GetMediaDuration(mediaTime);
00685         ILOG_DEBUG("nrFrames = " << mNrFrames);
00686         mFractions = GetFractions(mediaTime);
00687         ILOG_DEBUG("fractions = " << mFractions);
00688         DOMNode* decomp = GetChildNode(video, "TemporalDecomposition", true);
00689         std::vector<DOMNode*> segments = GetChildNodes(decomp, "VideoSegment");
00690         ILOG_DEBUG("nr VideoSegments = " << segments.size());
00691         for (int i=0 ; i<segments.size() ; i++)
00692             AddShot(segments[i]);
00693     }
00694 
00695     void
00696     AddShot(DOMNode* segment)
00697     {
00698         String id = GetAttributeValue(segment, "id");
00699         ILOG_DEBUG("shot id = " << id);
00700         mShotName.push_back(id);
00701         DOMNode* textAnno = GetChildNode(segment, "TextAnnotation", false);
00702         if (textAnno)
00703         {
00704             String rel = GetAttributeValue(textAnno, "relevance");
00705             mRelevance.push_back(atof(rel));
00706             String conf = GetAttributeValue(textAnno, "confidence");
00707             mConfidence.push_back(atof(conf));
00708             String anno;
00709             DOMNode* freeText = GetChildNode(textAnno, "FreeTextAnnotation",
00710                                              false);
00711             if (freeText)
00712             {
00713                 anno = GetElementValue(freeText);
00714             }
00715             else
00716             {
00717                 DOMNode* keyAnno = GetChildNode(textAnno, "KeywordAnnotation",
00718                                                 true);
00719                 DOMNode* keyWord = GetChildNode(keyAnno, "Keyword", true);
00720                 anno = GetElementValue(keyWord);
00721             }
00722             mAnnotation.push_back(anno);
00723             mAnnoFileName = anno;
00724         }
00725         else
00726         {
00727             mAnnoFileName = "";
00728         }
00729         DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00730         long frame = GetMediaTimePoint(mediaTime);
00731         long duration = GetMediaDuration(mediaTime);
00732         mStartFrame.push_back(frame);
00733         mEndFrame.push_back(frame + duration - 1);
00734 
00735         std::vector<String> keyframeNames;
00736         std::vector<int> keyframeFrames;
00737         DOMNode* td = GetChildNode(segment, "TemporalDecomposition", false);
00738         if (td)
00739         {
00740             mOriginalHadKeyframes = true;
00741             GetTemporalDecomposition(td, keyframeNames, keyframeFrames);
00742         }
00743         else
00744         {
00745             mOriginalHadKeyframes = false;
00746             keyframeNames.push_back(id + "_RKF");
00747             keyframeFrames.push_back(frame + duration/2);
00748         }
00749         mKeyframeName.push_back(keyframeNames);
00750         mKeyframeFrame.push_back(keyframeFrames);
00751     }
00752 
00753     long
00754     GetMediaTimePoint(DOMNode* mediaTime)
00755     {
00756         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00757         String s = GetElementValue(timePoint);
00758         // e.g. s = T00:28:21:5985F30000
00759         Util::StringParser p(s);
00760         p.Eat('T'); // T is begin of time point
00761         long hours = p.GetInt(':');
00762         long minutes = p.GetInt(':');
00763         long seconds = p.GetInt(':');
00764         p.AdvanceP(1); // skip ':', since it is not whitespace for next
00765         long fractions = p.GetInt('F');
00766         p.AdvanceP(1);
00767         long nrFractions = p.GetInt();
00768         long frame;
00769         frame = (hours*3600 + minutes*60 + seconds)*nrFractions + fractions;
00770         if (nrFractions == 30000) // probably NTSC with time unit PT1001N30000F
00771             frame = frame / 1001;
00772         return frame;
00773     }
00774 
00775     long
00776     GetMediaDuration(DOMNode* mediaTime)
00777     {
00778         DOMNode* duration = GetChildNode(mediaTime, "MediaDuration", true);
00779         String s = GetElementValue(duration);
00780         // e.g. s = PT00H00M04S2122N30000F
00781         Util::StringParser p(s);
00782         p.Eat('T'); // T is begin of duration
00783         long total = 0;
00784         if (p.Contains("H"))
00785         {
00786             total += p.GetInt('H') * 3600;
00787             p.AdvanceP(1);
00788         }
00789         if (p.Contains("M"))
00790         {
00791             total += p.GetInt('M') * 60;
00792             p.AdvanceP(1);
00793         }
00794         if (p.Contains("S"))
00795         {
00796             total += p.GetInt('S');
00797             p.AdvanceP(1);
00798         }
00799         long fractions = p.GetInt('N');
00800         p.AdvanceP(1);
00801         long nrFractions = p.GetInt('F');
00802         long dur = total*nrFractions + fractions;
00803         if (nrFractions == 30000) // probably NTSC with time unit PT1001N30000F
00804             dur /= 1001;
00805         return dur;
00806     }
00807 
00808     int
00809     GetFractions(DOMNode* mediaTime)
00810     {
00811         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00812         String s = GetElementValue(timePoint);
00813         // e.g. s = T00:28:21:5985F30000
00814         Util::StringParser p(s);
00815         p.Eat('F');
00816         return p.GetInt();
00817     }
00818 
00819     void
00820     GetTemporalDecomposition(DOMNode* td, std::vector<String>& names,
00821                              std::vector<int>& frames)
00822     {
00823         std::vector<DOMNode*> segments = GetChildNodes(td, "VideoSegment");
00824         for (int i=0 ; i<segments.size() ; i++)
00825         {
00826             DOMNode* segment = segments[i];
00827             String id = GetAttributeValue(segment, "id");
00828             names.push_back(id);
00829             DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00830             long frame = GetMediaTimePoint(mediaTime);
00831             frames.push_back(frame);
00832         }
00833     }
00834 
00835     String 
00836     String2Xml(CString src)
00837     {
00838         return StringReplaceAll(src, "&", "&amp;", false);
00839     }
00840 
00841     String 
00842     Xml2String(CString src)
00843     {
00844         return StringReplaceAll(src, "&amp;", "&", false);
00845     }
00846  
00847     // data
00848 
00849     String mId; // e.g. TRECVID2006_1
00850     String mVideoFileName;
00851     int    mNrFrames;
00852     int    mFractions;
00853     bool   mOriginalHadKeyframes;
00854     String mConceptSet;
00855     String mAnnoFileName;
00856     bool   mAnnoIsConcept;
00857 
00858     std::vector<String>                mShotName;
00859     std::vector<int>                   mStartFrame;
00860     std::vector<int>                   mEndFrame;
00861     std::vector< std::vector<String> > mKeyframeName;
00862     std::vector< std::vector<int> >    mKeyframeFrame;
00863     std::vector<String>                mAnnotation;
00864     std::vector<double>                mRelevance;
00865     std::vector<double>                mConfidence;
00866     std::vector<String>                mFeatureName;
00867     std::vector< std::vector<double> > mFeatureValues;
00868 
00869     ILOG_VAR_DEC;
00870 
00871 };
00872 
00873 ILOG_VAR_INIT(Mpeg7Doc, Impala.Core.VideoSet);
00874 
00875 } // namespace VideoSet
00876 } // namespace Core
00877 } // namespace Impala
00878 
00879 #endif
Mpeg7Doc.h