Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

Mpeg7DocAudio.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_VideoSet_Mpeg7DocAudio_h
00002 #define Impala_Core_VideoSet_Mpeg7DocAudio_h
00003 
00004 #include "Util/XmlDoc.h"
00005 #include "Util/QuickSort.h"
00006 #include "Persistency/XmlFileReader.h"
00007 
00008 namespace Impala
00009 {
00010 namespace Core
00011 {
00012 namespace VideoSet
00013 {
00014 
00015 
00016 class Mpeg7DocAudio : public Util::XmlDoc
00017 {
00018 
00019 public:
00020 
00021     Mpeg7DocAudio(String fileName, Util::Database* db)
00022     {
00023         ReadData(fileName, db);
00024     }
00025 
00026     virtual
00027     ~Mpeg7DocAudio()
00028     {
00029     }
00030 
00031     // inquiry
00032 
00033     int
00034     NrSegments()
00035     {
00036         return mSegmentId.size();
00037     }
00038 
00039     String
00040     SegmentId(int segment)
00041     {
00042         return mSegmentId[segment];
00043     }
00044 
00045     int
00046     StartFrame(int segment)
00047     {
00048         return mStartFrame[segment];
00049     }
00050 
00051     int
00052     StartFrameReFraction(int segment, int dstFraction)
00053     {
00054         return ReFraction(mStartFrame[segment], mFractions, dstFraction);
00055     }
00056 
00057     int
00058     EndFrame(int segment)
00059     {
00060         return mEndFrame[segment];
00061     }
00062 
00063     int
00064     EndFrameReFraction(int segment, int dstFraction)
00065     {
00066         return ReFraction(mEndFrame[segment], mFractions, dstFraction);
00067     }
00068 
00069     int
00070     NrAnno(int segment)
00071     {
00072         return mAnnos[segment].size();
00073     }
00074 
00075     String
00076     AnnoType(int segment, int anno)
00077     {
00078         return mAnnoTypes[segment][anno];
00079     }
00080 
00081     String
00082     Anno(int segment, int anno)
00083     {
00084         return mAnnos[segment][anno];
00085     }
00086 
00087     double
00088     Confidence(int segment)
00089     {
00090         return mConfidence[segment];
00091     }
00092 
00093     int
00094     FindSegment(int frameNr)
00095     {
00096         for (int i=0 ; i<mStartFrame.size() ; i++)
00097             if ((mStartFrame[i] <= frameNr) && (frameNr <= mEndFrame[i]))
00098                 return i;
00099         return -1;
00100     }
00101 
00102     static int
00103     ReFraction(int srcFrame, int srcFraction, int dstFraction)
00104     {
00105         Int64 totalFractions = srcFrame;
00106         if (srcFraction == 30000) // probably NTSC with time unit PT1001N30000F
00107             totalFractions *= 1001;
00108         Int64 nrFractions = totalFractions % srcFraction;
00109         Int64 seconds = (totalFractions - nrFractions) / srcFraction;
00110         double f = (double) nrFractions / (double) srcFraction;
00111         nrFractions = f * dstFraction;
00112         return seconds * dstFraction + nrFractions;
00113     }
00114 
00115 private:
00116 
00117     /* file excerpt:
00118 
00119 <!-- Audio concept files look like this: -->
00120 
00121 <?xml version="1.0" encoding="ISO-8859-1"?>
00122 <Mpeg7 xmlns="urn:mpeg:mpeg7:schema:2001" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mpeg7="urn:mpeg:mpeg7:schema:2001" xsi:schemaLocation="urn:mpeg:mpeg7:schema:2001 Mpeg7-2001.xsd">
00123 
00124 <Description xsi:type="MediaDescriptionType">
00125   <MediaInformation>
00126     <MediaProfile>
00127       <MediaInstance>
00128         <InstanceIdentifier>Birds</InstanceIdentifier>
00129         <MediaLocator>
00130           <MediaUri>Birds.xml</MediaUri>
00131         </MediaLocator>
00132       </MediaInstance>
00133     </MediaProfile>
00134   </MediaInformation>
00135 </Description>
00136 
00137 <Description xsi:type="ContentEntityType">
00138   <MultimediaContent xsi:type="AudioType">
00139     <Audio>
00140       <MediaTime>
00141         <MediaTimePoint>T00:00:00:0F100</MediaTimePoint>
00142         <MediaDuration>PT0H40M54S7N100F</MediaDuration>
00143       </MediaTime>
00144       <TemporalDecomposition gap="true" overlap="false">
00145 
00146         <AudioSegment>
00147           <TextAnnotation confidence="0.746556">
00148             <FreeTextAnnotation>Birds</FreeTextAnnotation>
00149           </TextAnnotation>
00150           <MediaTime>
00151             <MediaTimePoint>T00:00:02:75F100</MediaTimePoint>
00152             <MediaDuration>PT0H0M39S75N100F</MediaDuration>
00153           </MediaTime>
00154         </AudioSegment>
00155 
00156 
00157 <!-- Audio segmentation files look like this: -->
00158 
00159 <?xml version="1.0" encoding="ISO-8859-1"?>
00160 <Mpeg7 xmlns="urn:mpeg:mpeg7:schema:2001" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mpeg7="urn:mpeg:mpeg7:schema:2001" xsi:schemaLocation="urn:mpeg:mpeg7:schema:2001 Mpeg7-2001.xsd">
00161 
00162 <Description xsi:type="MediaDescriptionType">
00163   <MediaInformation>
00164     <MediaProfile>
00165       <MediaInstance>
00166         <InstanceIdentifier>
00167           Kosovosodo
00168         </InstanceIdentifier>
00169         <MediaLocator>
00170           <MediaUri>
00171             Kosovosodo.xml
00172           </MediaUri>
00173         </MediaLocator>
00174       </MediaInstance>
00175     </MediaProfile>
00176   </MediaInformation>
00177 </Description>
00178 
00179 <Description xsi:type="ContentEntityType">
00180   <MultimediaContent xsi:type="AudioType">
00181     <Audio>
00182       <MediaTime>
00183         <MediaTimePoint>T00:00:00:0F100</MediaTimePoint>
00184         <MediaDuration>PT0H40M54S7N100F</MediaDuration>
00185       </MediaTime>
00186       <TemporalDecomposition gap="true" overlap="false">
00187 
00188         <AudioSegment>
00189           <TextAnnotation type="background">
00190             <FreeTextAnnotation>Noise</FreeTextAnnotation>
00191           </TextAnnotation>
00192           <MediaTime>
00193             <MediaTimePoint>T00:00:00:0F100</MediaTimePoint>
00194             <MediaDuration>PT0H0M53S95N100F</MediaDuration>
00195           </MediaTime>
00196         </AudioSegment>
00197         <AudioSegment id="SPK1000-001">
00198           <TextAnnotation type="gender">
00199             <FreeTextAnnotation>Male</FreeTextAnnotation>
00200           </TextAnnotation>
00201           <TextAnnotation type="background">
00202             <FreeTextAnnotation>Noise</FreeTextAnnotation>
00203           </TextAnnotation>
00204           <MediaTime>
00205             <MediaTimePoint>T00:00:53:96F100</MediaTimePoint>
00206             <MediaDuration>PT0H0M2S95N100F</MediaDuration>
00207           </MediaTime>
00208         </AudioSegment>
00209 
00210     */
00211 
00212     void
00213     ReadData(String fileName, Util::Database* db)
00214     {
00215         Util::IOBuffer* ioBuf = db->GetIOBuffer(fileName, true, true, "");
00216         Persistency::XmlFileReader reader;
00217         DOMDocument* doc = reader.Read(fileName, ioBuf);
00218         delete ioBuf;
00219         DOMNode* mpeg7 = GetChildNode(doc, "Mpeg7", true);
00220         DOMNode* desc1 = GetChildNode(mpeg7, "Description", true);
00221         DOMNode* desc2 = GetChildNodeAfter(mpeg7, desc1, "Description", true);
00222         DOMNode* mmContent = GetChildNode(desc2, "MultimediaContent", true);
00223         DOMNode* audio = GetChildNode(mmContent, "Audio", true);
00224         DOMNode* mediaTime = GetChildNode(audio, "MediaTime", true);
00225         mNrFrames = GetMediaDuration(mediaTime);
00226         ILOG_DEBUG("nrFrames = " << mNrFrames);
00227         mFractions = GetFractions(mediaTime);
00228         ILOG_DEBUG("fractions = " << mFractions);
00229         DOMNode* decomp = GetChildNode(audio, "TemporalDecomposition", true);
00230         std::vector<DOMNode*> segments = GetChildNodes(decomp, "AudioSegment");
00231         ILOG_DEBUG("nr AudioSegments = " << segments.size());
00232         for (int i=0 ; i<segments.size() ; i++)
00233             AddSegment(segments[i]);
00234     }
00235 
00236     void
00237     AddSegment(DOMNode* segment)
00238     {
00239         String id = GetAttributeValue(segment, "id", "<none>");
00240         ILOG_DEBUG("segment id = " << id);
00241         mSegmentId.push_back(id);
00242         std::vector<DOMNode*> annoNodes = GetChildNodes(segment,
00243                                                         "TextAnnotation");
00244         std::vector<String> annoTypes;
00245         std::vector<String> annos;
00246         for (int i=0 ; i<annoNodes.size() ; i++)
00247         {
00248             DOMNode* annoNode = annoNodes[i];
00249             String conf = GetAttributeValue(annoNode, "confidence", "1.0");
00250             mConfidence.push_back(atof(conf));
00251             DOMNode* freeText = GetChildNode(annoNode, "FreeTextAnnotation",
00252                                              true);
00253             String anno = GetElementValue(freeText);
00254             annos.push_back(anno);
00255             ILOG_DEBUG("anno = " << anno << " conf = " << conf);
00256         }
00257         mAnnoTypes.push_back(annoTypes);
00258         mAnnos.push_back(annos);
00259 
00260         DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00261         long frame = GetMediaTimePoint(mediaTime);
00262         long duration = GetMediaDuration(mediaTime);
00263         mStartFrame.push_back(frame);
00264         mEndFrame.push_back(frame + duration - 1);
00265     }
00266 
00267     long
00268     GetMediaTimePoint(DOMNode* mediaTime)
00269     {
00270         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00271         String s = GetElementValue(timePoint);
00272         // e.g. s = T00:28:21:5985F30000
00273         Util::StringParser p(s);
00274         p.Eat('T'); // T is begin of time point
00275         long hours = p.GetInt(':');
00276         long minutes = p.GetInt(':');
00277         long seconds = p.GetInt(':');
00278         p.AdvanceP(1); // skip ':', since it is not whitespace for next
00279         long fractions = p.GetInt('F');
00280         p.AdvanceP(1);
00281         long nrFractions = p.GetInt();
00282         long frame;
00283         frame = (hours*3600 + minutes*60 + seconds)*nrFractions + fractions;
00284         if (nrFractions == 30000) // probably NTSC with time unit PT1001N30000F
00285             frame = frame / 1001;
00286         return frame;
00287     }
00288 
00289     long
00290     GetMediaDuration(DOMNode* mediaTime)
00291     {
00292         DOMNode* duration = GetChildNode(mediaTime, "MediaDuration", true);
00293         String s = GetElementValue(duration);
00294         // e.g. s = PT00H00M04S2122N30000F
00295         Util::StringParser p(s);
00296         p.Eat('T'); // T is begin of duration
00297         long total = 0;
00298         if (p.Contains("H"))
00299         {
00300             total += p.GetInt('H') * 3600;
00301             p.AdvanceP(1);
00302         }
00303         if (p.Contains("M"))
00304         {
00305             total += p.GetInt('M') * 60;
00306             p.AdvanceP(1);
00307         }
00308         if (p.Contains("S"))
00309         {
00310             total += p.GetInt('S');
00311             p.AdvanceP(1);
00312         }
00313         long fractions = p.GetInt('N');
00314         p.AdvanceP(1);
00315         long nrFractions = p.GetInt('F');
00316         long dur = total*nrFractions + fractions;
00317         if (nrFractions == 30000) // probably NTSC with time unit PT1001N30000F
00318             dur /= 1001;
00319         return dur;
00320     }
00321 
00322     int
00323     GetFractions(DOMNode* mediaTime)
00324     {
00325         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00326         String s = GetElementValue(timePoint);
00327         // e.g. s = T00:28:21:5985F30000
00328         Util::StringParser p(s);
00329         p.Eat('F');
00330         return p.GetInt();
00331     }
00332 
00333     // data
00334 
00335     int    mNrFrames;
00336     int    mFractions;
00337 
00338     std::vector<String>                mSegmentId;
00339     std::vector<int>                   mStartFrame;
00340     std::vector<int>                   mEndFrame;
00341     std::vector< std::vector<String> > mAnnoTypes;
00342     std::vector< std::vector<String> > mAnnos;
00343     std::vector<double>                mConfidence;
00344     ILOG_VAR_DEC;
00345 
00346 };
00347 
00348 ILOG_VAR_INIT(Mpeg7DocAudio, Impala.Core.VideoSet);
00349 
00350 } // namespace VideoSet
00351 } // namespace Core
00352 } // namespace Impala
00353 
00354 #endif

Generated on Fri Mar 19 09:31:31 2010 for ImpalaSrc by  doxygen 1.5.1