Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

Mpeg7DocAudio.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_VideoSet_Mpeg7DocAudio_h
00002 #define Impala_Core_VideoSet_Mpeg7DocAudio_h
00003 
00004 #include "Util/XmlDoc.h"
00005 #include "Util/QuickSort.h"
00006 #include "Persistency/XmlFileReader.h"
00007 
00008 namespace Impala
00009 {
00010 namespace Core
00011 {
00012 namespace VideoSet
00013 {
00014 
00015 
00016 class Mpeg7DocAudio : public Util::XmlDoc
00017 {
00018 
00019 public:
00020 
00021     Mpeg7DocAudio(String fileName, Util::IOBuffer* ioBuf)
00022     {
00023         ReadData(fileName, ioBuf);
00024     }
00025 
00026     virtual
00027     ~Mpeg7DocAudio()
00028     {
00029     }
00030 
00031     // inquiry
00032 
00033     int
00034     NrSegments()
00035     {
00036         return mSegmentId.size();
00037     }
00038 
00039     String
00040     SegmentId(int segment)
00041     {
00042         return mSegmentId[segment];
00043     }
00044 
00045     int
00046     StartFrame(int segment)
00047     {
00048         return mStartFrame[segment];
00049     }
00050 
00051     int
00052     StartFrameReFraction(int segment, int dstFraction)
00053     {
00054         return ReFraction(mStartFrame[segment], mFractions, dstFraction);
00055     }
00056 
00057     int
00058     EndFrame(int segment)
00059     {
00060         return mEndFrame[segment];
00061     }
00062 
00063     int
00064     EndFrameReFraction(int segment, int dstFraction)
00065     {
00066         return ReFraction(mEndFrame[segment], mFractions, dstFraction);
00067     }
00068 
00069     int
00070     NrAnno(int segment)
00071     {
00072         return mAnnos[segment].size();
00073     }
00074 
00075     String
00076     AnnoType(int segment, int anno)
00077     {
00078         return mAnnoTypes[segment][anno];
00079     }
00080 
00081     String
00082     Anno(int segment, int anno)
00083     {
00084         return mAnnos[segment][anno];
00085     }
00086 
00087     double
00088     Confidence(int segment)
00089     {
00090         return mConfidence[segment];
00091     }
00092 
00093     int
00094     FindSegment(int frameNr)
00095     {
00096         for (int i=0 ; i<mStartFrame.size() ; i++)
00097             if ((mStartFrame[i] <= frameNr) && (frameNr <= mEndFrame[i]))
00098                 return i;
00099         return -1;
00100     }
00101 
00102     static int
00103     ReFraction(int srcFrame, int srcFraction, int dstFraction)
00104     {
00105         Int64 totalFractions = srcFrame;
00106         if (srcFraction == 30000) // probably NTSC with time unit PT1001N30000F
00107             totalFractions *= 1001;
00108         Int64 nrFractions = totalFractions % srcFraction;
00109         Int64 seconds = (totalFractions - nrFractions) / srcFraction;
00110         double f = (double) nrFractions / (double) srcFraction;
00111         nrFractions = f * dstFraction;
00112         return seconds * dstFraction + nrFractions;
00113     }
00114 
00115 private:
00116 
00117     /* file excerpt:
00118 
00119 <!-- Audio concept files look like this: -->
00120 
00121 <?xml version="1.0" encoding="ISO-8859-1"?>
00122 <Mpeg7 xmlns="urn:mpeg:mpeg7:schema:2001" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mpeg7="urn:mpeg:mpeg7:schema:2001" xsi:schemaLocation="urn:mpeg:mpeg7:schema:2001 Mpeg7-2001.xsd">
00123 
00124 <Description xsi:type="MediaDescriptionType">
00125   <MediaInformation>
00126     <MediaProfile>
00127       <MediaInstance>
00128         <InstanceIdentifier>Birds</InstanceIdentifier>
00129         <MediaLocator>
00130           <MediaUri>Birds.xml</MediaUri>
00131         </MediaLocator>
00132       </MediaInstance>
00133     </MediaProfile>
00134   </MediaInformation>
00135 </Description>
00136 
00137 <Description xsi:type="ContentEntityType">
00138   <MultimediaContent xsi:type="AudioType">
00139     <Audio>
00140       <MediaTime>
00141         <MediaTimePoint>T00:00:00:0F100</MediaTimePoint>
00142         <MediaDuration>PT0H40M54S7N100F</MediaDuration>
00143       </MediaTime>
00144       <TemporalDecomposition gap="true" overlap="false">
00145 
00146         <AudioSegment>
00147           <TextAnnotation confidence="0.746556">
00148             <FreeTextAnnotation>Birds</FreeTextAnnotation>
00149           </TextAnnotation>
00150           <MediaTime>
00151             <MediaTimePoint>T00:00:02:75F100</MediaTimePoint>
00152             <MediaDuration>PT0H0M39S75N100F</MediaDuration>
00153           </MediaTime>
00154         </AudioSegment>
00155 
00156 
00157 <!-- Audio segmentation files look like this: -->
00158 
00159 <?xml version="1.0" encoding="ISO-8859-1"?>
00160 <Mpeg7 xmlns="urn:mpeg:mpeg7:schema:2001" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mpeg7="urn:mpeg:mpeg7:schema:2001" xsi:schemaLocation="urn:mpeg:mpeg7:schema:2001 Mpeg7-2001.xsd">
00161 
00162 <Description xsi:type="MediaDescriptionType">
00163   <MediaInformation>
00164     <MediaProfile>
00165       <MediaInstance>
00166         <InstanceIdentifier>
00167           Kosovosodo
00168         </InstanceIdentifier>
00169         <MediaLocator>
00170           <MediaUri>
00171             Kosovosodo.xml
00172           </MediaUri>
00173         </MediaLocator>
00174       </MediaInstance>
00175     </MediaProfile>
00176   </MediaInformation>
00177 </Description>
00178 
00179 <Description xsi:type="ContentEntityType">
00180   <MultimediaContent xsi:type="AudioType">
00181     <Audio>
00182       <MediaTime>
00183         <MediaTimePoint>T00:00:00:0F100</MediaTimePoint>
00184         <MediaDuration>PT0H40M54S7N100F</MediaDuration>
00185       </MediaTime>
00186       <TemporalDecomposition gap="true" overlap="false">
00187 
00188         <AudioSegment>
00189           <TextAnnotation type="background">
00190             <FreeTextAnnotation>Noise</FreeTextAnnotation>
00191           </TextAnnotation>
00192           <MediaTime>
00193             <MediaTimePoint>T00:00:00:0F100</MediaTimePoint>
00194             <MediaDuration>PT0H0M53S95N100F</MediaDuration>
00195           </MediaTime>
00196         </AudioSegment>
00197         <AudioSegment id="SPK1000-001">
00198           <TextAnnotation type="gender">
00199             <FreeTextAnnotation>Male</FreeTextAnnotation>
00200           </TextAnnotation>
00201           <TextAnnotation type="background">
00202             <FreeTextAnnotation>Noise</FreeTextAnnotation>
00203           </TextAnnotation>
00204           <MediaTime>
00205             <MediaTimePoint>T00:00:53:96F100</MediaTimePoint>
00206             <MediaDuration>PT0H0M2S95N100F</MediaDuration>
00207           </MediaTime>
00208         </AudioSegment>
00209 
00210     */
00211 
00212     void
00213     ReadData(String fileName, Util::IOBuffer* ioBuf)
00214     {
00215         Persistency::XmlFileReader reader;
00216         DOMDocument* doc = reader.Read(fileName, ioBuf);
00217         delete ioBuf;
00218         DOMNode* mpeg7 = GetChildNode(doc, "Mpeg7", true);
00219         DOMNode* desc1 = GetChildNode(mpeg7, "Description", true);
00220         DOMNode* desc2 = GetChildNodeAfter(mpeg7, desc1, "Description", true);
00221         DOMNode* mmContent = GetChildNode(desc2, "MultimediaContent", true);
00222         DOMNode* audio = GetChildNode(mmContent, "Audio", true);
00223         DOMNode* mediaTime = GetChildNode(audio, "MediaTime", true);
00224         mNrFrames = GetMediaDuration(mediaTime);
00225         ILOG_DEBUG("nrFrames = " << mNrFrames);
00226         mFractions = GetFractions(mediaTime);
00227         ILOG_DEBUG("fractions = " << mFractions);
00228         DOMNode* decomp = GetChildNode(audio, "TemporalDecomposition", true);
00229         std::vector<DOMNode*> segments = GetChildNodes(decomp, "AudioSegment");
00230         ILOG_DEBUG("nr AudioSegments = " << segments.size());
00231         for (int i=0 ; i<segments.size() ; i++)
00232             AddSegment(segments[i]);
00233     }
00234 
00235     void
00236     AddSegment(DOMNode* segment)
00237     {
00238         String id = GetAttributeValue(segment, "id", "<none>");
00239         ILOG_DEBUG("segment id = " << id);
00240         mSegmentId.push_back(id);
00241         std::vector<DOMNode*> annoNodes = GetChildNodes(segment,
00242                                                         "TextAnnotation");
00243         std::vector<String> annoTypes;
00244         std::vector<String> annos;
00245         for (int i=0 ; i<annoNodes.size() ; i++)
00246         {
00247             DOMNode* annoNode = annoNodes[i];
00248             String conf = GetAttributeValue(annoNode, "confidence", "1.0");
00249             mConfidence.push_back(atof(conf));
00250             DOMNode* freeText = GetChildNode(annoNode, "FreeTextAnnotation",
00251                                              true);
00252             String anno = GetElementValue(freeText);
00253             annos.push_back(anno);
00254             ILOG_DEBUG("anno = " << anno << " conf = " << conf);
00255         }
00256         mAnnoTypes.push_back(annoTypes);
00257         mAnnos.push_back(annos);
00258 
00259         DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00260         long frame = GetMediaTimePoint(mediaTime);
00261         long duration = GetMediaDuration(mediaTime);
00262         mStartFrame.push_back(frame);
00263         mEndFrame.push_back(frame + duration - 1);
00264     }
00265 
00266     long
00267     GetMediaTimePoint(DOMNode* mediaTime)
00268     {
00269         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00270         String s = GetElementValue(timePoint);
00271         // e.g. s = T00:28:21:5985F30000
00272         Util::StringParser p(s);
00273         p.Eat('T'); // T is begin of time point
00274         long hours = p.GetInt(':');
00275         long minutes = p.GetInt(':');
00276         long seconds = p.GetInt(':');
00277         p.AdvanceP(1); // skip ':', since it is not whitespace for next
00278         long fractions = p.GetInt('F');
00279         p.AdvanceP(1);
00280         long nrFractions = p.GetInt();
00281         long frame;
00282         frame = (hours*3600 + minutes*60 + seconds)*nrFractions + fractions;
00283         if (nrFractions == 30000) // probably NTSC with time unit PT1001N30000F
00284             frame = frame / 1001;
00285         return frame;
00286     }
00287 
00288     long
00289     GetMediaDuration(DOMNode* mediaTime)
00290     {
00291         DOMNode* duration = GetChildNode(mediaTime, "MediaDuration", true);
00292         String s = GetElementValue(duration);
00293         // e.g. s = PT00H00M04S2122N30000F
00294         Util::StringParser p(s);
00295         p.Eat('T'); // T is begin of duration
00296         long total = 0;
00297         if (p.Contains("H"))
00298         {
00299             total += p.GetInt('H') * 3600;
00300             p.AdvanceP(1);
00301         }
00302         if (p.Contains("M"))
00303         {
00304             total += p.GetInt('M') * 60;
00305             p.AdvanceP(1);
00306         }
00307         if (p.Contains("S"))
00308         {
00309             total += p.GetInt('S');
00310             p.AdvanceP(1);
00311         }
00312         long fractions = p.GetInt('N');
00313         p.AdvanceP(1);
00314         long nrFractions = p.GetInt('F');
00315         long dur = total*nrFractions + fractions;
00316         if (nrFractions == 30000) // probably NTSC with time unit PT1001N30000F
00317             dur /= 1001;
00318         return dur;
00319     }
00320 
00321     int
00322     GetFractions(DOMNode* mediaTime)
00323     {
00324         DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00325         String s = GetElementValue(timePoint);
00326         // e.g. s = T00:28:21:5985F30000
00327         Util::StringParser p(s);
00328         p.Eat('F');
00329         return p.GetInt();
00330     }
00331 
00332     // data
00333 
00334     int    mNrFrames;
00335     int    mFractions;
00336 
00337     std::vector<String>                mSegmentId;
00338     std::vector<int>                   mStartFrame;
00339     std::vector<int>                   mEndFrame;
00340     std::vector< std::vector<String> > mAnnoTypes;
00341     std::vector< std::vector<String> > mAnnos;
00342     std::vector<double>                mConfidence;
00343     ILOG_VAR_DEC;
00344 
00345 };
00346 
00347 ILOG_VAR_INIT(Mpeg7DocAudio, Impala.Core.VideoSet);
00348 
00349 } // namespace VideoSet
00350 } // namespace Core
00351 } // namespace Impala
00352 
00353 #endif

Generated on Thu Jan 13 09:04:47 2011 for ImpalaSrc by  doxygen 1.5.1