00001 #ifndef Impala_Core_VideoSet_Mpeg7DocAudio_h
00002 #define Impala_Core_VideoSet_Mpeg7DocAudio_h
00003
00004 #include "Util/XmlDoc.h"
00005 #include "Util/QuickSort.h"
00006 #include "Persistency/XmlFileReader.h"
00007
00008 namespace Impala
00009 {
00010 namespace Core
00011 {
00012 namespace VideoSet
00013 {
00014
00015
00016 class Mpeg7DocAudio : public Util::XmlDoc
00017 {
00018
00019 public:
00020
00021 Mpeg7DocAudio(String fileName, Util::IOBuffer* ioBuf)
00022 {
00023 ReadData(fileName, ioBuf);
00024 }
00025
00026 virtual
00027 ~Mpeg7DocAudio()
00028 {
00029 }
00030
00031
00032
00033 int
00034 NrSegments()
00035 {
00036 return mSegmentId.size();
00037 }
00038
00039 String
00040 SegmentId(int segment)
00041 {
00042 return mSegmentId[segment];
00043 }
00044
00045 int
00046 StartFrame(int segment)
00047 {
00048 return mStartFrame[segment];
00049 }
00050
00051 int
00052 StartFrameReFraction(int segment, int dstFraction)
00053 {
00054 return ReFraction(mStartFrame[segment], mFractions, dstFraction);
00055 }
00056
00057 int
00058 EndFrame(int segment)
00059 {
00060 return mEndFrame[segment];
00061 }
00062
00063 int
00064 EndFrameReFraction(int segment, int dstFraction)
00065 {
00066 return ReFraction(mEndFrame[segment], mFractions, dstFraction);
00067 }
00068
00069 int
00070 NrAnno(int segment)
00071 {
00072 return mAnnos[segment].size();
00073 }
00074
00075 String
00076 AnnoType(int segment, int anno)
00077 {
00078 return mAnnoTypes[segment][anno];
00079 }
00080
00081 String
00082 Anno(int segment, int anno)
00083 {
00084 return mAnnos[segment][anno];
00085 }
00086
00087 double
00088 Confidence(int segment)
00089 {
00090 return mConfidence[segment];
00091 }
00092
00093 int
00094 FindSegment(int frameNr)
00095 {
00096 for (int i=0 ; i<mStartFrame.size() ; i++)
00097 if ((mStartFrame[i] <= frameNr) && (frameNr <= mEndFrame[i]))
00098 return i;
00099 return -1;
00100 }
00101
00102 static int
00103 ReFraction(int srcFrame, int srcFraction, int dstFraction)
00104 {
00105 Int64 totalFractions = srcFrame;
00106 if (srcFraction == 30000)
00107 totalFractions *= 1001;
00108 Int64 nrFractions = totalFractions % srcFraction;
00109 Int64 seconds = (totalFractions - nrFractions) / srcFraction;
00110 double f = (double) nrFractions / (double) srcFraction;
00111 nrFractions = f * dstFraction;
00112 return seconds * dstFraction + nrFractions;
00113 }
00114
00115 private:
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212 void
00213 ReadData(String fileName, Util::IOBuffer* ioBuf)
00214 {
00215 Persistency::XmlFileReader reader;
00216 DOMDocument* doc = reader.Read(fileName, ioBuf);
00217 delete ioBuf;
00218 DOMNode* mpeg7 = GetChildNode(doc, "Mpeg7", true);
00219 DOMNode* desc1 = GetChildNode(mpeg7, "Description", true);
00220 DOMNode* desc2 = GetChildNodeAfter(mpeg7, desc1, "Description", true);
00221 DOMNode* mmContent = GetChildNode(desc2, "MultimediaContent", true);
00222 DOMNode* audio = GetChildNode(mmContent, "Audio", true);
00223 DOMNode* mediaTime = GetChildNode(audio, "MediaTime", true);
00224 mNrFrames = GetMediaDuration(mediaTime);
00225 ILOG_DEBUG("nrFrames = " << mNrFrames);
00226 mFractions = GetFractions(mediaTime);
00227 ILOG_DEBUG("fractions = " << mFractions);
00228 DOMNode* decomp = GetChildNode(audio, "TemporalDecomposition", true);
00229 std::vector<DOMNode*> segments = GetChildNodes(decomp, "AudioSegment");
00230 ILOG_DEBUG("nr AudioSegments = " << segments.size());
00231 for (int i=0 ; i<segments.size() ; i++)
00232 AddSegment(segments[i]);
00233 }
00234
00235 void
00236 AddSegment(DOMNode* segment)
00237 {
00238 String id = GetAttributeValue(segment, "id", "<none>");
00239 ILOG_DEBUG("segment id = " << id);
00240 mSegmentId.push_back(id);
00241 std::vector<DOMNode*> annoNodes = GetChildNodes(segment,
00242 "TextAnnotation");
00243 std::vector<String> annoTypes;
00244 std::vector<String> annos;
00245 for (int i=0 ; i<annoNodes.size() ; i++)
00246 {
00247 DOMNode* annoNode = annoNodes[i];
00248 String conf = GetAttributeValue(annoNode, "confidence", "1.0");
00249 mConfidence.push_back(atof(conf));
00250 DOMNode* freeText = GetChildNode(annoNode, "FreeTextAnnotation",
00251 true);
00252 String anno = GetElementValue(freeText);
00253 annos.push_back(anno);
00254 ILOG_DEBUG("anno = " << anno << " conf = " << conf);
00255 }
00256 mAnnoTypes.push_back(annoTypes);
00257 mAnnos.push_back(annos);
00258
00259 DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00260 long frame = GetMediaTimePoint(mediaTime);
00261 long duration = GetMediaDuration(mediaTime);
00262 mStartFrame.push_back(frame);
00263 mEndFrame.push_back(frame + duration - 1);
00264 }
00265
00266 long
00267 GetMediaTimePoint(DOMNode* mediaTime)
00268 {
00269 DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00270 String s = GetElementValue(timePoint);
00271
00272 Util::StringParser p(s);
00273 p.Eat('T');
00274 long hours = p.GetInt(':');
00275 long minutes = p.GetInt(':');
00276 long seconds = p.GetInt(':');
00277 p.AdvanceP(1);
00278 long fractions = p.GetInt('F');
00279 p.AdvanceP(1);
00280 long nrFractions = p.GetInt();
00281 long frame;
00282 frame = (hours*3600 + minutes*60 + seconds)*nrFractions + fractions;
00283 if (nrFractions == 30000)
00284 frame = frame / 1001;
00285 return frame;
00286 }
00287
00288 long
00289 GetMediaDuration(DOMNode* mediaTime)
00290 {
00291 DOMNode* duration = GetChildNode(mediaTime, "MediaDuration", true);
00292 String s = GetElementValue(duration);
00293
00294 Util::StringParser p(s);
00295 p.Eat('T');
00296 long total = 0;
00297 if (p.Contains("H"))
00298 {
00299 total += p.GetInt('H') * 3600;
00300 p.AdvanceP(1);
00301 }
00302 if (p.Contains("M"))
00303 {
00304 total += p.GetInt('M') * 60;
00305 p.AdvanceP(1);
00306 }
00307 if (p.Contains("S"))
00308 {
00309 total += p.GetInt('S');
00310 p.AdvanceP(1);
00311 }
00312 long fractions = p.GetInt('N');
00313 p.AdvanceP(1);
00314 long nrFractions = p.GetInt('F');
00315 long dur = total*nrFractions + fractions;
00316 if (nrFractions == 30000)
00317 dur /= 1001;
00318 return dur;
00319 }
00320
00321 int
00322 GetFractions(DOMNode* mediaTime)
00323 {
00324 DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00325 String s = GetElementValue(timePoint);
00326
00327 Util::StringParser p(s);
00328 p.Eat('F');
00329 return p.GetInt();
00330 }
00331
00332
00333
00334 int mNrFrames;
00335 int mFractions;
00336
00337 std::vector<String> mSegmentId;
00338 std::vector<int> mStartFrame;
00339 std::vector<int> mEndFrame;
00340 std::vector< std::vector<String> > mAnnoTypes;
00341 std::vector< std::vector<String> > mAnnos;
00342 std::vector<double> mConfidence;
00343 ILOG_VAR_DEC;
00344
00345 };
00346
// Out-of-class counterpart of the ILOG_VAR_DEC inside Mpeg7DocAudio;
// presumably defines the class logger under the name
// Impala.Core.VideoSet -- confirm against the ILOG macro definitions.
00347 ILOG_VAR_INIT(Mpeg7DocAudio, Impala.Core.VideoSet);
00348
00349 }
00350 }
00351 }
00352
00353 #endif