00001 #ifndef Impala_Core_VideoSet_Mpeg7DocAudio_h
00002 #define Impala_Core_VideoSet_Mpeg7DocAudio_h
00003
00004 #include "Util/XmlDoc.h"
00005 #include "Util/QuickSort.h"
00006 #include "Persistency/XmlFileReader.h"
00007
00008 namespace Impala
00009 {
00010 namespace Core
00011 {
00012 namespace VideoSet
00013 {
00014
00015
00016 class Mpeg7DocAudio : public Util::XmlDoc
00017 {
00018
00019 public:
00020
00021 Mpeg7DocAudio(String fileName, Util::Database* db)
00022 {
00023 ReadData(fileName, db);
00024 }
00025
00026 virtual
00027 ~Mpeg7DocAudio()
00028 {
00029 }
00030
00031
00032
00033 int
00034 NrSegments()
00035 {
00036 return mSegmentId.size();
00037 }
00038
00039 String
00040 SegmentId(int segment)
00041 {
00042 return mSegmentId[segment];
00043 }
00044
00045 int
00046 StartFrame(int segment)
00047 {
00048 return mStartFrame[segment];
00049 }
00050
00051 int
00052 StartFrameReFraction(int segment, int dstFraction)
00053 {
00054 return ReFraction(mStartFrame[segment], mFractions, dstFraction);
00055 }
00056
00057 int
00058 EndFrame(int segment)
00059 {
00060 return mEndFrame[segment];
00061 }
00062
00063 int
00064 EndFrameReFraction(int segment, int dstFraction)
00065 {
00066 return ReFraction(mEndFrame[segment], mFractions, dstFraction);
00067 }
00068
00069 int
00070 NrAnno(int segment)
00071 {
00072 return mAnnos[segment].size();
00073 }
00074
00075 String
00076 AnnoType(int segment, int anno)
00077 {
00078 return mAnnoTypes[segment][anno];
00079 }
00080
00081 String
00082 Anno(int segment, int anno)
00083 {
00084 return mAnnos[segment][anno];
00085 }
00086
00087 double
00088 Confidence(int segment)
00089 {
00090 return mConfidence[segment];
00091 }
00092
00093 int
00094 FindSegment(int frameNr)
00095 {
00096 for (int i=0 ; i<mStartFrame.size() ; i++)
00097 if ((mStartFrame[i] <= frameNr) && (frameNr <= mEndFrame[i]))
00098 return i;
00099 return -1;
00100 }
00101
// Converts a position counted in units of 1/srcFraction second into units of
// 1/dstFraction second, truncating any remainder.
//   srcFrame    : position in source units
//   srcFraction : source units per second; must not be 0 (it is a divisor)
//   dstFraction : destination units per second
// When srcFraction is 30000 the position is first multiplied by 1001, the
// inverse of the division by 1001 that the parsing code of this class applies
// for that fraction count (presumably the 30000/1001, i.e. 29.97 fps, time
// base -- NOTE(review): confirm).
//
// The sub-second part is rescaled with exact 64-bit integer arithmetic.
// Going through a double and truncating loses a unit whenever the quotient is
// not exactly representable, e.g. 0.29 * 100 evaluates to 28.999999999999996
// and would truncate to 28 instead of 29.
static int
ReFraction(int srcFrame, int srcFraction, int dstFraction)
{
    long long totalFractions = srcFrame;
    if (srcFraction == 30000)
        totalFractions *= 1001;

    const long long src = srcFraction;
    const long long dst = dstFraction;
    const long long remainder = totalFractions % src;
    const long long seconds = (totalFractions - remainder) / src;
    // |remainder| < |src| < 2^31 and |dst| <= 2^31, so the product fits in 64 bits.
    const long long scaled = (remainder * dst) / src;
    return static_cast<int>(seconds * dst + scaled);
}
00114
00115 private:
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212 void
00213 ReadData(String fileName, Util::Database* db)
00214 {
00215 Util::IOBuffer* ioBuf = db->GetIOBuffer(fileName, true, true, "");
00216 Persistency::XmlFileReader reader;
00217 DOMDocument* doc = reader.Read(fileName, ioBuf);
00218 delete ioBuf;
00219 DOMNode* mpeg7 = GetChildNode(doc, "Mpeg7", true);
00220 DOMNode* desc1 = GetChildNode(mpeg7, "Description", true);
00221 DOMNode* desc2 = GetChildNodeAfter(mpeg7, desc1, "Description", true);
00222 DOMNode* mmContent = GetChildNode(desc2, "MultimediaContent", true);
00223 DOMNode* audio = GetChildNode(mmContent, "Audio", true);
00224 DOMNode* mediaTime = GetChildNode(audio, "MediaTime", true);
00225 mNrFrames = GetMediaDuration(mediaTime);
00226 ILOG_DEBUG("nrFrames = " << mNrFrames);
00227 mFractions = GetFractions(mediaTime);
00228 ILOG_DEBUG("fractions = " << mFractions);
00229 DOMNode* decomp = GetChildNode(audio, "TemporalDecomposition", true);
00230 std::vector<DOMNode*> segments = GetChildNodes(decomp, "AudioSegment");
00231 ILOG_DEBUG("nr AudioSegments = " << segments.size());
00232 for (int i=0 ; i<segments.size() ; i++)
00233 AddSegment(segments[i]);
00234 }
00235
00236 void
00237 AddSegment(DOMNode* segment)
00238 {
00239 String id = GetAttributeValue(segment, "id", "<none>");
00240 ILOG_DEBUG("segment id = " << id);
00241 mSegmentId.push_back(id);
00242 std::vector<DOMNode*> annoNodes = GetChildNodes(segment,
00243 "TextAnnotation");
00244 std::vector<String> annoTypes;
00245 std::vector<String> annos;
00246 for (int i=0 ; i<annoNodes.size() ; i++)
00247 {
00248 DOMNode* annoNode = annoNodes[i];
00249 String conf = GetAttributeValue(annoNode, "confidence", "1.0");
00250 mConfidence.push_back(atof(conf));
00251 DOMNode* freeText = GetChildNode(annoNode, "FreeTextAnnotation",
00252 true);
00253 String anno = GetElementValue(freeText);
00254 annos.push_back(anno);
00255 ILOG_DEBUG("anno = " << anno << " conf = " << conf);
00256 }
00257 mAnnoTypes.push_back(annoTypes);
00258 mAnnos.push_back(annos);
00259
00260 DOMNode* mediaTime = GetChildNode(segment, "MediaTime", true);
00261 long frame = GetMediaTimePoint(mediaTime);
00262 long duration = GetMediaDuration(mediaTime);
00263 mStartFrame.push_back(frame);
00264 mEndFrame.push_back(frame + duration - 1);
00265 }
00266
00267 long
00268 GetMediaTimePoint(DOMNode* mediaTime)
00269 {
00270 DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00271 String s = GetElementValue(timePoint);
00272
00273 Util::StringParser p(s);
00274 p.Eat('T');
00275 long hours = p.GetInt(':');
00276 long minutes = p.GetInt(':');
00277 long seconds = p.GetInt(':');
00278 p.AdvanceP(1);
00279 long fractions = p.GetInt('F');
00280 p.AdvanceP(1);
00281 long nrFractions = p.GetInt();
00282 long frame;
00283 frame = (hours*3600 + minutes*60 + seconds)*nrFractions + fractions;
00284 if (nrFractions == 30000)
00285 frame = frame / 1001;
00286 return frame;
00287 }
00288
00289 long
00290 GetMediaDuration(DOMNode* mediaTime)
00291 {
00292 DOMNode* duration = GetChildNode(mediaTime, "MediaDuration", true);
00293 String s = GetElementValue(duration);
00294
00295 Util::StringParser p(s);
00296 p.Eat('T');
00297 long total = 0;
00298 if (p.Contains("H"))
00299 {
00300 total += p.GetInt('H') * 3600;
00301 p.AdvanceP(1);
00302 }
00303 if (p.Contains("M"))
00304 {
00305 total += p.GetInt('M') * 60;
00306 p.AdvanceP(1);
00307 }
00308 if (p.Contains("S"))
00309 {
00310 total += p.GetInt('S');
00311 p.AdvanceP(1);
00312 }
00313 long fractions = p.GetInt('N');
00314 p.AdvanceP(1);
00315 long nrFractions = p.GetInt('F');
00316 long dur = total*nrFractions + fractions;
00317 if (nrFractions == 30000)
00318 dur /= 1001;
00319 return dur;
00320 }
00321
00322 int
00323 GetFractions(DOMNode* mediaTime)
00324 {
00325 DOMNode* timePoint = GetChildNode(mediaTime, "MediaTimePoint", true);
00326 String s = GetElementValue(timePoint);
00327
00328 Util::StringParser p(s);
00329 p.Eat('F');
00330 return p.GetInt();
00331 }
00332
00333
00334
00335 int mNrFrames;
00336 int mFractions;
00337
00338 std::vector<String> mSegmentId;
00339 std::vector<int> mStartFrame;
00340 std::vector<int> mEndFrame;
00341 std::vector< std::vector<String> > mAnnoTypes;
00342 std::vector< std::vector<String> > mAnnos;
00343 std::vector<double> mConfidence;
00344 ILOG_VAR_DEC;
00345
00346 };
00347
00348 ILOG_VAR_INIT(Mpeg7DocAudio, Impala.Core.VideoSet);
00349
00350 }
00351 }
00352 }
00353
00354 #endif