00001 #ifndef Application_IDash_IDashQueryEngine_h
00002 #define Application_IDash_IDashQueryEngine_h
00003
00004 #include "Util/StringParser.h"
00005 #include "Core/Trec/ThreadSet.h"
00006 #include "Core/Trec/ShotResult.h"
00007 #include "Core/Trec/VisualQueryEngine.h"
00008 #include "Core/Table/SimilarityTableSet.h"
00009 #include "Core/Table/TableTem.h"
00010 #include "Core/Table/Sort.h"
00011
00012
00013
00014
00015
00016
00017 namespace Impala {
00018
00019
00020 namespace Core {
00021 namespace Trec {
00022
00023
00024 typedef Core::Table::TableTem
00025 <Core::Column::ColumnTem<Int32>,
00026 Core::Column::ColumnTem<double> > ShotWithWeight;
00027
00028
00029
00030
00031 class IDashQueryEngine
00032 {
00033 public:
00034 typedef std::list<ShotResult> ShotResultList;
00035
00036 IDashQueryEngine(ThreadSet *set)
00037 {
00038 Init(set);
00039 }
00040
00041 void ProcessIDashQueryList(std::list<String> qlist)
00042 {
00043 std::list<String>::iterator qlisti;
00044 for (qlisti=qlist.begin(); qlisti!=qlist.end(); qlisti++)
00045 {
00046 String q = *qlisti;
00047 ILOG_DEBUG("Processing query component: " << q);
00048
00049 if (q.find("CONCEPT:") == 0 )
00050 AddConcept(q.substr(8));
00051 }
00052 }
00053
00054 bool HasComponents()
00055 {
00056 return mSelectedConcepts.size() > 0 || mSelectedShots.size() > 0;
00057 }
00058
00059 void AddConcept(String conceptname, double amount = 1.0)
00060 {
00061 ILOG_DEBUG( "QueryEngine::AddConcept(" << conceptname << ")..." );
00062 if (mSelectedConcepts.find(conceptname) != mSelectedConcepts.end())
00063 {
00064 ILOG_DEBUG( " already selected. skipping." );
00065 return;
00066 }
00067 mSelectedConcepts[conceptname] = amount;
00068 }
00069
00070 void RemoveConcept(String conceptname)
00071 {
00072 if (mSelectedConcepts.find(conceptname) != mSelectedConcepts.end())
00073 mSelectedConcepts.erase(conceptname);
00074 }
00075
00076 void AddShots(String identifier, std::list<KeyframeResult> keyframes)
00077 {
00078 ILOG_DEBUG("QueryEngine::AddShots(" << identifier << ", " <<
00079 keyframes.size() << " keyframes)..." );
00080 if (mSelectedShots.find(identifier) != mSelectedShots.end())
00081 {
00082 ILOG_DEBUG( " already selected. replacing." );
00083 }
00084
00085 ShotResultList shots = ConvertToShotResult(keyframes);
00086 mSelectedShots[identifier] = shots;
00087 }
00088
00089 void AddShots(String identifier, ShotResultList shots)
00090 {
00091 ILOG_DEBUG("QueryEngine::AddShots(" << identifier << ", " <<
00092 shots.size() << " shots)..." );
00093 if (mSelectedShots.find(identifier) != mSelectedShots.end())
00094 {
00095 ILOG_DEBUG( " already selected. replacing." );
00096 }
00097 mSelectedShots[identifier] = shots;
00098 }
00099
00100 void RemoveShots(String identifier)
00101 {
00102 if (mSelectedShots.find(identifier) != mSelectedShots.end())
00103 mSelectedShots.erase(identifier);
00104 }
00105
00106 void Clear()
00107 {
00108 mSelectedShots.clear();
00109 mSelectedConcepts.clear();
00110 }
00111
00112
00113
00114
00115
00116
00117 void GenerateThreadFromQuery(String threadname)
00118 {
00119 ILOG_SYSTEM("generating thread from query: " << threadname << ":");
00120 std::list<ShotResultList > concepts;
00121 std::map<String, double>::iterator c;
00122 for (c = mSelectedConcepts.begin(); c != mSelectedConcepts.end(); c++)
00123 {
00124 ILOG_SYSTEM("contains concept: "<<c->first<<" ("<<c->second<<")");
00125 ShotResultList concept = ConvertToShotResult(mThreadSet, c->first);
00126 concepts.push_back(concept);
00127 }
00128
00129 std::map<String, ShotResultList >::iterator s;
00130 for (s = mSelectedShots.begin(); s != mSelectedShots.end(); s++)
00131 {
00132 ILOG_DEBUG("contains shots: " << s->first << " (" <<
00133 s->second.size() << " shots)" );
00134
00135 if (mThreadSet->GetThreadByName(s->first))
00136 {
00137 ILOG_DEBUG("textual thread " << s->first <<
00138 " already exists. skipping." );
00139 }
00140 else
00141 mThreadSet->AddThreadShots(s->first, s->second);
00142 concepts.push_back(s->second);
00143 }
00144 ILOG_SYSTEM("end of content list");
00145
00146 if (concepts.size() == 0)
00147 {
00148 ILOG_WARN("QueryEngine::GenerateThreadFromQuery: " <<
00149 "no content to generate. Abort." );
00150 return;
00151 }
00152
00153 ShotResultList merged = MergeResults(concepts);
00154 mThreadSet->AddThreadShots(threadname, merged);
00155 }
00156
00157
00158
00159 bool FilterByLanguage(ShotResult in)
00160 {
00161 if (!mDoLanguageFilter) return true;
00162 int shotID = in.shotid;
00163 int language = GetLanguage(shotID);
00164 if (language == LANGUAGE_ENGLISH && mLanguageAllowEng) return true;
00165 if (language == LANGUAGE_CHINESE && mLanguageAllowChi) return true;
00166 if (language == LANGUAGE_ARABIC && mLanguageAllowArb) return true;
00167 return false;
00168 }
00169
00170 ShotResultList
00171 ProcessList(ShotResultList in, bool (IDashQueryEngine::*f)(ShotResult))
00172 {
00173 ShotResultList out;
00174 for (ShotResultList::iterator i=in.begin();i!=in.end();i++)
00175 if((*this.*f)(*i))
00176 out.push_back(*i);
00177 return out;
00178 }
00179
00180 ShotResultList
00181 MergeResults(std::list<ShotResultList > lists,int mergeMethod=MERGE_DEFAULT)
00182 {
00183 switch (mergeMethod)
00184 {
00185 case MERGE_ROUNDROBIN:
00186 return MergeResultsByRoundRobin(lists);
00187 case MERGE_BORDAFUSION:
00188 return MergeResultsByBordaFusion(lists);
00189 default:
00190 ILOG_ERROR("QueryEngine::MergeResults: invalid merge function: "
00191 << mergeMethod);
00192 return MergeResultsByBordaFusion(lists);
00193 }
00194 }
00195
00196 ShotResultList
00197 MergeResultsByBordaFusion(std::list<ShotResultList > lists)
00198 {
00199 ShotResultList merged;
00200 std::map<int, double> fusionvalue;
00201 std::map<int, double> weights;
00202
00203
00204
00205
00206 int N = 1000;
00207
00208
00209
00210 int listidx = 0;
00211 std::list<ShotResultList >::iterator i;
00212 for (i = lists.begin(); i != lists.end(); i++)
00213 weights[listidx++] = 1.0 / (double)lists.size();
00214
00215
00216
00217 listidx = 0;
00218 for (i = lists.begin(); i != lists.end(); i++) {
00219
00220 double weight = weights[listidx++];
00221 int rank = 0;
00222 for (ShotResultList::iterator j = i->begin(); j != i->end(); j++)
00223 {
00224 int shotid = (*j).shotid;
00225 double weightedbordacc = weight * (double)(N - rank++);
00226 if (fusionvalue.find(shotid) == fusionvalue.end())
00227 fusionvalue[shotid] = weightedbordacc;
00228 else
00229 fusionvalue[shotid] = fusionvalue[shotid] + weightedbordacc;
00230 }
00231 }
00232
00233
00234
00235
00236 ShotWithWeight* sww = new ShotWithWeight(fusionvalue.size());
00237 std::map<int, double>::iterator mapIt;
00238 for (mapIt = fusionvalue.begin(); mapIt != fusionvalue.end(); mapIt++)
00239 sww->Add(mapIt->first, mapIt->second);
00240
00241 double maxscore = sww->Get2(sww->Size() - 1);
00242 int rank = 1;
00243
00244 Table::Sort(sww, 2, true);
00245 for (int i=sww->Size()-1; i>=0; i--)
00246 {
00247 ShotResult t;
00248 t.shotid = sww->Get1(i);
00249 t.score = sww->Get2(i) / maxscore;
00250 t.rank = rank++;
00251 merged.push_back(t);
00252 }
00253 return merged;
00254 }
00255
00256 ShotResultList
00257 MergeResultsByRoundRobin(std::list<ShotResultList > lists)
00258 {
00259 ShotResultList merged;
00260 std::map<int, int> counts;
00261 std::list<int> lengths;
00262 std::list<int> positions;
00263
00264 std::list<ShotResultList::iterator> iterators;
00265
00266
00267 std::list<ShotResultList >::iterator i;
00268 for (i = lists.begin(); i != lists.end(); i++)
00269 {
00270 iterators.push_back(i->begin());
00271 lengths.push_back(i->size());
00272 positions.push_back(0);
00273 }
00274
00275 for (int nr = 0; nr < mMaxResults; nr++)
00276 {
00277 std::list<int>::iterator length = lengths.begin();
00278 std::list<int>::iterator position = positions.begin();
00279 std::list<ShotResultList::iterator>::iterator it;
00280 for (it = iterators.begin(); it != iterators.end(); it++)
00281 {
00282 if (*position < *length)
00283 {
00284 int shotid = (*(*it)).shotid;
00285 if (counts.find(shotid) == counts.end())
00286 {
00287 merged.push_back(*(*it));
00288 counts[shotid] = 1;
00289 }
00290 else
00291 {
00292 counts[shotid] = counts[shotid] + 1;
00293 }
00294 (*it)++;
00295 (*position)++;
00296 }
00297 position++;
00298 length++;
00299 }
00300 }
00301
00302 merged = ProcessFilters(merged);
00303
00304 return merged;
00305 }
00306
00307
00308 ShotResultList ProcessFilters(ShotResultList in)
00309 {
00310 ILOG_DEBUG( "FILTERING: original " << in.size() << " shots." );
00311 in = ProcessList(in, &IDashQueryEngine::FilterByLanguage);
00312 ILOG_DEBUG( "FILTERING: language " << in.size() << " shots." );
00313 return in;
00314 }
00315
00316 void SetLanguageFilter(bool enableEng, bool enableChi, bool enableArb)
00317 {
00318 mLanguageAllowEng = enableEng;
00319 mLanguageAllowArb = enableArb;
00320 mLanguageAllowChi = enableChi;
00321 if (mDoLanguageFilter = !(enableEng && enableArb && enableChi))
00322 {
00323 ILOG_SYSTEM("Language filter enabled. " <<
00324 "English: " << mLanguageAllowEng << ", " <<
00325 "Chinese: " << mLanguageAllowChi << ", " <<
00326 "Arabic: " << mLanguageAllowArb);
00327 }
00328 else
00329 ILOG_SYSTEM( "Language filter disabled." );
00330 }
00331
00332 private:
00333 ShotResultList
00334 ConvertToShotResult(std::list<KeyframeResult> keyframes)
00335 {
00336 ShotResultList shots;
00337 std::list<KeyframeResult>::iterator i;
00338 for (i=keyframes.begin(); i != keyframes.end(); i++)
00339 {
00340 ShotResult r;
00341 int frameID = i->keyframeid;
00342 if (frameID == -1) {
00343 ILOG_WARN("QueryGui::ConvertToShotResult: " <<
00344 "WARNING: erroneous frame in set." );
00345 continue;
00346 }
00347 int shotID = mThreadSet->GetKeyframes()->GetShotId(frameID);
00348 r.shotid = shotID;
00349 r.score = i->score;
00350 r.rank = i->rank;
00351 shots.push_back(r);
00352 }
00353 return shots;
00354 }
00355
00356 ShotResultList
00357 ConvertToShotResult(ThreadSet *threadset, String concept)
00358 {
00359
00360 Thread *t = NULL;
00361 for (int i=0; i < threadset->GetNrThreads(); i++)
00362 {
00363 if (threadset->GetThread(i)->GetName() == String("rank_") + concept)
00364 {
00365 t = threadset->GetThread(i);
00366 break;
00367 }
00368 }
00369
00370
00371 ShotResultList out;
00372 if (t != NULL)
00373 {
00374
00375 int firstShot = t->GetFirstShot();
00376 for (int i=0; i<mMaxResults; i++)
00377 {
00378 ShotResult r;
00379 r.shotid = t->GetShotAt(firstShot, i);
00380 r.rank = i+1;
00381 r.score = 1.0;
00382 out.push_back(r);
00383 }
00384 }
00385 return out;
00386 }
00387
00388 int GetLanguage(int shotID)
00389 {
00390 int videoID = mThreadSet->GetSegmentation()->GetVideoId(shotID);
00391 String videofile =
00392 mThreadSet->GetSegmentation()->GetVideoSet()->GetFile(videoID);
00393
00394 if (videofile.find("_ENG") != String::npos)
00395 return LANGUAGE_ENGLISH;
00396 if (videofile.find("_CHN") != String::npos)
00397 return LANGUAGE_CHINESE;
00398 if (videofile.find("_ARB") != String::npos)
00399 return LANGUAGE_ARABIC;
00400
00401 ILOG_WARN("QueryEngine::GetLanguage(" << shotID <<
00402 "): WARNING: could not determine language of " << videofile );
00403 return -1;
00404 }
00405
00406 void Init(ThreadSet *set)
00407 {
00408 mThreadSet = set;
00409 mDoLanguageFilter = false;
00410 mLanguageAllowEng = mLanguageAllowArb = mLanguageAllowChi = true;
00411 mMaxResults = 2000;
00412 }
00413
00414 int mMaxResults;
00415
00416 bool mDoLanguageFilter;
00417 bool mLanguageAllowEng;
00418 bool mLanguageAllowArb;
00419 bool mLanguageAllowChi;
00420
00421 ThreadSet* mThreadSet;
00422 std::map<String, double> mSelectedConcepts;
00423 std::map<String, ShotResultList > mSelectedShots;
00424
00425
00426 static const int LANGUAGE_ENGLISH = 1;
00427 static const int LANGUAGE_CHINESE = 2;
00428 static const int LANGUAGE_ARABIC = 4;
00429
00430 static const int MERGE_ROUNDROBIN = 1;
00431 static const int MERGE_BORDAFUSION = 2;
00432
00433 static const int MERGE_DEFAULT = MERGE_BORDAFUSION;
00434
00435 ILOG_VAR_DEC;
00436 };
00437
00438 ILOG_VAR_INIT(IDashQueryEngine, Application.IDash);
00439
00440 }
00441 }
00442 }
00443
00444 #endif