00001 #ifndef Impala_Core_Table_SimilarityTableSet_h
00002 #define Impala_Core_Table_SimilarityTableSet_h
00003
00004 #include <vector>
00005 #include "Util/DatabaseReadString.h"
00006 #include "Util/DatabaseWriteString.h"
00007 #include "Basis/NativeTypes.h"
00008 #include "Core/Column/GenerateId.h"
00009 #include "Core/Column/Sort.h"
00010 #include "Core/Column/Find.h"
00011 #include "Core/Table/TableTem.h"
00012 #include "Core/Table/Copy.h"
00013 #include "Core/Table/Read.h"
00014 #include "Core/Table/Write.h"
00015 #include "Core/Table/Append.h"
00016 #include "Core/Table/QuidTable.h"
00017 #include "Core/Table/ScoreTable.h"
00018 #include "Core/Table/Equals.h"
00019 #include "Core/Database/PathCreator.h"
00020 #include "Core/ImageSet/ImageSet.h"
00021 #include "Core/VideoSet/VideoSet.h"
00022
00023 namespace Impala
00024 {
00025 namespace Core
00026 {
00027 namespace Table
00028 {
00029
00030
00037 class SimilarityTableSet
00038 {
00039 public:
00040
00041 typedef TableTem<Column::ColumnTem<Real64> > SimTableType;
00042 typedef TableTem<Column::ColumnTem<Quid> > RankTableType;
00043
00044 SimilarityTableSet(std::vector<String> names, int tableSize)
00045 {
00046 mQuids = new QuidTable(tableSize);
00047 mQuids->SetColName(1, "quid");
00048 for (size_t i=0 ; i<names.size() ; i++)
00049 {
00050 mNames.push_back(names[i]);
00051 SimTableType* sim = new SimTableType(tableSize);
00052 sim->SetColName(1, "similarity");
00053 mSims.push_back(sim);
00054 RankTableType* rank = new RankTableType(tableSize);
00055 rank->SetColName(1, "id");
00056 mRanks.push_back(rank);
00057 }
00058 }
00059
00060 virtual
00061 ~SimilarityTableSet()
00062 {
00063 delete mQuids;
00064 for (size_t i=0 ; i<mSims.size() ; i++)
00065 delete mSims[i];
00066 for (size_t i=0 ; i<mRanks.size() ; i++)
00067 delete mRanks[i];
00068 }
00069
00070 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00071 static SimilarityTableSet*
00072 MakeFromFile(VideoSet::VideoSet* videoSet, String conceptSet, String model,
00073 String feature)
00074 {
00075 String path = videoSet->GetFilePathSimilarityIndex(conceptSet, model,
00076 feature, "names.txt",
00077 false, false);
00078 if (path.empty())
00079 return 0;
00080 return MakeFromFile(path, videoSet->GetDatabase());
00081 }
00082
00083 static SimilarityTableSet*
00084 MakeFromFile(Database::RawDataSet* dataSet, String walkType, String conceptSet,
00085 String model, String featureString, int fileOrDirId)
00086 {
00087 String path =
00088 dataSet->GetFilePathSimilarityData(walkType, conceptSet, model,
00089 featureString, fileOrDirId,
00090 "names.txt", false, false);
00091 if (path.empty())
00092 return 0;
00093 return MakeFromFile(path, dataSet->GetDatabase());
00094 }
00095
00096
00097 static SimilarityTableSet*
00098 MakeFromFile(ImageSet::ImageSet* is, String conceptSet, String model,
00099 String featureString)
00100 {
00101 String path =
00102 is->GetFilePathSimilarityIndex(conceptSet, model, featureString,
00103 "names.txt", false, false);
00104 if (path.empty())
00105 return 0;
00106 return MakeFromFile(path, is->GetDatabase());
00107 }
00108
00109 static SimilarityTableSet*
00110 MakeFromFile(String path, Util::Database* db)
00111 {
00112 std::vector<String> names;
00113 Util::DatabaseReadString(std::back_inserter(names), path, db, true);
00114 SimilarityTableSet* res = new SimilarityTableSet(names, 0);
00115 path.resize(path.size() - 9);
00116 if (path.size() == 0)
00117 path = "./";
00118 res->Load(path, db);
00119 path.resize(path.size() - 1);
00120 String dir = FileNameTail(path);
00121 res->SetDescription(dir);
00122 return res;
00123 }
00124
00125 static bool
00126 Exists(Database::RawDataSet* dataSet, String walkType, String conceptSet,
00127 String model, String featureString, int fileId)
00128 {
00129 String path =
00130 dataSet->GetFilePathSimilarityData(walkType, conceptSet, model,
00131 featureString, fileId,
00132 "names.txt", false, true);
00133 return !path.empty();
00134 }
00135
00136 static bool
00137 Exists(Database::PathCreator* pathCreator)
00138 {
00139 String path =
00140 pathCreator->GetFilePathSimilarityData("names.txt", false, true);
00141 return !path.empty();
00142 }
00143 #endif // REPOSITORY_USED
00144
00145 String
00146 GetDescription()
00147 {
00148 return mDescription;
00149 }
00150
00151 void
00152 SetDescription(String description)
00153 {
00154 mDescription = description;
00155 }
00156
00157 int
00158 NrTables() const
00159 {
00160 return mNames.size();
00161 }
00162
00163 int
00164 TableSize() const
00165 {
00166 return mQuids->Size();
00167 }
00168
00169 std::vector<String>
00170 GetNames()
00171 {
00172 return mNames;
00173 }
00174
00175 String
00176 GetName(int id) const
00177 {
00178 return mNames[id];
00179 }
00180
00181 int
00182 GetIndex(String name)
00183 {
00184 for (int i=0 ; i<mNames.size() ; i++)
00185 if (mNames[i] == name)
00186 return i;
00187 return -1;
00188 }
00189
00190 void
00191 Rename(int id, String name)
00192 {
00193 mNames[id] = name;
00194 }
00195
00196 int
00197 GetQuidClass() const
00198 {
00199 if (mQuids->Size() < 1)
00200 return QUID_CLASS_UNKNOWN;
00201 return QuidClass(mQuids->Get1(0));
00202 }
00203
00204 QuidTable*
00205 GetQuidTable() const
00206 {
00207 return mQuids;
00208 }
00209
00210 int
00211 FindQuid(Quid quid)
00212 {
00213 return Column::Find(mQuids->GetColumn1(), quid);
00214 }
00215
00216 void
00217 ChangeQuidObject(int newObject)
00218 {
00219 int nrChanged = 0;
00220 for (int i=0 ; i<mQuids->Size() ; i++)
00221 {
00222 Quid q = mQuids->Get1(i);
00223 if (QuidObject(q) != newObject)
00224 {
00225 q = QuidObjectChange(q, newObject);
00226 mQuids->Set1(i, q);
00227 nrChanged++;
00228 }
00229 }
00230 if (nrChanged != 0)
00231 ILOG_INFO("Changed " << nrChanged << " quid objects");
00232 }
00233
00234 SimTableType*
00235 GetSimTable(int id) const
00236 {
00237 return mSims[id];
00238 }
00239
00240 void
00241 SetSimTable(int id, ScoreTable* scores)
00242 {
00243 ILOG_INFO("score table size = " << scores->Size());
00244 if(scores->Size() != mQuids->Size())
00245 {
00246 ILOG_ERROR("SetSimTable: size of quids and ScoreTable do not match");
00247 scores->GetQuidTable()->Dump(0, 0, -1);
00248 mQuids->Dump(0, 0, -1);
00249 exit(1);
00250 }
00251 SimTableType* dst = mSims[id];
00252 dst->SetSize(scores->Size());
00253 Copy(dst->GetColumn1(), scores->GetColumn2(), scores->Size());
00254 }
00255
00256 SimTableType*
00257 GetSimTable(String name) const
00258 {
00259 for (size_t i=0 ; i<mNames.size() ; i++)
00260 {
00261 if (mNames[i] == name)
00262 return mSims[i];
00263 }
00264 return 0;
00265 }
00266
00267 RankTableType*
00268 GetRankTable(int id) const
00269 {
00270 return mRanks[id];
00271 }
00272
00273 RankTableType*
00274 GetRankTable(String name) const
00275 {
00276 for (size_t i=0 ; i<mNames.size() ; i++)
00277 {
00278 if (mNames[i] == name)
00279 return mRanks[i];
00280 }
00281 return 0;
00282 }
00283
00284 void
00285 ComputeRank(int tableIdx, bool descending)
00286 {
00287 SimTableType* simTable = mSims[tableIdx];
00288 int size = simTable->Size();
00289 if (size != mQuids->Size())
00290 ILOG_ERROR("ComputeRank: tables sizes do not match: " << size <<
00291 " " << mQuids->Size());
00292 SimTableType* tmp = new SimTableType(size);
00293 Core::Table::Copy(tmp, simTable);
00294 Core::Table::Copy(mRanks[tableIdx], mQuids);
00295 if (descending)
00296 Column::SortDescending(mRanks[tableIdx]->GetColumn1(),
00297 tmp->GetColumn1(), tmp->Size());
00298 else
00299 Column::SortAscending(mRanks[tableIdx]->GetColumn1(),
00300 tmp->GetColumn1(), tmp->Size());
00301 delete tmp;
00302 }
00303
00304 void
00305 ComputeRanks(bool descending)
00306 {
00307 SimTableType* tmp = new SimTableType(mQuids->Size());
00308 for (int i=0 ; i<NrTables() ; i++)
00309 {
00310 ILOG_DEBUG("Sorting table " << i << " of " << NrTables());
00311 if (mSims[i]->Size() != mQuids->Size())
00312 {
00313 ILOG_ERROR("ComputeRanks: tables sizes do not match: " <<
00314 mSims[i]->Size() << " " << mQuids->Size());
00315 continue;
00316 }
00317 Core::Table::Copy(mRanks[i], mQuids);
00318 Core::Table::Copy(tmp, mSims[i]);
00319 if (descending)
00320 Column::SortDescending(mRanks[i]->GetColumn1(),
00321 tmp->GetColumn1(), tmp->Size());
00322 else
00323 Column::SortAscending(mRanks[i]->GetColumn1(),
00324 tmp->GetColumn1(), tmp->Size());
00325 }
00326 delete tmp;
00327 }
00328
00329 void
00330 ComputeRanksTopN(int topN, bool descending)
00331 {
00332 if (mQuids->Size() < topN)
00333 {
00334 ILOG_ERROR("ComputeRanksTopN: not enough quids for top " << topN);
00335 return;
00336 }
00337 if (mRanks[0]->Capacity() != topN)
00338 {
00339 for (int i=0 ; i<NrTables() ; i++)
00340 {
00341 mRanks[i]->Reserve(topN, false);
00342 mRanks[i]->SetSize(topN);
00343 }
00344 }
00345 Column::ColumnTem<Real64>* tmp = new Column::ColumnTem<Real64>(topN);
00346 for (int i=0 ; i<NrTables() ; i++)
00347 {
00348 if (descending)
00349 Column::SortDescendingTopN(mQuids->GetColumn1(),
00350 mSims[i]->GetColumn1(),
00351 mRanks[i]->GetColumn1(), tmp);
00352 else
00353 Column::SortAscendingTopN(mQuids->GetColumn1(),
00354 mSims[i]->GetColumn1(),
00355 mRanks[i]->GetColumn1(), tmp);
00356 }
00357 delete tmp;
00358 }
00359
00360 void
00361 Append(SimilarityTableSet* simSet)
00362 {
00363 if (NrTables() != simSet->NrTables())
00364 {
00365 ILOG_ERROR("Append: different number of tables");
00366 return;
00367 }
00368 Core::Table::Append(GetQuidTable(), simSet->GetQuidTable());
00369 for (int i=0 ; i<NrTables() ; i++)
00370 {
00371 Core::Table::Append(GetSimTable(i), simSet->GetSimTable(i));
00372 }
00373 }
00374
00375
00376 int
00377 Diff(SimilarityTableSet* arg, bool doDiffRank = false) const
00378 {
00379 if (NrTables() != arg->NrTables())
00380 {
00381 ILOG_ERROR("Diff: NrTables differs: " << NrTables() << " vs "
00382 << arg->NrTables());
00383 return 1;
00384 }
00385 if (TableSize() != arg->TableSize())
00386 {
00387 ILOG_ERROR("Diff: TableSize differs: " << TableSize() << " vs "
00388 << arg->TableSize());
00389 return 1;
00390 }
00391 QuidTable* qTable1 = GetQuidTable();
00392 QuidTable* qTable2 = arg->GetQuidTable();
00393 int qDiff = 0;
00394 for (int i=0 ; i<qTable1->Size() ; i++)
00395 {
00396 if (qTable1->Get1(i) != qTable2->Get1(i))
00397 {
00398 qDiff++;
00399 ILOG_DEBUG("Diff: quid elem " << i << ": "
00400 << qTable1->Get1(i) << " vs " << qTable2->Get1(i));
00401 }
00402 }
00403 if (qDiff > 0)
00404 {
00405 ILOG_ERROR("Diff: " << qDiff << " differences in quid table");
00406 return qDiff;
00407 }
00408
00409 int totalDiff = 0;
00410 for (int t=0 ; t<NrTables() ; t++)
00411 {
00412 int nDiff = 0;
00413 SimTableType* sTable1 = GetSimTable(t);
00414 SimTableType* sTable2 = arg->GetSimTable(t);
00415 for (int i=0 ; i<sTable1->Size() ; i++)
00416 {
00417 if (::fabs(sTable1->Get1(i) - sTable2->Get1(i)) > 0.00001)
00418 {
00419 nDiff++;
00420 ILOG_DEBUG("Diff: sim table " << t << ", elem " << i <<
00421 ": " << sTable1->Get1(i) << " vs " <<
00422 sTable2->Get1(i));
00423 }
00424 }
00425 if (nDiff > 0)
00426 ILOG_ERROR("Diff: " << nDiff << " differences in sim table " <<
00427 GetName(t));
00428 totalDiff += nDiff;
00429
00430 if (!doDiffRank)
00431 continue;
00432
00433 nDiff = 0;
00434 RankTableType* rTable1 = GetRankTable(t);
00435 RankTableType* rTable2 = arg->GetRankTable(t);
00436 for (int i=0 ; i<rTable1->Size() ; i++)
00437 {
00438 if (rTable1->Get1(i) != rTable2->Get1(i))
00439 {
00440 nDiff++;
00441 ILOG_DEBUG("Diff: rank table " << t << ", elem " << i <<
00442 ": " << rTable1->Get1(i) << " vs " <<
00443 rTable2->Get1(i));
00444 }
00445 }
00446 if (nDiff > 0)
00447 ILOG_ERROR("Diff: " << nDiff << " differences in rank table " <<
00448 GetName(t));
00449 totalDiff += nDiff;
00450 }
00451
00452 if (totalDiff > 0)
00453 ILOG_ERROR("Diff: total of " << totalDiff << " differences");
00454 return totalDiff;
00455 }
00456
00457
00458
00459 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00460 void
00461 Load(CString fileBase, Util::Database* db)
00462 {
00463 LoadQuids(fileBase, db);
00464 LoadSims(fileBase, db);
00465 LoadRanks(fileBase, db);
00466 }
00467
00468 void
00469 LoadQuids(CString fileBase, Util::Database* db)
00470 {
00471 String fName = fileBase + "all_quids.tab";
00472 Core::Table::Read(mQuids, fName, db);
00473 }
00474
00475 void
00476 LoadSim(size_t tableIdx, CString fileBase, Util::Database* db)
00477 {
00478 String fName = fileBase + mNames[tableIdx] + "_sim.tab";
00479 Core::Table::Read(mSims[tableIdx], fName, db);
00480 }
00481
00482 void
00483 LoadSims(CString fileBase, Util::Database* db)
00484 {
00485 for (size_t i=0 ; i<mNames.size() ; i++)
00486 LoadSim(i, fileBase, db);
00487 }
00488
00489 void
00490 LoadRank(size_t tableIdx, CString fileBase, Util::Database* db)
00491 {
00492 String fName = fileBase + mNames[tableIdx] + "_rank.tab";
00493 Core::Table::Read(mRanks[tableIdx], fName, db);
00494 }
00495
00496 void
00497 LoadRanks(CString fileBase, Util::Database* db)
00498 {
00499 for (size_t i=0 ; i<mNames.size() ; i++)
00500 LoadRank(i, fileBase, db);
00501 }
00502
00503 void
00504 Save(int tableIdx, CString fileBase, Util::Database* db, bool binary)
00505 {
00506 SaveQuids(fileBase, db, binary);
00507 SaveSim(tableIdx, fileBase, db, binary);
00508 SaveRank(tableIdx, fileBase, db, binary);
00509 }
00510
00511 void
00512 Save(Database::RawDataSet* dataSet, CString conceptSet, CString model,
00513 CString feature, bool binary)
00514 {
00515 String path = dataSet->GetFilePathSimilarityIndex(conceptSet, model,
00516 feature, "names.txt",
00517 true, false);
00518 if (path.empty())
00519 return;
00520 path.resize(path.size() - 9);
00521 Save(path, dataSet->GetDatabase(), binary);
00522 }
00523
00524 void
00525 Save(Database::RawDataSet* dataSet, String walkType, String conceptSet,
00526 CString model, CString feature, int fileOrDirId, bool binary)
00527 {
00528 String path = dataSet->GetFilePathSimilarityData
00529 (walkType, conceptSet, model, feature, fileOrDirId, "names.txt",
00530 true, false);
00531 if (path.empty())
00532 return;
00533 path.resize(path.size() - 9);
00534 Save(path, dataSet->GetDatabase(), binary);
00535 }
00536
00537 void
00538 Save(Database::PathCreator* pathCreator, bool binary=true)
00539 {
00540 String path = pathCreator->GetFilePathSimilarityData("names.txt", true);
00541 ILOG_DEBUG("simset want's to save at \"" << path << "\"");
00542 if (path.empty())
00543 {
00544 ILOG_ERROR("couldn't write simset");
00545 return;
00546 }
00547 path.resize(path.size() - 9);
00548 Save(path, pathCreator->GetDatabase(), binary);
00549 }
00550
00551 void
00552 Save(CString fileBase, Util::Database* db, bool binary)
00553 {
00554 SaveNames(fileBase, db);
00555 SaveQuids(fileBase, db, binary);
00556 SaveSims(fileBase, db, binary);
00557 SaveRanks(fileBase, db, binary);
00558 }
00559
00560 void
00561 SaveNames(CString fileBase, Util::Database* db)
00562 {
00563 String fName = fileBase + "names.txt";
00564 Util::DatabaseWriteString(fName, db, mNames.begin(), mNames.end());
00565 }
00566
00567 void
00568 SaveQuids(CString fileBase, Util::Database* db, bool binary)
00569 {
00570 String fName = fileBase + "all_quids.tab";
00571 Core::Table::Write(mQuids, fName, db, binary);
00572 }
00573
00574 void
00575 SaveSim(size_t tableIdx, CString fileBase, Util::Database* db, bool binary)
00576 {
00577 String fName = fileBase + mNames[tableIdx] + "_sim.tab";
00578 Core::Table::Write(mSims[tableIdx], fName, db, binary);
00579 }
00580
00581 void
00582 SaveSims(CString fileBase, Util::Database* db, bool binary)
00583 {
00584 for (size_t i=0 ; i<mNames.size() ; i++)
00585 SaveSim(i, fileBase, db, binary);
00586 }
00587
00588 void
00589 SaveRank(size_t tableIdx, CString fileBase, Util::Database* db, bool binary)
00590 {
00591 String fName = fileBase + mNames[tableIdx] + "_rank.tab";
00592 Core::Table::Write(mRanks[tableIdx], fName, db, binary);
00593 }
00594
00595 void
00596 SaveRanks(CString fileBase, Util::Database* db, bool binary)
00597 {
00598 for (size_t i=0 ; i<mNames.size() ; i++)
00599 SaveRank(i, fileBase, db, binary);
00600 }
00601 #endif // REPOSITORY_USED
00602
00603
00604
00605 void
00606 Dump(bool doTable)
00607 {
00608 std::cout << "SimilarityTableSet " << GetDescription() << std::endl;
00609 for (int i=0 ; i<NrTables() ; i++)
00610 {
00611 std::cout << "table " << i << ", name = " << GetName(i) << std::endl;
00612 if (doTable)
00613 GetSimTable(i)->Dump();
00614 }
00615 }
00616
00617 void
00618 DumpRanking(int from = 0, int to = -1)
00619 {
00620 DumpRanking(0, from, to);
00621 }
00622
00623 virtual void
00624 DumpRanking(Database::RawDataSet* set, int from = 0, int to = -1)
00625 {
00626 if (to == -1 || to > TableSize())
00627 to = TableSize();
00628 if (to < from)
00629 to = from;
00630 std::cout << "SimilarityTableSet " << GetDescription() << std::endl;
00631 for (int t=0 ; t<NrTables() ; t++)
00632 {
00633 SimTableType* simTable = GetSimTable(t);
00634 RankTableType* rankTable = GetRankTable(t);
00635 std::cout << "table " << t << ", name = " << GetName(t) << std::endl;
00636 if (simTable->Size() != mQuids->Size())
00637 {
00638 ILOG_ERROR("DumpRanking: simtable size doesn't match");
00639 continue;
00640 }
00641 if (rankTable->Size() != mQuids->Size())
00642 {
00643 ILOG_ERROR("DumpRanking: ranktable size doesn't match");
00644 continue;
00645 }
00646 std::cout << "i, Quid, similarity, rank, sim_at_rank" << std::endl;
00647 for (int i=from ; i<to ; i++)
00648 {
00649 Quid quid = mQuids->Get1(i);
00650 std::cout << i << ", ";
00651 if(set)
00652 std::cout << set->QuidToString(quid, true) << ", ";
00653 else
00654 std::cout << QuidObj(quid) << ", ";
00655 std::cout << simTable->Get1(i) << ", ";
00656 int rank = FindQuid(rankTable->Get1(i));
00657 std::cout << rank << ", " << simTable->Get1(rank) << std::endl;
00658 }
00659 std::cout << std::endl;
00660 }
00661 }
00662
00663 private:
00664
00665 String mDescription;
00666 QuidTable* mQuids;
00667 std::vector<String> mNames;
00668 std::vector<SimTableType*> mSims;
00669 std::vector<RankTableType*> mRanks;
00670
00671 ILOG_VAR_DEC;
00672
00673 };
00674
00675 ILOG_VAR_INIT(SimilarityTableSet, Impala.Core.Table);
00676
00677 }
00678 }
00679 }
00680
00681 #endif