Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

SimilarityTableSet.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Table_SimilarityTableSet_h
00002 #define Impala_Core_Table_SimilarityTableSet_h
00003 
00004 #include <vector>
00005 #include "Util/DatabaseReadString.h"
00006 #include "Util/DatabaseWriteString.h"
00007 #include "Basis/NativeTypes.h"
00008 #include "Core/Column/GenerateId.h"
00009 #include "Core/Column/Sort.h"
00010 #include "Core/Column/Find.h"
00011 #include "Core/Table/TableTem.h"
00012 #include "Core/Table/Copy.h"
00013 #include "Core/Table/Read.h"
00014 #include "Core/Table/Write.h"
00015 #include "Core/Table/Append.h"
00016 #include "Core/Table/QuidTable.h"
00017 #include "Core/Table/ScoreTable.h"
00018 #include "Core/Table/Equals.h"
00019 #include "Core/Database/PathCreator.h"
00020 #include "Core/ImageSet/ImageSet.h"
00021 #include "Core/VideoSet/VideoSet.h"
00022 
00023 namespace Impala
00024 {
00025 namespace Core
00026 {
00027 namespace Table
00028 {
00029 
00030 
00037 class SimilarityTableSet
00038 {
00039 public:
00040 
00041     typedef TableTem<Column::ColumnTem<Real64> > SimTableType;
00042     typedef TableTem<Column::ColumnTem<Quid> > RankTableType;
00043 
00044     SimilarityTableSet(std::vector<String> names, int tableSize)
00045     {
00046         mQuids = new QuidTable(tableSize);
00047         mQuids->SetColName(1, "quid");
00048         for (size_t i=0 ; i<names.size() ; i++)
00049         {
00050             mNames.push_back(names[i]);
00051             SimTableType* sim = new SimTableType(tableSize);
00052             sim->SetColName(1, "similarity");
00053             mSims.push_back(sim);
00054             RankTableType* rank = new RankTableType(tableSize);
00055             rank->SetColName(1, "id");
00056             mRanks.push_back(rank);
00057         }
00058     }
00059 
00060     virtual
00061     ~SimilarityTableSet()
00062     {
00063         delete mQuids;
00064         for (size_t i=0 ; i<mSims.size() ; i++)
00065             delete mSims[i];
00066         for (size_t i=0 ; i<mRanks.size() ; i++)
00067             delete mRanks[i];
00068     }
00069 
00070 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00071     static SimilarityTableSet*
00072     MakeFromFile(VideoSet::VideoSet* videoSet, String conceptSet, String model,
00073                  String feature)
00074     {
00075         String path = videoSet->GetFilePathSimilarityIndex(conceptSet, model,
00076                                                            feature, "names.txt",
00077                                                            false, false);
00078         if (path.empty())
00079             return 0;
00080         return MakeFromFile(path, videoSet->GetDatabase());
00081     }
00082 
00083     static SimilarityTableSet*
00084     MakeFromFile(Database::RawDataSet* dataSet, String walkType, String conceptSet,
00085                  String model, String featureString, int fileOrDirId)
00086     {
00087         String path =
00088             dataSet->GetFilePathSimilarityData(walkType, conceptSet, model,
00089                                                 featureString, fileOrDirId,
00090                                                 "names.txt", false, false);
00091         if (path.empty())
00092             return 0;
00093         return MakeFromFile(path, dataSet->GetDatabase());
00094     }
00095 
00096 // RvB: Added in support of VOC image sets
00097 static SimilarityTableSet*
00098 MakeFromFile(ImageSet::ImageSet* is, String conceptSet, String model,
00099              String featureString)
00100 {
00101     String path =
00102         is->GetFilePathSimilarityIndex(conceptSet, model, featureString,
00103                                       "names.txt", false, false);
00104     if (path.empty())
00105         return 0;
00106     return MakeFromFile(path, is->GetDatabase());
00107 }
00108 
00109     static SimilarityTableSet*
00110     MakeFromFile(String path, Util::Database* db) // path to "names.txt"
00111     {
00112         std::vector<String> names;
00113         Util::DatabaseReadString(std::back_inserter(names), path, db, true);
00114         SimilarityTableSet* res = new SimilarityTableSet(names, 0);
00115         path.resize(path.size() - 9); // remove "names.txt"
00116         if (path.size() == 0)
00117             path = "./";
00118         res->Load(path, db);
00119         path.resize(path.size() - 1); // remove "/"
00120         String dir = FileNameTail(path);
00121         res->SetDescription(dir);
00122         return res;
00123     }
00124 
00125     static bool
00126     Exists(Database::RawDataSet* dataSet, String walkType, String conceptSet,
00127            String model, String featureString, int fileId)
00128     {
00129         String path =
00130             dataSet->GetFilePathSimilarityData(walkType, conceptSet, model,
00131                                                featureString, fileId,
00132                                                "names.txt", false, true);
00133         return !path.empty();
00134     }
00135 
00136     static bool
00137     Exists(Database::PathCreator* pathCreator)
00138     {
00139         String path = 
00140             pathCreator->GetFilePathSimilarityData("names.txt", false, true);
00141         return !path.empty();
00142     }
00143 #endif // REPOSITORY_USED
00144 
00145     String
00146     GetDescription()
00147     {
00148         return mDescription;
00149     }
00150 
00151     void
00152     SetDescription(String description)
00153     {
00154         mDescription = description;
00155     }
00156 
00157     int
00158     NrTables() const
00159     {
00160         return mNames.size();
00161     }
00162 
00163     int
00164     TableSize() const
00165     {
00166         return mQuids->Size();
00167     }
00168 
00169     std::vector<String>
00170     GetNames()
00171     {
00172         return mNames;
00173     }
00174 
00175     String
00176     GetName(int id) const
00177     {
00178         return mNames[id];
00179     }
00180 
00181     int
00182     GetIndex(String name)
00183     {
00184         for (int i=0 ; i<mNames.size() ; i++)
00185             if (mNames[i] == name)
00186                 return i;
00187         return -1;
00188     }
00189 
00190     void
00191     Rename(int id, String name)
00192     {
00193         mNames[id] = name;
00194     }
00195 
00196     int
00197     GetQuidClass() const
00198     {
00199         if (mQuids->Size() < 1) // cannot tell
00200             return QUID_CLASS_UNKNOWN;
00201         return QuidClass(mQuids->Get1(0));
00202     }
00203 
00204     QuidTable*
00205     GetQuidTable() const
00206     {
00207         return mQuids;
00208     }
00209 
00210     int
00211     FindQuid(Quid quid)
00212     {
00213         return Column::Find(mQuids->GetColumn1(), quid);
00214     }
00215 
00216     void
00217     ChangeQuidObject(int newObject)
00218     {
00219         int nrChanged = 0;
00220         for (int i=0 ; i<mQuids->Size() ; i++)
00221         {
00222             Quid q = mQuids->Get1(i);
00223             if (QuidObject(q) != newObject)
00224             {
00225                 q = QuidObjectChange(q, newObject);
00226                 mQuids->Set1(i, q);
00227                 nrChanged++;
00228             }
00229         }
00230         if (nrChanged != 0)
00231             ILOG_INFO("Changed " << nrChanged << " quid objects");
00232     }
00233 
00234     SimTableType*
00235     GetSimTable(int id) const
00236     {
00237         return mSims[id];
00238     }
00239 
00240     void
00241     SetSimTable(int id, ScoreTable* scores)
00242     {
00243         ILOG_INFO("score table size = " << scores->Size());
00244         if(scores->Size() != mQuids->Size())
00245         {
00246             ILOG_ERROR("SetSimTable: size of quids and ScoreTable do not match");
00247             scores->GetQuidTable()->Dump(0, 0, -1);
00248             mQuids->Dump(0, 0, -1);
00249             exit(1);
00250         }
00251         SimTableType* dst = mSims[id];
00252         dst->SetSize(scores->Size());
00253         Copy(dst->GetColumn1(), scores->GetColumn2(), scores->Size());
00254     }
00255 
00256     SimTableType*
00257     GetSimTable(String name) const
00258     {
00259         for (size_t i=0 ; i<mNames.size() ; i++)
00260         {
00261             if (mNames[i] == name)
00262                 return mSims[i];
00263         }
00264         return 0;
00265     }
00266 
00267     RankTableType*
00268     GetRankTable(int id) const
00269     {
00270         return mRanks[id];
00271     }
00272 
00273     RankTableType*
00274     GetRankTable(String name) const
00275     {
00276         for (size_t i=0 ; i<mNames.size() ; i++)
00277         {
00278             if (mNames[i] == name)
00279                 return mRanks[i];
00280         }
00281         return 0;
00282     }
00283 
00284     void
00285     ComputeRank(int tableIdx, bool descending)
00286     {
00287         SimTableType* simTable = mSims[tableIdx];
00288         int size = simTable->Size();
00289         if (size != mQuids->Size())
00290             ILOG_ERROR("ComputeRank: tables sizes do not match: " << size << 
00291                        " " << mQuids->Size());
00292         SimTableType* tmp = new SimTableType(size);
00293         Core::Table::Copy(tmp, simTable);
00294         Core::Table::Copy(mRanks[tableIdx], mQuids);
00295         if (descending)
00296             Column::SortDescending(mRanks[tableIdx]->GetColumn1(),
00297                                    tmp->GetColumn1(), tmp->Size());
00298         else
00299             Column::SortAscending(mRanks[tableIdx]->GetColumn1(),
00300                                   tmp->GetColumn1(), tmp->Size());
00301         delete tmp;
00302     }
00303 
00304     void
00305     ComputeRanks(bool descending)
00306     {
00307         SimTableType* tmp = new SimTableType(mQuids->Size());
00308         for (int i=0 ; i<NrTables() ; i++)
00309         {
00310             ILOG_DEBUG("Sorting table " << i << " of " << NrTables());
00311             if (mSims[i]->Size() != mQuids->Size())
00312             {
00313                 ILOG_ERROR("ComputeRanks: tables sizes do not match: " <<
00314                            mSims[i]->Size() << " " << mQuids->Size());
00315                 continue;
00316             }
00317             Core::Table::Copy(mRanks[i], mQuids);
00318             Core::Table::Copy(tmp, mSims[i]);
00319             if (descending)
00320                 Column::SortDescending(mRanks[i]->GetColumn1(),
00321                                        tmp->GetColumn1(), tmp->Size());
00322             else
00323                 Column::SortAscending(mRanks[i]->GetColumn1(),
00324                                       tmp->GetColumn1(), tmp->Size());
00325         }
00326         delete tmp;
00327     }
00328 
00329     void
00330     ComputeRanksTopN(int topN, bool descending)
00331     {
00332         if (mQuids->Size() < topN)
00333         {
00334             ILOG_ERROR("ComputeRanksTopN: not enough quids for top " << topN);
00335             return;
00336         }
00337         if (mRanks[0]->Capacity() != topN)
00338         {
00339             for (int i=0 ; i<NrTables() ; i++)
00340             {
00341                 mRanks[i]->Reserve(topN, false);
00342                 mRanks[i]->SetSize(topN); // will be filled below
00343             }
00344         }
00345         Column::ColumnTem<Real64>* tmp = new Column::ColumnTem<Real64>(topN);
00346         for (int i=0 ; i<NrTables() ; i++)
00347         {
00348             if (descending)
00349                 Column::SortDescendingTopN(mQuids->GetColumn1(),
00350                                            mSims[i]->GetColumn1(),
00351                                            mRanks[i]->GetColumn1(), tmp);
00352             else
00353                 Column::SortAscendingTopN(mQuids->GetColumn1(),
00354                                           mSims[i]->GetColumn1(),
00355                                           mRanks[i]->GetColumn1(), tmp);
00356         }
00357         delete tmp;
00358     }
00359 
00360     void
00361     Append(SimilarityTableSet* simSet)
00362     {
00363         if (NrTables() != simSet->NrTables())
00364         {
00365             ILOG_ERROR("Append: different number of tables");
00366             return;
00367         }
00368         Core::Table::Append(GetQuidTable(), simSet->GetQuidTable());
00369         for (int i=0 ; i<NrTables() ; i++)
00370         {
00371             Core::Table::Append(GetSimTable(i), simSet->GetSimTable(i));
00372         }
00373     }
00374 
00375     // Differences in rank may occur due to non-deterministic sorting...
00376     int
00377     Diff(SimilarityTableSet* arg, bool doDiffRank = false) const
00378     {
00379         if (NrTables() != arg->NrTables())
00380         {
00381             ILOG_ERROR("Diff: NrTables differs: " << NrTables() << " vs "
00382                        << arg->NrTables());
00383             return 1;
00384         }
00385         if (TableSize() != arg->TableSize())
00386         {
00387             ILOG_ERROR("Diff: TableSize differs: " << TableSize() << " vs "
00388                        << arg->TableSize());
00389             return 1;
00390         }
00391         QuidTable* qTable1 = GetQuidTable();
00392         QuidTable* qTable2 = arg->GetQuidTable();
00393         int qDiff = 0;
00394         for (int i=0 ; i<qTable1->Size() ; i++)
00395         {
00396             if (qTable1->Get1(i) != qTable2->Get1(i))
00397             {
00398                 qDiff++;
00399                 ILOG_DEBUG("Diff: quid elem " << i << ": "
00400                            << qTable1->Get1(i) << " vs " << qTable2->Get1(i));
00401             }
00402         }
00403         if (qDiff > 0)
00404         {
00405             ILOG_ERROR("Diff: " << qDiff << " differences in quid table");
00406             return qDiff;
00407         }
00408 
00409         int totalDiff = 0;
00410         for (int t=0 ; t<NrTables() ; t++)
00411         {
00412             int nDiff = 0;
00413             SimTableType* sTable1 = GetSimTable(t);
00414             SimTableType* sTable2 = arg->GetSimTable(t);
00415             for (int i=0 ; i<sTable1->Size() ; i++)
00416             {
00417                 if (::fabs(sTable1->Get1(i) - sTable2->Get1(i)) > 0.00001)
00418                 {
00419                     nDiff++;
00420                     ILOG_DEBUG("Diff: sim table " << t << ", elem " << i <<
00421                                ": " << sTable1->Get1(i) << " vs " <<
00422                                sTable2->Get1(i));
00423                 }
00424             }
00425             if (nDiff > 0)
00426                 ILOG_ERROR("Diff: " << nDiff << " differences in sim table " <<
00427                            GetName(t));
00428             totalDiff += nDiff;
00429 
00430             if (!doDiffRank)
00431                 continue;
00432 
00433             nDiff = 0;
00434             RankTableType* rTable1 = GetRankTable(t);
00435             RankTableType* rTable2 = arg->GetRankTable(t);
00436             for (int i=0 ; i<rTable1->Size() ; i++)
00437             {
00438                 if (rTable1->Get1(i) != rTable2->Get1(i))
00439                 {
00440                     nDiff++;
00441                     ILOG_DEBUG("Diff: rank table " << t << ", elem " << i <<
00442                                ": " << rTable1->Get1(i) << " vs " <<
00443                                rTable2->Get1(i));
00444                 }
00445             }
00446             if (nDiff > 0)
00447                 ILOG_ERROR("Diff: " << nDiff << " differences in rank table " <<
00448                            GetName(t));
00449             totalDiff += nDiff;
00450         }
00451 
00452         if (totalDiff > 0)
00453             ILOG_ERROR("Diff: total of " << totalDiff << " differences");
00454         return totalDiff;
00455     }
00456 
00457     // I/O part
00458 
00459 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00460     void
00461     Load(CString fileBase, Util::Database* db)
00462     {
00463         LoadQuids(fileBase, db);
00464         LoadSims(fileBase, db);
00465         LoadRanks(fileBase, db);
00466     }
00467 
00468     void
00469     LoadQuids(CString fileBase, Util::Database* db)
00470     {
00471         String fName = fileBase + "all_quids.tab";
00472         Core::Table::Read(mQuids, fName, db);
00473     }
00474 
00475     void
00476     LoadSim(size_t tableIdx, CString fileBase, Util::Database* db)
00477     {
00478         String fName = fileBase + mNames[tableIdx] + "_sim.tab";
00479         Core::Table::Read(mSims[tableIdx], fName, db);
00480     }
00481 
00482     void
00483     LoadSims(CString fileBase, Util::Database* db)
00484     {
00485         for (size_t i=0 ; i<mNames.size() ; i++)
00486             LoadSim(i, fileBase, db);
00487     }
00488 
00489     void
00490     LoadRank(size_t tableIdx, CString fileBase, Util::Database* db)
00491     {
00492         String fName = fileBase + mNames[tableIdx] + "_rank.tab";
00493         Core::Table::Read(mRanks[tableIdx], fName, db);
00494     }
00495 
00496     void
00497     LoadRanks(CString fileBase, Util::Database* db)
00498     {
00499         for (size_t i=0 ; i<mNames.size() ; i++)
00500             LoadRank(i, fileBase, db);
00501     }
00502 
00503     void
00504     Save(int tableIdx, CString fileBase, Util::Database* db, bool binary)
00505     {
00506         SaveQuids(fileBase, db, binary);
00507         SaveSim(tableIdx, fileBase, db, binary);
00508         SaveRank(tableIdx, fileBase, db, binary);
00509     }
00510 
00511     void
00512     Save(Database::RawDataSet* dataSet, CString conceptSet, CString model,
00513          CString feature, bool binary)
00514     {
00515         String path = dataSet->GetFilePathSimilarityIndex(conceptSet, model,
00516                                                           feature, "names.txt",
00517                                                           true, false);
00518         if (path.empty())
00519             return;
00520         path.resize(path.size() - 9); // remove "names.txt"
00521         Save(path, dataSet->GetDatabase(), binary);
00522     }
00523 
00524     void
00525     Save(Database::RawDataSet* dataSet, String walkType, String conceptSet,
00526          CString model, CString feature, int fileOrDirId, bool binary)
00527     {
00528         String path = dataSet->GetFilePathSimilarityData
00529             (walkType, conceptSet, model, feature, fileOrDirId, "names.txt",
00530              true, false);
00531         if (path.empty())
00532             return;
00533         path.resize(path.size() - 9); // remove "names.txt"
00534         Save(path, dataSet->GetDatabase(), binary);
00535     }
00536 
00537     void
00538     Save(Database::PathCreator* pathCreator, bool binary=true)
00539     {
00540         String path = pathCreator->GetFilePathSimilarityData("names.txt", true);
00541         ILOG_DEBUG("simset want's to save at \"" << path << "\"");
00542         if (path.empty())
00543         {
00544             ILOG_ERROR("couldn't write simset");
00545             return;
00546         }
00547         path.resize(path.size() - 9); // remove "names.txt"
00548         Save(path, pathCreator->GetDatabase(), binary);
00549     }
00550 
00551     void
00552     Save(CString fileBase, Util::Database* db, bool binary)
00553     {
00554         SaveNames(fileBase, db);
00555         SaveQuids(fileBase, db, binary);
00556         SaveSims(fileBase, db, binary);
00557         SaveRanks(fileBase, db, binary);
00558     }
00559 
00560     void
00561     SaveNames(CString fileBase, Util::Database* db)
00562     {
00563         String fName = fileBase + "names.txt";
00564         Util::DatabaseWriteString(fName, db, mNames.begin(), mNames.end());
00565     }
00566 
00567     void
00568     SaveQuids(CString fileBase, Util::Database* db, bool binary)
00569     {
00570         String fName = fileBase + "all_quids.tab";
00571         Core::Table::Write(mQuids, fName, db, binary);
00572     }
00573 
00574     void
00575     SaveSim(size_t tableIdx, CString fileBase, Util::Database* db, bool binary)
00576     {
00577         String fName = fileBase + mNames[tableIdx] + "_sim.tab";
00578         Core::Table::Write(mSims[tableIdx], fName, db, binary);
00579     }
00580 
00581     void
00582     SaveSims(CString fileBase, Util::Database* db, bool binary)
00583     {
00584         for (size_t i=0 ; i<mNames.size() ; i++)
00585             SaveSim(i, fileBase, db, binary);
00586     }
00587 
00588     void
00589     SaveRank(size_t tableIdx, CString fileBase, Util::Database* db, bool binary)
00590     {
00591         String fName = fileBase + mNames[tableIdx] + "_rank.tab";
00592         Core::Table::Write(mRanks[tableIdx], fName, db, binary);
00593     }
00594 
00595     void
00596     SaveRanks(CString fileBase, Util::Database* db, bool binary)
00597     {
00598         for (size_t i=0 ; i<mNames.size() ; i++)
00599             SaveRank(i, fileBase, db, binary);
00600     }
00601 #endif // REPOSITORY_USED
00602 
00603     // misc
00604 
00605     void
00606     Dump(bool doTable)
00607     {
00608         std::cout << "SimilarityTableSet " << GetDescription() << std::endl;
00609         for (int i=0 ; i<NrTables() ; i++)
00610         {
00611             std::cout << "table " << i << ", name = " << GetName(i) << std::endl;
00612             if (doTable)
00613                 GetSimTable(i)->Dump();
00614         }
00615     }
00616 
00617     void
00618     DumpRanking(int from = 0, int to = -1)
00619     {
00620         DumpRanking(0, from, to);
00621     }
00622 
00623     virtual void
00624     DumpRanking(Database::RawDataSet* set, int from = 0, int to = -1)
00625     {
00626         if (to == -1 || to > TableSize())
00627             to = TableSize();
00628         if (to < from)
00629             to = from;
00630         std::cout << "SimilarityTableSet " << GetDescription() << std::endl;
00631         for (int t=0 ; t<NrTables() ; t++)
00632         {
00633             SimTableType* simTable = GetSimTable(t);
00634             RankTableType* rankTable = GetRankTable(t);
00635             std::cout << "table " << t << ", name = " << GetName(t) << std::endl;
00636             if (simTable->Size() != mQuids->Size())
00637             {
00638                 ILOG_ERROR("DumpRanking: simtable size doesn't match");
00639                 continue;
00640             }
00641             if (rankTable->Size() != mQuids->Size())
00642             {
00643                 ILOG_ERROR("DumpRanking: ranktable size doesn't match");
00644                 continue;
00645             }
00646             std::cout << "i, Quid, similarity, rank, sim_at_rank" << std::endl;
00647             for (int i=from ; i<to ; i++)
00648             {
00649                 Quid quid = mQuids->Get1(i);
00650                 std::cout << i << ", ";
00651                 if(set)
00652                     std::cout << set->QuidToString(quid, true) << ", ";
00653                 else
00654                     std::cout << QuidObj(quid) << ", ";
00655                 std::cout << simTable->Get1(i) << ", ";
00656                 int rank = FindQuid(rankTable->Get1(i));
00657                 std::cout << rank << ", " << simTable->Get1(rank) << std::endl;
00658             }
00659             std::cout << std::endl;
00660         }
00661     }
00662 
00663 private:
00664 
00665     String                      mDescription;
00666     QuidTable*                  mQuids;
00667     std::vector<String>         mNames;
00668     std::vector<SimTableType*>  mSims;
00669     std::vector<RankTableType*> mRanks;
00670 
00671     ILOG_VAR_DEC;
00672 
00673 };
00674 
00675 ILOG_VAR_INIT(SimilarityTableSet, Impala.Core.Table);
00676 
00677 } // namespace Table
00678 } // namespace Core
00679 } // namespace Impala
00680 
00681 #endif

Generated on Fri Mar 19 09:31:20 2010 for ImpalaSrc by  doxygen 1.5.1