Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

ConfusionMatrix.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Table_ConfusionMatrix_h
00002 #define Impala_Core_Table_ConfusionMatrix_h
00003 
00004 #include "Core/Table/Bayes.h"
00005 
00006 namespace Impala
00007 {
00008 namespace Core
00009 {
00010 namespace Table
00011 {
00012 
00013 
00017 class ConfusionMatrix : public Bayes
00018 {
00019 public:
00020 
00021     ConfusionMatrix(String setName, String conceptSet, String modelName, String featureName,
00022                                        String conceptPath, String quid, int nStart,int nEnd): 
00023          Bayes("", setName, conceptSet, conceptSet, quid, nStart, nEnd)
00024     {
00025 
00026         mSetName = setName;
00027         mModelName = modelName;
00028         mFeatureName = featureName;
00029 
00030         mConceptPath = conceptPath;
00031 
00032         // load annotation table set
00033         if (mIsImageSet)
00034         {
00035             mAnnoImgSet = Core::ImageSet::MakeImageSet(setName);
00036             mAnnoTabSet = Core::Table::AnnotationTableSet::MakeFromFile(mAnnoImgSet, mConceptSet, true,
00037                                                                            StringToQuidClass(mQuid));
00038         }
00039         else
00040         {
00041             mAnnoVidSet = Core::VideoSet::MakeVideoSet(setName);
00042             mAnnoTabSet = Core::Table::AnnotationTableSet::MakeFromFile(mAnnoVidSet, mConceptSet, true,
00043                                                                            StringToQuidClass(mQuid));            
00044         }
00045             
00046         if (mAnnoTabSet == NULL)
00047         {
00048             printf("ERROR: The Concept Set does NOT exist.\n");
00049             return;
00050         }
00051 
00052         mConceptNum = mAnnoTabSet->Size();
00053         if (mConceptNum<0)
00054         {
00055             printf("ERROR: The number of conceptis Zero.\n");
00056             return;
00057         }
00058 
00059         tabConcept = new AnnotationTable* [mConceptNum];
00060     
00061         // Note: mVidSet/mImgSet has been initialized in Bayes class   
00062         if (mIsImageSet)
00063         {
00064             //mGenreSimSet = Core::Table::SimilarityTableSet::MakeFromFile(genrePath, mImgSet->GetDatabase());
00065             mConceptSimSet = Core::Table::SimilarityTableSet::MakeFromFile(mConceptPath, mImgSet->GetDatabase());
00066         }
00067         else
00068         {
00069             mKeyframes = new Core::VideoSet::Keyframes(mVidSet, "keyframes"); 
00070 
00071             //mGenreSimSet = Core::Table::SimilarityTableSet::MakeFromFile(genrePath, mVidSet->GetDatabase());
00072             mConceptSimSet = Core::Table::SimilarityTableSet::MakeFromFile(mConceptPath, mVidSet->GetDatabase());
00073         }
00074             
00075 
00076         conceptSimTable = new SimilarityTableSet::SimTableType* [mConceptNum];
00077 
00078         mQuidTable = mConceptSimSet->GetQuidTable();
00079         mTableSize = mQuidTable->Size();
00080 
00081         /*Core::Table::QuidTable* conceptQuidTable = mConceptSimSet->GetQuidTable();
00082         int conceptTableSize = conceptQuidTable->Size();
00083 
00084         //_ASSERT(mTableSize == conceptTableSize);
00085         if (mTableSize != conceptTableSize)
00086         {
00087             printf("ERROR: Quid Table is NOT the same for concept and genre.\n");
00088             return;
00089         }*/
00090 
00091 
00092         if ((mEnd == -1) || (mEnd > mQuidTable->Size()))
00093             mEnd = mQuidTable->Size();
00094 
00095         //mWriteTable = bWriteTable;
00096 
00097         //genreSimTable = NULL;
00098         //conceptSimTable = NULL;
00099 
00100         AllocateMatrix(mConfusionMatrix, mConceptNum, mConceptNum);
00101         ResetMatrix(mConfusionMatrix, mConceptNum, mConceptNum);
00102     }
00103 
00104     virtual ~ConfusionMatrix()
00105     {
00106         //if (NULL != genreSimTable)
00107         //    delete []genreSimTable;
00108         if (NULL != conceptSimTable)
00109             delete []conceptSimTable;
00110 
00111         //delete mGenreSimSet;
00112         delete mConceptSimSet;
00113 
00114         delete mKeyframes;
00115 
00116         ReleaseMatrix(mConfusionMatrix, mConceptNum);
00117     }
00118 
00119     void
00120     DoConfusionMatrix()
00121     {
00122         std::cout << std::endl;
00123         std::cout << "DumpConfusionMatrix() - my own tool" << std::endl;
00124         std::cout << std::endl;
00125 
00126         // load annotation and similarity tables
00127         for (int c=0 ; c< mConceptNum ; c++)
00128         {
00129             tabConcept[c] = mAnnoTabSet->GetTable(c);
00130 
00131             std::cout << "table " << c << ", name = " << mConceptSimSet->GetName(c) << std::endl;
00132             conceptSimTable[c] = mConceptSimSet->GetSimTable(c);
00133         }
00134 
00135         // get the mean of each concept list
00136         Real64* pMean = new Real64[mConceptNum];
00137         memset(pMean,0,sizeof(Real64)*mConceptNum);
00138         for (int c=0 ; c<mConceptNum ; c++)
00139         {
00140             Real64 fSum = 0;
00141             for (int i=0 ; i<mTableSize ; i++)
00142             {
00143                 Quid quid = mQuidTable->Get1(i);                    
00144                 Real64 sim = conceptSimTable[c]->Get1(i);
00145                 fSum += sim;
00146             }
00147             pMean[c] = fSum/mTableSize;
00148         }
00149 
00150         // get the variance of each concept list
00151         Real64* pStd = new Real64[mConceptNum];
00152         memset(pStd,0,sizeof(Real64)*mConceptNum);
00153         for (int c=0 ; c<mConceptNum ; c++)
00154         {
00155             Real64 fSum = 0;
00156             for (int i=0 ; i<mTableSize ; i++)
00157             {
00158                 Quid quid = mQuidTable->Get1(i);                    
00159                 Real64 sim = conceptSimTable[c]->Get1(i);
00160                 fSum += pow(sim - pMean[c],2);
00161             }
00162             pStd[c] = sqrt(fSum/(mTableSize-1));
00163         }
00164 
00165         // dump the old data distribution
00166         std::cout << std::endl;
00167         std::cout << "Before gaussian normalization:" << std::endl;
00168         for (int c=0; c<mConceptNum; c++ )
00169         {
00170             printf("  (mean,std) = (%.10f, %.10f)\n", pMean[c], pStd[c]);
00171         }
00172 
00173         // do the gaussian normalization of the similarity table
00174         for (int c=0 ; c<mConceptNum ; c++)
00175         {
00176             Real64 fSum = 0;
00177             for (int i=0 ; i<mTableSize ; i++)
00178             {
00179                 Quid quid = mQuidTable->Get1(i);                    
00180                 Real64 old_score = conceptSimTable[c]->Get1(i);
00181                 Real64 new_score = (old_score - pMean[c])/pStd[c];
00182 
00183                 // set the normalized value as the new score
00184                 conceptSimTable[c]->Set1(i,new_score);
00185             }   
00186         }
00187 
00188         // check the updated data distribution
00189         memset(pMean,0,sizeof(Real64)*mConceptNum);
00190         memset(pStd,0,sizeof(Real64)*mConceptNum);
00191         for (int c=0 ; c<mConceptNum ; c++)
00192         {
00193             Real64 fSum = 0;
00194             for (int i=0 ; i<mTableSize ; i++)
00195             {
00196                 Quid quid = mQuidTable->Get1(i);                    
00197                 Real64 sim = conceptSimTable[c]->Get1(i);
00198                 fSum += sim;
00199             }
00200             pMean[c] = fSum/mTableSize;
00201         }
00202 
00203         // get the variance of each concept list
00204         for (int c=0 ; c<mConceptNum ; c++)
00205         {
00206             Real64 fSum = 0;
00207             for (int i=0 ; i<mTableSize ; i++)
00208             {
00209                 Quid quid = mQuidTable->Get1(i);                    
00210                 Real64 sim = conceptSimTable[c]->Get1(i);
00211                 fSum += pow(sim - pMean[c],2);
00212             }
00213             pStd[c] = sqrt(fSum/(mTableSize-1));
00214         }
00215 
00216         // dump the normalized data distribution
00217         std::cout << "After gaussian normalization:" << std::endl;
00218         for (int c=0; c<mConceptNum; c++ )
00219         {
00220             printf("  (mean,std) = (%.10f, %.10f)\n", pMean[c], pStd[c]);
00221         }
00222 
00223         delete []pMean;
00224         delete []pStd;
00225 
00226         // loop for all the key-frames in the table
00227         int nDupLabels = 0;
00228         for (int i=0 ; i<mTableSize ; i++)
00229         {
00230             Quid quid = mQuidTable->Get1(i);
00231 
00232             // for keyframe i: find the highest score among all concepts
00233             Real64 fMaxValue = -1;
00234             int nDetect = -1;
00235 
00236             for (int c=0 ; c<mConceptNum ; c++)
00237             {
00238                 //String concept = mConceptSimSet->GetName(c);
00239                 //std::cout << concept << ", ";
00240                 Real64 sim = conceptSimTable[c]->Get1(i);
00241                 if (sim>fMaxValue)
00242                 {
00243                     fMaxValue = sim;
00244                     nDetect = c;
00245                 }
00246             }
00247 
00248             // for keyframe i: find the ground truth that concept keyframe i belongs to
00249             int nTruth = -1;
00250             int count = 0;
00251             for (int c=0 ; c<mConceptNum ; c++)
00252             {
00253                 //String concept = mConceptSimSet->GetName(c);
00254                 //std::cout << concept << ", ";
00255                 if ( tabConcept[c]->IsPositive(i) )
00256                 {
00257                     nTruth = c;
00258                     count ++;
00259                 }
00260             }
00261             // be sure that only one unique label
00262 
00263             if (count == 0) // nTruth == -1
00264             {
00265                 // no ground truth for current key-frame
00266                 continue;
00267             }
00268             else if (count > 1)
00269             {
00270                 //std::cout << "Warning: there are " << count << " labels for shot " << i << std::endl;
00271                 nDupLabels++;
00272             }
00273 
00274             int nCorrect = 0;
00275             int nError = 0;
00276 
00277             // mConfusionMatrix[i=nTruth][j=nDetect]:
00278             // 1) A row (fixed i) stores all the predicted results for concept i
00279             // 2) A column (fixed j) stores all the ground truth of concept j
00280             // 3) the sum of a column j should be the total number of positive samples for concept j (before normalization)
00281             if (nDetect == nTruth)
00282             {
00283                 mConfusionMatrix[nTruth][nTruth] += 1.0;
00284                 nCorrect ++;
00285                                     
00286             }
00287             else
00288             {
00289                 // re-corrected order for row/column
00290                 mConfusionMatrix[nTruth][nDetect] += 1.0;
00291                 nError ++;
00292             }
00293         }
00294 
00295         std::cout << "Duplicated Labels: " << nDupLabels << std::endl;
00296 
00297         DumpMatrix("Confusion Matrix", mConfusionMatrix, mConceptNum, mConceptNum, false);
00298 
00299         String outfileInt = "./ConfusionMatrix/" + mSetName + "_" + mConceptSet + "_" + mFeatureName + "_int.txt";
00300         SaveConfigurationMatrix(outfileInt.c_str(), false);
00301 
00302         // normalizations: sum of a column y (fixed x) is 1
00303         // mConfusionMatrix[x=nTruth][y=nDetect]:
00304         for (int x=0 ; x<mConceptNum; x++)
00305         {
00306             Real64 sum = 0;
00307             for (int y=0 ; y<mConceptNum ; y++)
00308                 sum += mConfusionMatrix[x][y];
00309 
00310             if (sum < 0) continue;
00311 
00312             for (int y=0 ; y<mConceptNum ; y++)
00313                 mConfusionMatrix[x][y] /= sum;
00314         }
00315 
00316         printf("The sum of each column (actual class)\n");
00317         for (int x=0 ; x<mConceptNum; x++)
00318         {
00319             Real64 sum = 0;
00320             for (int y=0 ; y<mConceptNum ; y++)
00321                 sum += mConfusionMatrix[x][y];
00322 
00323             printf("%.5f ", sum);
00324         }
00325         printf("\n");
00326 
00327         DumpMatrix("Confusion Matrix", mConfusionMatrix, mConceptNum, mConceptNum);
00328 
00329         String outfile = "./ConfusionMatrix/" + mSetName + "_" + mConceptSet + "_" + mFeatureName + "_3f.txt";
00330         SaveConfigurationMatrix(outfile.c_str());
00331 
00332         int x=0;
00333     }
00334 
00335     void
00336     // dump simiarity table and annotation table into two matrix for CAMP plot (matlab tool by Jasper)
00337     DumpConfusionMatrixCAMP(bool bSorted = false)
00338     {
00339         std::cout << "DumpConfusionMatrixCAMP() - for CAMP tool by Jasper" << std::endl;
00340 
00341         // load annotation and similarity tables
00342         for (int c=0 ; c< mConceptNum ; c++)
00343         {
00344             tabConcept[c] = mAnnoTabSet->GetTable(c);
00345 
00346             std::cout << "sim table " << c << ", name = " << mConceptSimSet->GetName(c) << std::endl;
00347             conceptSimTable[c] = mConceptSimSet->GetSimTable(c);
00348         }
00349 
00350         // we assume skipping is concept-independent.
00351         Impala::Core::Table::AnnotationTable* truth = tabConcept[0];
00352         int nTotalShots = truth->GetNrPositive() + truth->GetNrNegative();
00353 
00354 
00355         // dump similarity table for all the concepts, into a matrix
00356         String fileSimTable = "./ConfusionMatrix/" + mSetName + "_" + mConceptSet + "_" + mFeatureName + "_sim_table.txt";
00357         FILE* fpSim = fopen(fileSimTable.c_str(), "wt");
00358         if (NULL == fpSim)
00359         {
00360             std::cout << "File path deos NOT exist: " << fileSimTable << std::endl;
00361             return;
00362         } 
00363         fprintf(fpSim,"%d\t%d\n",nTotalShots,mConceptNum); 
00364 
00365         // dump annotation table for all the concepts, into a matrix
00366         String fileAnnoTable = "./ConfusionMatrix/" + mSetName + "_" + mConceptSet + "_" + mFeatureName + "_anno_table.txt";
00367         FILE* fpAnno = fopen(fileAnnoTable.c_str(), "wt");
00368         if (NULL == fpAnno)
00369         {
00370             std::cout << "File path deos NOT exist: " << fileAnnoTable << std::endl;
00371             return;
00372         } 
00373         fprintf(fpAnno,"%d\t%d\n",nTotalShots,mConceptNum); 
00374 
00375         std::cout << "start to merge sim and anno tables into two single files ..." << std::endl;
00376         for (int i=0 ; i<mTableSize ; i++)
00377         {
00378             // only for un-sorted table
00379             Quid quid = mQuidTable->Get1(i);
00380 
00381             bool bIsSkip = false;
00382             for (int c=0 ; c<mConceptNum ; c++)
00383             {
00384                 Impala::Core::Table::SimilarityTableSet::RankTableType* rank = mConceptSimSet->GetRankTable(c);
00385                 Impala::Core::Table::AnnotationTable* groundTruth = tabConcept[c];
00386 
00387                 Quid q;
00388                 int curPos = -1;
00389                 if (bSorted)
00390                 {
00391                     // only for similarity-based sorted table
00392                     q = rank->Get1(i);
00393                     curPos = Column::Find(mQuidTable->GetColumn1(), q);
00394                     if (curPos == mTableSize)
00395                     {
00396                         // if returned index is equal to table size, it mean that quid is not found.
00397                         std::cout << "Can NOT find the Quid: " << QuidObj(q) << std::endl;
00398                         continue;
00399                     }
00400                 }
00401                 else
00402                 {
00403                     //for un-sorted table
00404                     q = quid;
00405                     curPos = i;
00406                 }
00407 
00408                 // 1) dump similarity table for the concept c, into a matrix
00409                 if (!groundTruth->IsSkip(q))
00410                 {
00411                     // skipped shots are all ignored.
00412                     // only saving positive or negative samples
00413                     Real64 prob = conceptSimTable[c]->Get1(curPos);
00414                     fprintf(fpSim,"%f\t", prob);
00415                 }
00416                 else
00417                 {
00418                     // we assume skipping is concept-independent.
00419                     bIsSkip = true;
00420                     
00421                     break;
00422                 }
00423 
00424                 // 2) dump annotation table for the concept c, into a matrix
00425                 if ( groundTruth->IsPositive(q) )
00426                     fprintf(fpAnno,"%d ", 1);
00427                 else // ( groundTruth->IsNegative(q) )
00428                     fprintf(fpAnno,"%d ", 0);
00429                 //else //( groundTruth->IsSkip(q) )
00430                     //fprintf(fpAnno,"%d ", -1);
00431                 
00432             }
00433              
00434             if ( !bIsSkip)
00435             {
00436                 // no blank line for skipped shots.
00437                 fprintf(fpSim,"\n");
00438                 fprintf(fpAnno,"\n");
00439             }
00440 
00441             if (i%20000 == 0 && i>0)
00442             {
00443                 std::cout << " both saved: " << i << " lines." << std::endl;
00444             }
00445             
00446         }
00447         fclose(fpSim);
00448         fclose(fpAnno);
00449 
00450         std::cout << "Done. [*_sim_table.txt;*_anno_table.txt]" << std::endl;
00451 
00452         int x=0;
00453     }
00454 
00455     void
00456     DumpConceptSimilarityTableSet()
00457     {
00458         std::cout << "SimilarityTableSet " << mConceptSimSet->GetDescription() << std::endl << std::endl;
00459 
00460         //for (int k=0 ; k<mConceptSimSet->NrTables() ; k++)
00461         //_ASSERT(mConceptNum == mConceptSimSet->NrTables());
00462         for (int k=0 ; k<mConceptNum ; k++)
00463         {
00464             //SimilarityTableSet::SimTableType* conceptSimTable = mConceptSimSet->GetSimTable(k);
00465             conceptSimTable[k] = mConceptSimSet->GetSimTable(k);
00466 
00467             std::cout << "table " << k << ", name = " << mConceptSimSet->GetName(k)
00468                       << std::endl;
00469             if (conceptSimTable[k]->Size() != mQuidTable->Size())
00470             {
00471                 //ILOG_ERROR("DumpRanking: simtable size doesn't match");
00472                 printf("DumpRanking: simtable size doesn't match\n");
00473                 continue;
00474             }
00475             String concept = mConceptSimSet->GetName(k);
00476             for (int i=mStart ; i<mEnd ; i++)
00477             {
00478                 Quid quid = mQuidTable->Get1(i);
00479                 //std::cout << i << ", ";
00480                 std::cout << concept << ", ";
00481 
00482                 if (!mIsImageSet)
00483                 {
00484                     int keyId = mKeyframes->GetFrameId(quid);
00485                     if (keyId != -1)
00486                         std::cout << FileNameBase(mKeyframes->GetName(keyId)) << ", ";
00487                     else
00488                         std::cout << mVidSet->QuidToString(quid, true) << ", ";
00489                 }
00490                 std::cout << conceptSimTable[k]->Get1(i) << std::endl;
00491             }
00492             std::cout << std::endl;
00493         }
00494     }
00495 
00496     void
00497     SaveConfigurationMatrix(const char* file, bool bDumpFloat = true)
00498     {
00499         if (bDumpFloat)
00500             DumpMatrix("mConfusionMatrix", mConfusionMatrix, mConceptNum, mConceptNum, file);
00501         else
00502             DumpMatrix("mConfusionMatrix", mConfusionMatrix, mConceptNum, mConceptNum, file, false);
00503     }
00504 
00505 protected:
00506 
00507     //SimilarityTableSet* mGenreSimSet;
00508     SimilarityTableSet* mConceptSimSet;
00509 
00510     //SimilarityTableSet::SimTableType** genreSimTable;
00511     SimilarityTableSet::SimTableType** conceptSimTable;
00512 
00513     Core::Table::QuidTable* mQuidTable;  // only for similiarty table set
00514 
00515     String mConceptPath;
00516 
00517     Core::VideoSet::Keyframes* mKeyframes;
00518 
00519     AnnotationTableSet* mAnnoTabSet;
00520 
00521     //bool mWriteTable;
00522 
00523     double** mConfusionMatrix;      // conditional probability P(c_{k}|g_{j})
00524 
00525     String mModelName;
00526     String mFeatureName;
00527 
00528 };
00529 
00530 } // namespace Table
00531 } // namespace Core
00532 } // namespace Impala
00533 
00534 #endif

Generated on Thu Jan 13 09:04:37 2011 for ImpalaSrc by  doxygen 1.5.1