Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

Fisher.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Training_Fisher_h
00002 #define Impala_Core_Training_Fisher_h
00003 
00004 #include "Core/Training/Classifier.h"
00005 #include "Core/Training/Utils.h"
00006 #include "Core/Table/Sort.h"
00007 
00008 #include "Core/Array/Set.h"
00009 #include "Core/Array/SetVal.h"
00010 #include "Core/Array/Array2dTem.h"
00011 #include "Core/Array/Arrays.h"
00012 #include "Core/Array/AddVal.h"
00013 #include "Core/Array/MulVal.h"
00014 #include "Core/Array/EqualsVal.h"
00015 #include "Core/Array/PrintData.h"
00016 #include "Core/Array/PixStat.h"
00017 
00018 #include "Core/Matrix/MatTranspose.h"
00019 #include "Core/Matrix/MatSVD.h"
00020 #include "Core/Matrix/MatPInv.h"
00021 #include "Core/Matrix/MatMul.h"
00022 #include "Core/Matrix/MatCopy.h"
00023 #include "Core/Matrix/MatReplicateMatrix.h"
00024 
00025 #include "Core/Vector/VectorTem.h"
00026 #include "Core/Vector/VectorSet.h"
00027 #include "Core/Vector/Avg.h"
00028 #include "Core/Vector/AddAssign.h"
00029 #include "Core/Vector/MulAssign.h"
00030 
00031 #include <iomanip>
00032 
00033 
00034 namespace Impala
00035 {
00036 namespace Core
00037 {
00038 namespace Training
00039 {
00040 
00043 class Fisher : public Classifier
00044 {
00045 public:
00046     Fisher()
00047     {
00048         mTrainLabels=0;
00049         mTrainFeatures=0;
00050         mTestLabels=0;
00051         mTestFeatures=0;
00052         mRotation=0;
00053         mOffset=0;
00054         mMapped=0;
00055         mProbabilities=0;
00056         mCount=0;
00057     }
00058 
00059     ~Fisher()
00060     {
00061         delete mTrainLabels;
00062         delete mTrainFeatures;
00063         delete mTestLabels;
00064         delete mTestFeatures;
00065         delete mRotation;
00066         delete mOffset;
00067         delete mMapped;
00068         delete mProbabilities;
00069     }
00070 
00071     virtual void SetTrainSet(Feature::FeatureTable* data,
00072                              Table::AnnotationTable* annotation,
00073                              Table::QuidTable* mask)
00074     {
00075         ILOG_INFO("SetTrainSet");
00076         int featureLength = data->GetFeatureVectorLength();
00077         int sampleLength  = data->Size();
00078         ILOG_INFO(sampleLength<<" samples of "<<featureLength<<" features");
00079         
00080         if(mTrainFeatures)
00081             delete mTrainFeatures;
00082         if(mTrainLabels)
00083             delete mTrainLabels;
00084 
00085         mTrainLabels=0;
00086         mTrainFeatures=0;
00087 
00088         mTrainLabels = Array::ArrayCreate<Array::Array2dScalarReal64>
00089                     (1,sampleLength,0,0);
00090         mTrainFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>
00091                     (featureLength,sampleLength,0,0);
00092 
00093         for(int i=0;i<sampleLength;i++)
00094         {
00095             const Impala::Real64* values = data->GetColumn2()->GetVectorData(i);
00096             for( int j=0;j<featureLength;j++)
00097                  mTrainFeatures->SetValue(values[j],j,i);
00098 
00099             mTrainLabels->SetValue(annotation->Get2(i),0,i);
00100             if((i==0)||(i==sampleLength-1)){
00101                 ILOG_INFO("FeatureLength:"<<featureLength);
00102                 ILOG_INFO(i<<".\t"<<mTrainFeatures->Value(0,i)<<" ... "
00103                                 <<mTrainFeatures->Value(featureLength-1,i)<<" => "
00104                                 <<mTrainLabels->Value(0,i));
00105             }
00106         }
00107 
00108         ILOG_INFO("FeatureLength:"<<featureLength);
00109         ILOG_INFO(0<<".\t"<<mTrainFeatures->Value(0,0)<<" ... "
00110                           <<mTrainFeatures->Value(featureLength-1,0)<<" => "
00111                           <<mTrainLabels->Value(0,0));
00112         ILOG_INFO("End of SetTrainSet");
00113         //CopyFeatureTable(data,annotation,&mTrainFeatures,&mTrainLabels);
00114     }
00115 
00116     virtual void SetTestSet(Feature::FeatureTable* data,
00117                             Table::AnnotationTable* annotation,
00118                             Table::QuidTable* mask)
00119     {
00120         ILOG_INFO("SetTestSet");
00121         int featureLength = data->GetFeatureVectorLength();
00122         int sampleLength  = data->Size();
00123         ILOG_INFO(sampleLength<<" samples of "<<featureLength<<" features");
00124         
00125         if(mTestFeatures)
00126             delete (mTestFeatures);
00127         if(mTestLabels)
00128             delete (mTestLabels);
00129 
00130         mTestLabels=0;
00131         mTestFeatures=0;
00132 
00133         mTestLabels = Array::ArrayCreate<Array::Array2dScalarReal64>
00134                     (1,sampleLength,0,0);
00135         mTestFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>
00136                     (featureLength,sampleLength,0,0);
00137 
00138         for(int i=0;i<sampleLength;i++)
00139         {
00140             const Impala::Real64* values = data->GetColumn2()->GetVectorData(i);
00141             for( int j=0;j<featureLength;j++)
00142                 mTestFeatures->SetValue(values[j],j,i);
00143 
00144             mTestLabels->SetValue(annotation->Get2(i),0,i);
00145             if((i==0)||(i==sampleLength-1)){
00146                 ILOG_INFO(i<<".\t"<<mTestFeatures->Value(0,i)<<" ... "
00147                                 <<mTestFeatures->Value(featureLength-1,i)<<" => "
00148                                 <<mTestLabels->Value(0,i));
00149             }
00150             
00151         }
00152         ILOG_INFO("End of SetTestSet");
00153         //CopyFeatureTable(data,annotation,&mTestFeatures,&mTestLabels);
00154     }
00155 
00156 
00157     void SetTestSet(const Vector::VectorSet<Array::Array2dScalarReal64>* set)
00158     {
00159         int featureLength = set->GetVectorLength(0);
00160         int sampleLength  = set->Size();
00161         if(mTestFeatures)
00162             delete mTestFeatures;
00163         if(mTestLabels)
00164             delete mTestLabels;
00165         mTestLabels=0;
00166         mTestFeatures=0;
00167 
00168         mTestLabels = Array::ArrayCreate<Array::Array2dScalarReal64>
00169                     (1,sampleLength,0,0);
00170         mTestFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>
00171                     (featureLength,sampleLength,0,0);
00172         SetVal(mTestLabels,0);
00173 
00174         for(int i=0;i<sampleLength;i++){
00175             const Impala::Real64* values = set->GetVectorData(i);
00176             for( int j=0;j<featureLength;j++)
00177                 mTestFeatures->SetValue(values[j],i,j);
00178         }
00179         
00180     }
00181 
00182     void Train(Util::PropertySet* properties)
00183     { 
00184         int i,j;
00185         bool debug=false;
00186         int W = mTrainFeatures->CW();
00187         int H = mTrainFeatures->CH();
00188 
00189         Matrix::Mat* tmp    = 0;
00190         Matrix::Mat* tmp2   = 0;
00191         Matrix::Mat* tmp3   = 0;
00192 
00193         Matrix::Mat* Targets= 0;
00194         Matrix::Mat* FeatureAverages   = 0;
00195         Matrix::Mat* CenteredFeatureInverse = 0;
00196         Array::Array2dScalarReal64* CenteredFeatures   = 0;
00197         Array::Array2dScalarReal64* Features = 0;
00198         Array::Array2dScalarReal64* Labels   = 0;
00199 
00200         Vector::VectorSet<Array::Array2dScalarReal64> vs(true, W, H);
00201 
00202         Array::Set(Features,mTrainFeatures);
00203         Array::Set(Labels,mTrainLabels);
00204 
00206         //Clear the transformation if it was already calculated
00207         if(mRotation!=0)
00208             delete mRotation;
00209         if(mOffset!=0)
00210             delete mOffset;
00211 
00213         //Get The Average for each feature
00214         vs.SetStorage(Features);
00215         Vector::VectorTem<Real64> featureMeans=0;
00216         Avg(&featureMeans,&vs,H,0);
00217         
00219         //Center the data around 0 mean:
00220         Vector::MulAssign(featureMeans,Impala::Real64(-1));
00221         for(i=0;i<H;i++){
00222             Vector::VectorTem<Real64> tmpVec = vs.GetVector(i,true);
00223             AddAssign(tmpVec,featureMeans);
00224         }
00225         Vector::MulAssign(featureMeans,Impala::Real64(-1));
00226 
00228         //Now The data is centered around origin
00229         //We should add a column with 1's to the end of the feature vectors
00230 
00231         //If the rank of this Centered matrix with the ones column is 
00232         //less than the initial number of features for each sample,
00233         //Then we should find the Pseudo Inverse of this B matrice, and multiply
00234         //it with a two column labels matrix
00235         CenteredFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>(W+1,H);
00236         SetVal(CenteredFeatures,1);
00237         SetPart(CenteredFeatures,Features,0,0,W,H,0,0);
00238 
00239 
00240         Targets         = Matrix::MatCreate<Array::Array2dScalarReal64>(H,2);
00241         FeatureAverages = Matrix::MatCreate<Array::Array2dScalarReal64>(1,W);
00242 
00243         for(int i=0;i<W;i++){
00244             FeatureAverages->SetValue(featureMeans.Elem(i),i,0);
00245         }
00246 
00247         SetPart(Targets,Labels,0,0,1,H,0,0);
00248         for(int i=0;i<H;i++){
00249             Targets->SetValue(Targets->Value(0,i),1,i);
00250             Targets->SetValue(-Targets->Value(0,i),0,i);
00251         }
00252         CenteredFeatureInverse=Matrix::MatPInv(CenteredFeatures);
00253 
00254             
00255         tmp = Matrix::MatMul(CenteredFeatureInverse,Targets);
00256         tmp2 = Matrix::MatCreate<Array::Array2dScalarReal64>(W,2);
00257 
00258         for(int i=0;i<W;i++){
00259             tmp2->SetValue(tmp->Value(0,i),0,i);
00260             tmp2->SetValue(tmp->Value(1,i),1,i);
00261         }
00262         tmp3 = Matrix::MatMul(FeatureAverages,tmp2);
00263 
00264         mOffset = Matrix::MatCreate<Array::Array2dScalarReal64>(1,2);
00265         mOffset->SetValue(tmp->Value(0,W)-tmp3->Value(0,0),0,0);
00266         mOffset->SetValue(tmp->Value(1,W)-tmp3->Value(1,0),1,0);
00267 
00268         mRotation = Matrix::MatCreate<Array::Array2dScalarReal64>(W,2);
00269         SetPart(mRotation,tmp,0,0,2,W,0,0);
00270 
00271         delete tmp;
00272         delete tmp2;
00273         delete tmp3;
00274 
00275         delete Targets;
00276         delete Labels;
00277         //delete Features; //will be released by the vector set
00278         delete FeatureAverages;
00279         delete CenteredFeatures;;
00280         delete CenteredFeatureInverse;
00281         CNormC();
00282         
00283     }
00285     // Classifier normalisation for ML posterior probabilities
00286     void ApplyMapping(Array::Array2dScalarReal64* feats)
00287     {
00288         if(mMapped)
00289             delete mMapped;
00290         if(mProbabilities)
00291             delete mProbabilities;
00292 
00293 
00294         int W=feats->CW();
00295         int H=feats->CH();
00296 
00297         Matrix::Mat* Features = Matrix::MatCreate<Array::Array2dScalarReal64>(H,W+1);
00298 
00299         Array::SetVal(Features,1);
00300         Array::SetPart(Features,feats,0,0,W,H,0,0);
00301         
00302         Matrix::Mat* Rot = Matrix::MatCreate<Array::Array2dScalarReal64>(W+1,2);
00303         Array::SetPart(Rot,mRotation,0,0,2,W,0,0);
00304 
00305         Rot->SetValue(mOffset->Value(0,0),0,W);
00306         Rot->SetValue(mOffset->Value(1,0),1,W);
00307         
00308         mMapped = Matrix::MatMul(Features,Rot);
00309 
00310         mProbabilities = SigmoidMap(mMapped,1);
00311 
00312         delete Rot;
00313         delete Features;
00314     
00315     }
00316 
00317     void CNormC()
00318     {
00319         int W=mTrainFeatures->CW();
00320         int H=mTrainFeatures->CH();
00321 
00322         ApplyMapping(mTrainFeatures);
00323 
00324         Impala::Real64 scale = 1E-10;
00325         Impala::Real64 reg = 1E-7;
00326         Impala::Real64 epsilon = 1E-6;
00327         Impala::Real64 likelihood= - std::numeric_limits<double>::infinity();
00328         Impala::Real64 likelihood_new= std::numeric_limits<double>::min() ;
00329         Impala::Real64 rmin= std::numeric_limits<double>::min() ;
00330 
00331         Impala::Real64 p;
00332         Array::Array2dScalarReal64* Positive = 0;
00333         Array::EqualsVal(Positive,mTrainLabels,1);
00334         p = Array::PixSum(Positive)/H;
00335         delete Positive;
00336 
00337         Matrix::Mat* pax;
00338         Matrix::Mat* pbx;
00339         Matrix::Mat* PosMapped = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00340         for(int i=0;i<H;i++)
00341             PosMapped->SetValue(mMapped->Value(mTrainLabels->Value(i,0)==1?1:0,i),0,i);
00342 
00343         Matrix::Mat* Probs = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00344         for(int i=0;i<H;i++)
00345             Probs->SetValue( ( mTrainLabels->Value(i,0)==1 ? p : 1-p ),0,i);
00346 
00347         while(abs(likelihood_new - likelihood) > epsilon)
00348         {
00349             Matrix::Mat* Mapped = 0;
00350             Array::MulVal(Mapped,PosMapped,scale);
00351 
00352             likelihood = likelihood_new;
00353 
00354             pax = SigmoidMap(Mapped,1);
00355             pbx = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00356             Array::MulVal(pbx,pax,-1);
00357             Array::AddVal(pbx,pbx,1);
00358 
00360             //Calculate new likelihood
00361             //
00362             Matrix::Mat* tmp = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00363             for(int i=0;i<H;i++)
00364                 tmp->SetValue(Probs->Value(0,i)*log(pax->Value(0,i)+rmin),0,i);
00365             Impala::Real64 mean = PixSum(tmp)/H;
00366 
00367             likelihood_new = mean - reg*log(scale+1);
00368 
00370             //Calculate the new scale
00371             //
00372             for(int i=0;i<H;i++)
00373                 tmp->SetValue(Probs->Value(0,i)*pbx->Value(0,i),0,i);
00374             
00375             Matrix::Mat* tmp2 = Matrix::MatCreate<Array::Array2dScalarReal64>(1,H);
00376             for(int i=0;i<H;i++)
00377                 tmp2->SetValue(tmp->Value(0,i),i,0);
00378 
00379             Matrix::Mat* tmp3=Matrix::MatMul(tmp2,PosMapped);
00380 
00382             Matrix::Mat* tmp4 = Matrix::MatCreate<Array::Array2dScalarReal64>(1,H);
00383             Matrix::Mat* tmp5 = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00384 
00385             for(int i=0;i<H;i++)
00386                 tmp4->SetValue(Probs->Value(0,i)*PosMapped->Value(0,i)*pax->Value(0,i),i,0);
00387 
00388             for(int i=0;i<H;i++)
00389                 tmp5->SetValue(PosMapped->Value(0,i)*pbx->Value(0,i),0,i);
00390 
00391             Matrix::Mat* tmp6 = Matrix::MatMul(tmp4,tmp5);
00392 
00394             Impala::Real64 a = tmp3->Value(0,0);
00395             Impala::Real64 b = reg*H/(scale+1);
00396             Impala::Real64 c = tmp6->Value(0,0) - H* reg/((scale+1)*(scale+1)) + rmin;
00397 
00398             scale = scale + (a - b) / c;
00400 
00401             delete Mapped;
00402             delete pax;
00403             delete pbx;
00404             delete tmp;
00405             delete tmp2;
00406             delete tmp3;
00407             delete tmp4;
00408             delete tmp5;
00409             delete tmp6;
00410             pax=0;
00411             pbx=0;
00412         }
00413 
00414         Impala::Real64 var;
00415         Impala::Real64* o=0;
00416         
00417         Array::PixStat(PosMapped,o,o,o,&var);
00418         
00419         // Dirty trick to avoid 0 and inf.
00420         scale = std::min(std::max(scale,0.1/(sqrt(var)+1e-16)),1e16);
00421 
00422         Array::MulVal(mRotation,mRotation,scale);
00423         Array::MulVal(mOffset,mOffset,scale);
00424 
00425         delete Probs;
00426         delete PosMapped;
00427     }
00428 
00429     Matrix::Mat*
00430     SigmoidMap(Matrix::Mat* Map,Impala::Real64 scale=1)
00431     {
00432         Matrix::Mat* res = 0;
00433         res = Matrix::MatCopy(Map);
00434         for(int i=0;i<Map->CW();i++)
00435             for(int j=0;j<Map->CH();j++)
00436                 res->SetValue(1/(1+exp(-res->Value(i,j)/scale)),i,j);
00437 
00438         return res;
00439     }
00440 
00441     virtual double Predict(const Vector::VectorTem<double>* feature)
00442     {
00443         
00444         Matrix::Mat* tmp = Matrix::MatCreate<Array::Array2dScalarReal64>
00445                                             (1,feature->Size(),
00446                                             const_cast<Impala::Real64*>(feature->GetData())
00447                                             ,true);
00448         ApplyMapping(tmp);
00449         delete tmp;
00450         return mProbabilities->Value(1,0);
00451     }
00452 
00453     virtual RankingTableType* Rank()
00454     {
00455         if(mTestFeatures == 0)
00456         {
00457             ILOG_ERROR("[Fisher::Train] no test set");
00458             return NULL;
00459         }
00460         int size=mTestFeatures->CH();
00461         RankingTableType* result = MakeRankingTable(size);
00462 
00463         ApplyMapping(mTestFeatures);
00464 
00465         for(int i=0 ; i<size ; i++)
00466         {
00467             result->Add("", i, mTestLabels->Value(i,0), mProbabilities->Value(1,i));
00468         }
00469 
00470         return result;
00471     }
00472 
00473     virtual void
00474     Predict(Table::SimilarityTableSet::SimTableType* result)
00475     {
00476 
00477         if (mTestFeatures == 0)
00478         {
00479             ILOG_ERROR("[Fisher::Predict] no test set");
00480             return;
00481         }
00482         result->SetSize(0);
00483         ApplyMapping(mTestFeatures);
00484         int size=mTestFeatures->CH();
00485         
00486         for (int i=0 ; i<size ; i++)
00487         {
00488             result->Add(mProbabilities->Value(1,i));
00489         }
00490     }
00491 
00492     void LoadModel(const std::string& name, Util::Database* db)
00493     {
00494         if (Util::Database::GetInstance().GetDataChannel())
00495         {
00496             std::string tmpName = FileNameTmp();
00497             Util::FileCopyRemoteToLocal(name, tmpName);
00498             
00499             Util::Database& db = Util::Database::GetInstance();
00500             Util::IOBuffer* buf = db.GetIOBuffer(tmpName, true, false, "");
00501             ReadRaw(mRotation,buf);
00502             ReadRaw(mOffset,buf);
00503             delete buf;
00504 
00505             unlink(tmpName.c_str());
00506         }
00507         else
00508         {
00509             Util::Database& db = Util::Database::GetInstance();
00510             Util::IOBuffer* buf = db.GetIOBuffer(name, true, false, "");
00511             ReadRaw(mRotation,buf);
00512             ReadRaw(mOffset,buf);
00513             delete buf;
00514         }
00515     }
00516     void SaveModel(const std::string& name, Util::Database* db)
00517     {
00518         if (Util::Database::GetInstance().GetDataChannel())
00519         {
00520             std::string tmpName = FileNameTmp();
00521             //if(svm_save_model(name.c_str(), mModel) == -1)
00522             Util::Database& db = Util::Database::GetInstance();
00523             Util::IOBuffer* buf = db.GetIOBuffer(tmpName, true, false, "");
00524 
00525             WriteRaw(mRotation,buf,true);
00526             WriteRaw(mOffset,buf,true);
00527             delete buf;
00528 
00529             Util::FileCopyLocalToRemote(tmpName, name);
00530             unlink(tmpName.c_str());
00531         }
00532         else
00533         {
00534             Util::Database& db = Util::Database::GetInstance();
00535             Util::IOBuffer* buf = db.GetIOBuffer(name, true, false, "");
00536 
00537             WriteRaw(mRotation,buf,true);
00538             WriteRaw(mOffset,buf,true);
00539             delete buf;
00540         }
00541     }
00542 private:
00543     Array::Array2dScalarReal64* mTrainFeatures;
00544     Array::Array2dScalarReal64* mTrainLabels;
00545     Array::Array2dScalarReal64* mTestFeatures;
00546     Array::Array2dScalarReal64* mTestLabels;
00547 
00548     Matrix::Mat* mRotation;
00549     Matrix::Mat* mOffset;
00550     Matrix::Mat* mMapped;
00551     Matrix::Mat* mProbabilities;
00552 
00553     int mSampleLength;
00554     int mFeatureLength;
00555     ILOG_VAR_DEC;
00556     int mCount;
00557 };
00558 
00559 ILOG_VAR_INIT(Fisher,Impala.Core.Training);
00560 
00561 }//namespace Core
00562 }//namespace Training
00563 }//Impala
00564 
00565 
00566 #endif

Generated on Fri Mar 19 09:31:24 2010 for ImpalaSrc by  doxygen 1.5.1