00001 #ifndef Impala_Core_Training_Fisher_h
00002 #define Impala_Core_Training_Fisher_h
00003
00004 #include "Core/Training/Classifier.h"
00005 #include "Core/Training/Utils.h"
00006 #include "Core/Table/Sort.h"
00007
00008 #include "Core/Array/Set.h"
00009 #include "Core/Array/SetVal.h"
00010 #include "Core/Array/Array2dTem.h"
00011 #include "Core/Array/Arrays.h"
00012 #include "Core/Array/AddVal.h"
00013 #include "Core/Array/MulVal.h"
00014 #include "Core/Array/EqualsVal.h"
00015 #include "Core/Array/PrintData.h"
00016 #include "Core/Array/PixStat.h"
00017
00018 #include "Core/Matrix/MatTranspose.h"
00019 #include "Core/Matrix/MatSVD.h"
00020 #include "Core/Matrix/MatPInv.h"
00021 #include "Core/Matrix/MatMul.h"
00022 #include "Core/Matrix/MatCopy.h"
00023 #include "Core/Matrix/MatReplicateMatrix.h"
00024
00025 #include "Core/Vector/VectorTem.h"
00026 #include "Core/Vector/VectorSet.h"
00027 #include "Core/Vector/Avg.h"
00028 #include "Core/Vector/AddAssign.h"
00029 #include "Core/Vector/MulAssign.h"
00030
00031 #include <iomanip>
00032
00033
00034 namespace Impala
00035 {
00036 namespace Core
00037 {
00038 namespace Training
00039 {
00040
00043 class Fisher : public Classifier
00044 {
00045 public:
00046 Fisher()
00047 {
00048 mTrainLabels=0;
00049 mTrainFeatures=0;
00050 mTestLabels=0;
00051 mTestFeatures=0;
00052 mRotation=0;
00053 mOffset=0;
00054 mMapped=0;
00055 mProbabilities=0;
00056 mCount=0;
00057 }
00058
00059 ~Fisher()
00060 {
00061 delete mTrainLabels;
00062 delete mTrainFeatures;
00063 delete mTestLabels;
00064 delete mTestFeatures;
00065 delete mRotation;
00066 delete mOffset;
00067 delete mMapped;
00068 delete mProbabilities;
00069 }
00070
00071 virtual void SetTrainSet(Feature::FeatureTable* data,
00072 Table::AnnotationTable* annotation,
00073 Table::QuidTable* mask)
00074 {
00075 ILOG_INFO("SetTrainSet");
00076 int featureLength = data->GetFeatureVectorLength();
00077 int sampleLength = data->Size();
00078 ILOG_INFO(sampleLength<<" samples of "<<featureLength<<" features");
00079
00080 if(mTrainFeatures)
00081 delete mTrainFeatures;
00082 if(mTrainLabels)
00083 delete mTrainLabels;
00084
00085 mTrainLabels=0;
00086 mTrainFeatures=0;
00087
00088 mTrainLabels = Array::ArrayCreate<Array::Array2dScalarReal64>
00089 (1,sampleLength,0,0);
00090 mTrainFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>
00091 (featureLength,sampleLength,0,0);
00092
00093 for(int i=0;i<sampleLength;i++)
00094 {
00095 const Impala::Real64* values = data->GetColumn2()->GetVectorData(i);
00096 for( int j=0;j<featureLength;j++)
00097 mTrainFeatures->SetValue(values[j],j,i);
00098
00099 mTrainLabels->SetValue(annotation->Get2(i),0,i);
00100 if((i==0)||(i==sampleLength-1)){
00101 ILOG_INFO("FeatureLength:"<<featureLength);
00102 ILOG_INFO(i<<".\t"<<mTrainFeatures->Value(0,i)<<" ... "
00103 <<mTrainFeatures->Value(featureLength-1,i)<<" => "
00104 <<mTrainLabels->Value(0,i));
00105 }
00106 }
00107
00108 ILOG_INFO("FeatureLength:"<<featureLength);
00109 ILOG_INFO(0<<".\t"<<mTrainFeatures->Value(0,0)<<" ... "
00110 <<mTrainFeatures->Value(featureLength-1,0)<<" => "
00111 <<mTrainLabels->Value(0,0));
00112 ILOG_INFO("End of SetTrainSet");
00113
00114 }
00115
00116 virtual void SetTestSet(Feature::FeatureTable* data,
00117 Table::AnnotationTable* annotation,
00118 Table::QuidTable* mask)
00119 {
00120 ILOG_INFO("SetTestSet");
00121 int featureLength = data->GetFeatureVectorLength();
00122 int sampleLength = data->Size();
00123 ILOG_INFO(sampleLength<<" samples of "<<featureLength<<" features");
00124
00125 if(mTestFeatures)
00126 delete (mTestFeatures);
00127 if(mTestLabels)
00128 delete (mTestLabels);
00129
00130 mTestLabels=0;
00131 mTestFeatures=0;
00132
00133 mTestLabels = Array::ArrayCreate<Array::Array2dScalarReal64>
00134 (1,sampleLength,0,0);
00135 mTestFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>
00136 (featureLength,sampleLength,0,0);
00137
00138 for(int i=0;i<sampleLength;i++)
00139 {
00140 const Impala::Real64* values = data->GetColumn2()->GetVectorData(i);
00141 for( int j=0;j<featureLength;j++)
00142 mTestFeatures->SetValue(values[j],j,i);
00143
00144 mTestLabels->SetValue(annotation->Get2(i),0,i);
00145 if((i==0)||(i==sampleLength-1)){
00146 ILOG_INFO(i<<".\t"<<mTestFeatures->Value(0,i)<<" ... "
00147 <<mTestFeatures->Value(featureLength-1,i)<<" => "
00148 <<mTestLabels->Value(0,i));
00149 }
00150
00151 }
00152 ILOG_INFO("End of SetTestSet");
00153
00154 }
00155
00156
00157 void SetTestSet(const Vector::VectorSet<Array::Array2dScalarReal64>* set)
00158 {
00159 int featureLength = set->GetVectorLength(0);
00160 int sampleLength = set->Size();
00161 if(mTestFeatures)
00162 delete mTestFeatures;
00163 if(mTestLabels)
00164 delete mTestLabels;
00165 mTestLabels=0;
00166 mTestFeatures=0;
00167
00168 mTestLabels = Array::ArrayCreate<Array::Array2dScalarReal64>
00169 (1,sampleLength,0,0);
00170 mTestFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>
00171 (featureLength,sampleLength,0,0);
00172 SetVal(mTestLabels,0);
00173
00174 for(int i=0;i<sampleLength;i++){
00175 const Impala::Real64* values = set->GetVectorData(i);
00176 for( int j=0;j<featureLength;j++)
00177 mTestFeatures->SetValue(values[j],i,j);
00178 }
00179
00180 }
00181
00182 void Train(Util::PropertySet* properties)
00183 {
00184 int i,j;
00185 bool debug=false;
00186 int W = mTrainFeatures->CW();
00187 int H = mTrainFeatures->CH();
00188
00189 Matrix::Mat* tmp = 0;
00190 Matrix::Mat* tmp2 = 0;
00191 Matrix::Mat* tmp3 = 0;
00192
00193 Matrix::Mat* Targets= 0;
00194 Matrix::Mat* FeatureAverages = 0;
00195 Matrix::Mat* CenteredFeatureInverse = 0;
00196 Array::Array2dScalarReal64* CenteredFeatures = 0;
00197 Array::Array2dScalarReal64* Features = 0;
00198 Array::Array2dScalarReal64* Labels = 0;
00199
00200 Vector::VectorSet<Array::Array2dScalarReal64> vs(true, W, H);
00201
00202 Array::Set(Features,mTrainFeatures);
00203 Array::Set(Labels,mTrainLabels);
00204
00206
00207 if(mRotation!=0)
00208 delete mRotation;
00209 if(mOffset!=0)
00210 delete mOffset;
00211
00213
00214 vs.SetStorage(Features);
00215 Vector::VectorTem<Real64> featureMeans=0;
00216 Avg(&featureMeans,&vs,H,0);
00217
00219
00220 Vector::MulAssign(featureMeans,Impala::Real64(-1));
00221 for(i=0;i<H;i++){
00222 Vector::VectorTem<Real64> tmpVec = vs.GetVector(i,true);
00223 AddAssign(tmpVec,featureMeans);
00224 }
00225 Vector::MulAssign(featureMeans,Impala::Real64(-1));
00226
00228
00229
00230
00231
00232
00233
00234
00235 CenteredFeatures = Array::ArrayCreate<Array::Array2dScalarReal64>(W+1,H);
00236 SetVal(CenteredFeatures,1);
00237 SetPart(CenteredFeatures,Features,0,0,W,H,0,0);
00238
00239
00240 Targets = Matrix::MatCreate<Array::Array2dScalarReal64>(H,2);
00241 FeatureAverages = Matrix::MatCreate<Array::Array2dScalarReal64>(1,W);
00242
00243 for(int i=0;i<W;i++){
00244 FeatureAverages->SetValue(featureMeans.Elem(i),i,0);
00245 }
00246
00247 SetPart(Targets,Labels,0,0,1,H,0,0);
00248 for(int i=0;i<H;i++){
00249 Targets->SetValue(Targets->Value(0,i),1,i);
00250 Targets->SetValue(-Targets->Value(0,i),0,i);
00251 }
00252 CenteredFeatureInverse=Matrix::MatPInv(CenteredFeatures);
00253
00254
00255 tmp = Matrix::MatMul(CenteredFeatureInverse,Targets);
00256 tmp2 = Matrix::MatCreate<Array::Array2dScalarReal64>(W,2);
00257
00258 for(int i=0;i<W;i++){
00259 tmp2->SetValue(tmp->Value(0,i),0,i);
00260 tmp2->SetValue(tmp->Value(1,i),1,i);
00261 }
00262 tmp3 = Matrix::MatMul(FeatureAverages,tmp2);
00263
00264 mOffset = Matrix::MatCreate<Array::Array2dScalarReal64>(1,2);
00265 mOffset->SetValue(tmp->Value(0,W)-tmp3->Value(0,0),0,0);
00266 mOffset->SetValue(tmp->Value(1,W)-tmp3->Value(1,0),1,0);
00267
00268 mRotation = Matrix::MatCreate<Array::Array2dScalarReal64>(W,2);
00269 SetPart(mRotation,tmp,0,0,2,W,0,0);
00270
00271 delete tmp;
00272 delete tmp2;
00273 delete tmp3;
00274
00275 delete Targets;
00276 delete Labels;
00277
00278 delete FeatureAverages;
00279 delete CenteredFeatures;;
00280 delete CenteredFeatureInverse;
00281 CNormC();
00282
00283 }
00285
00286 void ApplyMapping(Array::Array2dScalarReal64* feats)
00287 {
00288 if(mMapped)
00289 delete mMapped;
00290 if(mProbabilities)
00291 delete mProbabilities;
00292
00293
00294 int W=feats->CW();
00295 int H=feats->CH();
00296
00297 Matrix::Mat* Features = Matrix::MatCreate<Array::Array2dScalarReal64>(H,W+1);
00298
00299 Array::SetVal(Features,1);
00300 Array::SetPart(Features,feats,0,0,W,H,0,0);
00301
00302 Matrix::Mat* Rot = Matrix::MatCreate<Array::Array2dScalarReal64>(W+1,2);
00303 Array::SetPart(Rot,mRotation,0,0,2,W,0,0);
00304
00305 Rot->SetValue(mOffset->Value(0,0),0,W);
00306 Rot->SetValue(mOffset->Value(1,0),1,W);
00307
00308 mMapped = Matrix::MatMul(Features,Rot);
00309
00310 mProbabilities = SigmoidMap(mMapped,1);
00311
00312 delete Rot;
00313 delete Features;
00314
00315 }
00316
00317 void CNormC()
00318 {
00319 int W=mTrainFeatures->CW();
00320 int H=mTrainFeatures->CH();
00321
00322 ApplyMapping(mTrainFeatures);
00323
00324 Impala::Real64 scale = 1E-10;
00325 Impala::Real64 reg = 1E-7;
00326 Impala::Real64 epsilon = 1E-6;
00327 Impala::Real64 likelihood= - std::numeric_limits<double>::infinity();
00328 Impala::Real64 likelihood_new= std::numeric_limits<double>::min() ;
00329 Impala::Real64 rmin= std::numeric_limits<double>::min() ;
00330
00331 Impala::Real64 p;
00332 Array::Array2dScalarReal64* Positive = 0;
00333 Array::EqualsVal(Positive,mTrainLabels,1);
00334 p = Array::PixSum(Positive)/H;
00335 delete Positive;
00336
00337 Matrix::Mat* pax;
00338 Matrix::Mat* pbx;
00339 Matrix::Mat* PosMapped = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00340 for(int i=0;i<H;i++)
00341 PosMapped->SetValue(mMapped->Value(mTrainLabels->Value(i,0)==1?1:0,i),0,i);
00342
00343 Matrix::Mat* Probs = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00344 for(int i=0;i<H;i++)
00345 Probs->SetValue( ( mTrainLabels->Value(i,0)==1 ? p : 1-p ),0,i);
00346
00347 while(abs(likelihood_new - likelihood) > epsilon)
00348 {
00349 Matrix::Mat* Mapped = 0;
00350 Array::MulVal(Mapped,PosMapped,scale);
00351
00352 likelihood = likelihood_new;
00353
00354 pax = SigmoidMap(Mapped,1);
00355 pbx = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00356 Array::MulVal(pbx,pax,-1);
00357 Array::AddVal(pbx,pbx,1);
00358
00360
00361
00362 Matrix::Mat* tmp = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00363 for(int i=0;i<H;i++)
00364 tmp->SetValue(Probs->Value(0,i)*log(pax->Value(0,i)+rmin),0,i);
00365 Impala::Real64 mean = PixSum(tmp)/H;
00366
00367 likelihood_new = mean - reg*log(scale+1);
00368
00370
00371
00372 for(int i=0;i<H;i++)
00373 tmp->SetValue(Probs->Value(0,i)*pbx->Value(0,i),0,i);
00374
00375 Matrix::Mat* tmp2 = Matrix::MatCreate<Array::Array2dScalarReal64>(1,H);
00376 for(int i=0;i<H;i++)
00377 tmp2->SetValue(tmp->Value(0,i),i,0);
00378
00379 Matrix::Mat* tmp3=Matrix::MatMul(tmp2,PosMapped);
00380
00382 Matrix::Mat* tmp4 = Matrix::MatCreate<Array::Array2dScalarReal64>(1,H);
00383 Matrix::Mat* tmp5 = Matrix::MatCreate<Array::Array2dScalarReal64>(H,1);
00384
00385 for(int i=0;i<H;i++)
00386 tmp4->SetValue(Probs->Value(0,i)*PosMapped->Value(0,i)*pax->Value(0,i),i,0);
00387
00388 for(int i=0;i<H;i++)
00389 tmp5->SetValue(PosMapped->Value(0,i)*pbx->Value(0,i),0,i);
00390
00391 Matrix::Mat* tmp6 = Matrix::MatMul(tmp4,tmp5);
00392
00394 Impala::Real64 a = tmp3->Value(0,0);
00395 Impala::Real64 b = reg*H/(scale+1);
00396 Impala::Real64 c = tmp6->Value(0,0) - H* reg/((scale+1)*(scale+1)) + rmin;
00397
00398 scale = scale + (a - b) / c;
00400
00401 delete Mapped;
00402 delete pax;
00403 delete pbx;
00404 delete tmp;
00405 delete tmp2;
00406 delete tmp3;
00407 delete tmp4;
00408 delete tmp5;
00409 delete tmp6;
00410 pax=0;
00411 pbx=0;
00412 }
00413
00414 Impala::Real64 var;
00415 Impala::Real64* o=0;
00416
00417 Array::PixStat(PosMapped,o,o,o,&var);
00418
00419
00420 scale = std::min(std::max(scale,0.1/(sqrt(var)+1e-16)),1e16);
00421
00422 Array::MulVal(mRotation,mRotation,scale);
00423 Array::MulVal(mOffset,mOffset,scale);
00424
00425 delete Probs;
00426 delete PosMapped;
00427 }
00428
00429 Matrix::Mat*
00430 SigmoidMap(Matrix::Mat* Map,Impala::Real64 scale=1)
00431 {
00432 Matrix::Mat* res = 0;
00433 res = Matrix::MatCopy(Map);
00434 for(int i=0;i<Map->CW();i++)
00435 for(int j=0;j<Map->CH();j++)
00436 res->SetValue(1/(1+exp(-res->Value(i,j)/scale)),i,j);
00437
00438 return res;
00439 }
00440
00441 virtual double Predict(const Vector::VectorTem<double>* feature)
00442 {
00443
00444 Matrix::Mat* tmp = Matrix::MatCreate<Array::Array2dScalarReal64>
00445 (1,feature->Size(),
00446 const_cast<Impala::Real64*>(feature->GetData())
00447 ,true);
00448 ApplyMapping(tmp);
00449 delete tmp;
00450 return mProbabilities->Value(1,0);
00451 }
00452
00453 virtual RankingTableType* Rank()
00454 {
00455 if(mTestFeatures == 0)
00456 {
00457 ILOG_ERROR("[Fisher::Train] no test set");
00458 return NULL;
00459 }
00460 int size=mTestFeatures->CH();
00461 RankingTableType* result = MakeRankingTable(size);
00462
00463 ApplyMapping(mTestFeatures);
00464
00465 for(int i=0 ; i<size ; i++)
00466 {
00467 result->Add("", i, mTestLabels->Value(i,0), mProbabilities->Value(1,i));
00468 }
00469
00470 return result;
00471 }
00472
00473 virtual void
00474 Predict(Table::SimilarityTableSet::SimTableType* result)
00475 {
00476
00477 if (mTestFeatures == 0)
00478 {
00479 ILOG_ERROR("[Fisher::Predict] no test set");
00480 return;
00481 }
00482 result->SetSize(0);
00483 ApplyMapping(mTestFeatures);
00484 int size=mTestFeatures->CH();
00485
00486 for (int i=0 ; i<size ; i++)
00487 {
00488 result->Add(mProbabilities->Value(1,i));
00489 }
00490 }
00491
00492 void LoadModel(const std::string& name, Util::Database* db)
00493 {
00494 if (Util::Database::GetInstance().GetDataChannel())
00495 {
00496 std::string tmpName = FileNameTmp();
00497 Util::FileCopyRemoteToLocal(name, tmpName);
00498
00499 Util::Database& db = Util::Database::GetInstance();
00500 Util::IOBuffer* buf = db.GetIOBuffer(tmpName, true, false, "");
00501 ReadRaw(mRotation,buf);
00502 ReadRaw(mOffset,buf);
00503 delete buf;
00504
00505 unlink(tmpName.c_str());
00506 }
00507 else
00508 {
00509 Util::Database& db = Util::Database::GetInstance();
00510 Util::IOBuffer* buf = db.GetIOBuffer(name, true, false, "");
00511 ReadRaw(mRotation,buf);
00512 ReadRaw(mOffset,buf);
00513 delete buf;
00514 }
00515 }
00516 void SaveModel(const std::string& name, Util::Database* db)
00517 {
00518 if (Util::Database::GetInstance().GetDataChannel())
00519 {
00520 std::string tmpName = FileNameTmp();
00521
00522 Util::Database& db = Util::Database::GetInstance();
00523 Util::IOBuffer* buf = db.GetIOBuffer(tmpName, true, false, "");
00524
00525 WriteRaw(mRotation,buf,true);
00526 WriteRaw(mOffset,buf,true);
00527 delete buf;
00528
00529 Util::FileCopyLocalToRemote(tmpName, name);
00530 unlink(tmpName.c_str());
00531 }
00532 else
00533 {
00534 Util::Database& db = Util::Database::GetInstance();
00535 Util::IOBuffer* buf = db.GetIOBuffer(name, true, false, "");
00536
00537 WriteRaw(mRotation,buf,true);
00538 WriteRaw(mOffset,buf,true);
00539 delete buf;
00540 }
00541 }
00542 private:
00543 Array::Array2dScalarReal64* mTrainFeatures;
00544 Array::Array2dScalarReal64* mTrainLabels;
00545 Array::Array2dScalarReal64* mTestFeatures;
00546 Array::Array2dScalarReal64* mTestLabels;
00547
00548 Matrix::Mat* mRotation;
00549 Matrix::Mat* mOffset;
00550 Matrix::Mat* mMapped;
00551 Matrix::Mat* mProbabilities;
00552
00553 int mSampleLength;
00554 int mFeatureLength;
00555 ILOG_VAR_DEC;
00556 int mCount;
00557 };
00558
00559 ILOG_VAR_INIT(Fisher,Impala.Core.Training);
00560
00561 }
00562 }
00563 }
00564
00565
00566 #endif