Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

ApplyConceptsHelperKernels.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Training_ApplyConceptsHelperKernels_h
00002 #define Impala_Core_Training_ApplyConceptsHelperKernels_h
00003 
00004 #include "Core/Training/ApplyConceptsHelper.h"
00005 #include "Util/StringParser.h"
00006 #include "Core/Array/Set.h"
00007 #include "Core/Array/Add.h"
00008 #include "Core/Array/DivVal.h"
00009 #include "Core/Array/MulVal.h"
00010 #include "Core/Array/Exp.h"
00011 #include "Core/Vector/Apply.h"
00012 #include "Core/Training/TrainDataSrcKernelMatrix.h"
00013 
00014 namespace Impala
00015 {
00016 namespace Core
00017 {
00018 namespace Training
00019 {
00020 
00021 class ApplyConceptsHelperKernels : public ApplyConceptsHelper
00022 {
00023 public:
00024     ApplyConceptsHelperKernels() 
00025         : mKernelMatrix(0), mFeatureExternal(0), mInitialized(false)
00026     {
00027     }
00028 
00029     virtual 
00030     ~ApplyConceptsHelperKernels()
00031     {
00032         if (mUseFeatureCache)
00033         {
00034             ILOG_INFO("Cleaning up disk cache...");
00035             for(int i = 0; i < mTrainFeatureTempNames.size(); i++)
00036             {
00037                 unlink(mTrainFeatureTempNames[i].c_str());
00038             }
00039         }
00040     }
00041         
00042     void
00043     Initialise(Database::RawDataSet* annoSet, String kernelMatrixName)
00044     {
00045         mAnnoSet = annoSet;
00046         mKernelMatrixName = kernelMatrixName;
00047         LoadInfoFile();
00048         mInitialized = true;
00049     }
00050 
00051     String
00052     GetModelName()
00053     {
00054         return mKernelMatrixName;
00055     }
00056 
00057     FeatureTable*
00058     LoadFeatures(Database::PathCreator* pathCreator)
00059     {
00060         if(!mInitialized)
00061             ILOG_ERROR("not initialized");
00062         if(mFeatureExternal)
00063             ILOG_ERROR("features not empty");
00064 
00065         String kernelDataCacheFilename =
00066             pathCreator->GetDataSet()->GetFilePathKernelData(
00067                     pathCreator->GetWalkType(),
00068                     mAnnoSet->GetSetName(),
00069                     mKernelMatrixName,
00070                     pathCreator->GetFolderOrVideoId(), false, true);
00071         if(!kernelDataCacheFilename.empty())
00072         {
00073             ILOG_INFO("Using cache from KernelData: " << 
00074                       kernelDataCacheFilename);
00075             // we have an on-disk cache in the KernelData folder
00076             // mFeatureExternal is used elsewhere for the quids and the size
00077             // so this featuretable can perform that role as well
00078             mFeatureExternal = FeatureTable::MakeFromFile(
00079                 Feature::FeatureDefinition("cache"), 
00080                 kernelDataCacheFilename, 
00081                 pathCreator->GetDatabase());
00082 
00083             // wrap the storage of this feature table in a matrix/array
00084             size_t w = mFeatureExternal->GetColumn2()->GetStorage()->CW();
00085             size_t h = mFeatureExternal->GetColumn2()->GetStorage()->CH();
00086             ILOG_DEBUG("size of kernel matrix is ("<< w <<", "<< h <<")");
00087             mKernelMatrix = new Array::Array2dScalarReal64(w, h, 0, 0, 
00088                 mFeatureExternal->GetColumn2()->GetStorage()->mData, true);
00089             return mFeatureExternal;
00090         }
00091 
00092         mUseFeatureCache = (mAnnoSet->GetDatabase()->GetDataChannel() &&
00093                             !CmdOptions::GetInstance().GetBool("noFeatureCache"));
00094         if(mTrainFeatureTempNames.size() == 0)
00095         {
00096             for(int i=0 ; i<mFeatureDefs.size() ; ++i)
00097             {
00098                 String path = mAnnoSet->GetFilePathFeatureIndex
00099                     (mFeatureDefs[i], "", false, false);
00100                 if (mUseFeatureCache)
00101                 {
00102                     /* open the IOBuffers for the training set features
00103                        (e.g. copy to local node temp if we are using a
00104                        datachannel) */
00105                     ILOG_INFO("buffering from train set FeatureIndex: " <<
00106                               mFeatureDefs[i].AsString());
00107                     // TODO: there might be a large gain possible here (?)
00108                     String tmpName = FileNameTmp();
00109                     Util::IOBuffer* buffer = mAnnoSet->GetDatabase()->GetIOBuffer
00110                         (path, true, false, tmpName);
00111                     mTrainFeatureTempNames.push_back(tmpName);
00112                     delete buffer;
00113                 }
00114                 else
00115                 {
00116                     mTrainFeatureTempNames.push_back(path);
00117                 }
00118             }
00119         }
00120 
00121         int sizeCheck = -1;
00122         Matrix::Mat* accumulator = 0;
00123         for(int i=0 ; i<mWeights.size() ; ++i)
00124         {
00125             // load apply to feature
00126             pathCreator->SetFeature(mFeatureDefs[i].AsString());
00127             String path = pathCreator->GetFilePathFeatureData(false, false);
00128             if(path.empty())
00129             {
00130                 ILOG_WARN("FeatureData path is empty for the set to apply to:" <<
00131                           " did you forget --keyframes?");
00132             }
00133             FeatureTable* feature = FeatureTable::MakeFromFile
00134                 (mFeatureDefs[i], path, pathCreator->GetDatabase());
00135             if(sizeCheck == -1)
00136                 sizeCheck = feature->Size();
00137             //TODO: what should happen is a comparison of the quids of the
00138             //different tables, not a check on just the size
00139             if(feature->Size() != sizeCheck)
00140             {
00141                 ILOG_ERROR("Size mismatch in the FeatureData for the dataset to"
00142                            << " apply to: have " << sizeCheck << " and " <<
00143                            feature->Size());
00144                 throw "bye";
00145             }
00146             ILOG_INFO("size of apply set " << mFeatureDefs[i].AsString() << " is " <<
00147                       feature->Size());
00148 
00149             // load train feature
00150             FeatureTable* trainFeature = new FeatureTable(mFeatureDefs[i]);
00151             Util::IOBuffer* buffer = 0;
00152             if(mUseFeatureCache)
00153             {
00154                 buffer = new Util::IOBufferFile
00155                     (mTrainFeatureTempNames[i], true, false);
00156             }
00157             else
00158             {
00159                 buffer = mAnnoSet->GetDatabase()->GetIOBuffer
00160                     (mTrainFeatureTempNames[i], true, false, "");
00161             }
00162 
00163             Read(trainFeature, buffer);
00164             ILOG_INFO("size of train " << mFeatureDefs[i].AsString() << " is " <<
00165                       trainFeature->Size());
00166             delete buffer;
00167 
00168             // compute kernel distances between features
00169             typedef Feature::FeatureTable::ColumnVectorSet VectorSet;
00170             VectorSet* vectorsV = feature->GetColumn2();
00171             VectorSet* vectorsH = trainFeature->GetColumn2();
00172             vectorsV->SetSize(feature->Size());
00173             vectorsH->SetSize(trainFeature->Size());
00174             ILOG_INFO("Compute distance matrix for feature " << i << "...");
00175             Matrix::Mat* distanceMatrix = Apply(&Training::Chi2Distance,
00176                                                 vectorsH, vectorsV);
00177                 
00178             delete trainFeature;
00179 
00180             // accumulate; this is the part inside the 'exp' in the kernel function
00181             MulVal(distanceMatrix, distanceMatrix, mWeights[i]);
00182             DivVal(distanceMatrix, distanceMatrix, -mAverages[i]);
00183             if(accumulator == 0)
00184                 Set(accumulator, distanceMatrix);
00185             else
00186                 Add(accumulator, accumulator, distanceMatrix);
00187             delete distanceMatrix;
00188             if(i == 0)
00189             {
00190                 mFeatureExternal = feature;
00191             }
00192             else
00193                 delete feature;
00194         }
00195         // only thing to do is weigh and exp
00196         DivVal(accumulator, accumulator, mTotalWeight);
00197         Exp(accumulator, accumulator);
00198         mKernelMatrix = accumulator;
00199         if(CmdOptions::GetInstance().GetBool("storeKernelData"))
00200         {
00201             FeatureTable* storage = new FeatureTable(mKernelMatrixName, sizeCheck, Matrix::MatNrCol(mKernelMatrix));
00202             Column::Copy(storage->GetColumn1(), mFeatureExternal->GetColumn1(), sizeCheck);
00203             // the VectorSet likes owning the storage after setting it,
00204             // so we must create a wrapper to prevent it from deleting the
00205             // kernel matrix when the storage is destroyed
00206             Array::Array2dScalarReal64* wrapper = 
00207                 new Array::Array2dScalarReal64(mKernelMatrix->CW(), mKernelMatrix->CH(), 0, 0, mKernelMatrix->mData, true);
00208             storage->GetColumn2()->SetStorage(wrapper);
00209             storage->GetColumn2()->SetSize(sizeCheck);
00210             storage->SetSize(sizeCheck);
00211             String storageFilename = 
00212                 pathCreator->GetDataSet()->GetFilePathKernelData(
00213                     pathCreator->GetWalkType(), 
00214                     mAnnoSet->GetSetName(), 
00215                     mKernelMatrixName,
00216                     pathCreator->GetFolderOrVideoId(), true, false);
00217             Write(storage, storageFilename, pathCreator->GetDatabase(), true);
00218             delete storage;
00219         }
00220         return mFeatureExternal;
00221     }
00222 
00223     Table::ScoreTable*
00224     Predict(String modelName)
00225     {
00226         if(!mInitialized)
00227             ILOG_ERROR("not initialized");
00228         Training::Svm svm;
00229         ILOG_DEBUG("loading model")
00230         svm.LoadModel(modelName, mAnnoSet->GetDatabase());
00231         Util::PropertySet ps;
00232         ps.Add("kernel","precomputed");
00233         svm.OverrideModelOptions(&ps);
00234 
00235         if(!mKernelMatrix)
00236         {
00237             ILOG_ERROR("Error: kernel matrix is NULL in Predict");
00238         }
00239 
00241         ILOG_DEBUG("creating datasrc");
00242         Training::TrainDataSrcKernelMatrix dataSrc(mKernelMatrix);
00243         ILOG_DEBUG("calling svm.predict");
00244         return svm.Predict(&dataSrc);
00245     }
00246 
00247     void
00248     UnloadFeatures()
00249     {
00250         if(mFeatureExternal)
00251         {
00252             delete mFeatureExternal;
00253             mFeatureExternal = 0;
00254         }
00255         if(mKernelMatrix)
00256         {
00257             delete mKernelMatrix;
00258             mKernelMatrix = 0;
00259         }
00260     }
00261 
00262 private:
00263     void
00264     LoadInfoFile()
00265     {
00266         ILOG_INFO("loading featuredefs, weights and averages ");
00267         ILOG_DEBUG_NODE("annoset = " << (void*)mAnnoSet << " name = " <<
00268                         mKernelMatrixName);
00269         String path = mAnnoSet->GetFilePathPrecomputedKernels
00270             (mKernelMatrixName + ".input.txt", "", false, false);
00271         ILOG_DEBUG_NODE("reading input.txt");
00272         std::vector<String> lines;
00273         Util::IOBuffer* buf2 = mAnnoSet->GetDatabase()->GetIOBuffer
00274             (path, true, false, "");
00275         if(!buf2)
00276         {
00277             ILOG_ERROR("Could not read input.txt:" << path);
00278         }
00279         while(buf2->Available())
00280         {
00281             lines.push_back(buf2->ReadLine());
00282         }
00283         delete buf2;
00284         ILOG_DEBUG_NODE("read input.txt");
00285         if(lines.size() == 0)
00286             ILOG_ERROR(String("Could not load .input.txt file or file empty...\n")+
00287                        " Perhaps because the precomputed kernel is made with an"+
00288                        " old version? you can make this file by hand: each line"+
00289                        " has a weight and a featureDef just as the input to"+
00290                        " precomputekernelmatrix");
00291 
00292         mTotalWeight = 0;
00293         for(int i=0 ; i<lines.size() ; ++i)
00294         {
00295             ILOG_DEBUG_NODE("parsing line " << lines[i]);
00296             Util::StringParser p(lines[i]);
00297             Real64 weight = p.GetDouble();
00298             if(weight == 0)
00299                 ILOG_ERROR("parse error in .input.txt on line " << i <<
00300                            ": no weight found at beginning, or weight == 0");
00301             mTotalWeight += weight;
00302             mWeights.push_back(weight);
00303             String feature = p.GetString(' ', false);
00304             if(feature == "")
00305                 ILOG_ERROR("parse error in .input.txt on line " << i <<
00306                            ": no featureDef found");
00307             mFeatureDefs.push_back(Feature::FeatureDefinition(feature));
00308         }
00309 
00310         ILOG_DEBUG_NODE("done parsing " << mWeights.size() << " entries");
00312         String path2 = mAnnoSet->GetFilePathPrecomputedKernels
00313             (mKernelMatrixName + ".averages.raw", "", false, false);
00314         Util::IOBuffer* buf = mAnnoSet->GetDatabase()->GetIOBuffer
00315             (path2, true, false, "");
00316         if (buf && buf->Valid())
00317         {
00318             ILOG_DEBUG_NODE("loading averages");
00319             for(int i=0 ; i<mWeights.size() ; ++i)
00320             {
00321                 Real64 d;
00322                 buf->Read(&d, sizeof(Real64));
00323                 mAverages.push_back(d);
00324             }
00325         }
00326         delete buf;
00327 
00328         int size = mWeights.size();
00329         if((mAverages.size() != size) ||
00330            (mFeatureDefs.size() != size))
00331             ILOG_ERROR("trouble after loading: sizes do not match: weights="
00332                        << mWeights.size() << "; averages=" << mAverages.size() << 
00333                        "; featuredefs=" << mFeatureDefs.size());
00334     }
00335 
00336     Real64 mTotalWeight;
00337     std::vector<Real64> mWeights;
00338     std::vector<Real64> mAverages;
00339     Database::RawDataSet* mAnnoSet;
00340     FeatureTable* mFeatureExternal;
00341     std::vector<String> mTrainFeatureTempNames;
00342     std::vector<Feature::FeatureDefinition> mFeatureDefs;
00343     std::string mKernelMatrixName;
00344     Matrix::Mat* mKernelMatrix;
00345     bool mInitialized;
00346     bool mUseFeatureCache;
00347 
00348     ILOG_VAR_DEC;
00349 };
00350 
00351 ILOG_VAR_INIT(ApplyConceptsHelperKernels, Impala.Core.Training);
00352 
00353 } //namespace
00354 } //namespace
00355 } //namespace
00356 
00357 #endif

Generated on Fri Mar 19 09:31:23 2010 for ImpalaSrc by  doxygen 1.5.1