00001 #ifndef Impala_Core_Training_ApplyConceptsHelperKernels_h
00002 #define Impala_Core_Training_ApplyConceptsHelperKernels_h
00003
00004 #include "Core/Training/ApplyConceptsHelper.h"
00005 #include "Util/StringParser.h"
00006 #include "Core/Array/Set.h"
00007 #include "Core/Array/Add.h"
00008 #include "Core/Array/DivVal.h"
00009 #include "Core/Array/MulVal.h"
00010 #include "Core/Array/Exp.h"
00011 #include "Core/Vector/Apply.h"
00012 #include "Core/Training/TrainDataSrcKernelMatrix.h"
00013
00014 namespace Impala
00015 {
00016 namespace Core
00017 {
00018 namespace Training
00019 {
00020
00021 class ApplyConceptsHelperKernels : public ApplyConceptsHelper
00022 {
00023 public:
00024 ApplyConceptsHelperKernels()
00025 : mKernelMatrix(0), mFeatureExternal(0), mInitialized(false)
00026 {
00027 }
00028
00029 virtual
00030 ~ApplyConceptsHelperKernels()
00031 {
00032 if (mUseFeatureCache)
00033 {
00034 ILOG_INFO("Cleaning up disk cache...");
00035 for(int i = 0; i < mTrainFeatureTempNames.size(); i++)
00036 {
00037 unlink(mTrainFeatureTempNames[i].c_str());
00038 }
00039 }
00040 }
00041
00042 void
00043 Initialise(Database::RawDataSet* annoSet, String kernelMatrixName)
00044 {
00045 mAnnoSet = annoSet;
00046 mKernelMatrixName = kernelMatrixName;
00047 LoadInfoFile();
00048 mInitialized = true;
00049 }
00050
00051 String
00052 GetModelName()
00053 {
00054 return mKernelMatrixName;
00055 }
00056
00057 FeatureTable*
00058 LoadFeatures(Database::PathCreator* pathCreator)
00059 {
00060 if(!mInitialized)
00061 ILOG_ERROR("not initialized");
00062 if(mFeatureExternal)
00063 ILOG_ERROR("features not empty");
00064
00065 String kernelDataCacheFilename =
00066 pathCreator->GetDataSet()->GetFilePathKernelData(
00067 pathCreator->GetWalkType(),
00068 mAnnoSet->GetSetName(),
00069 mKernelMatrixName,
00070 pathCreator->GetFolderOrVideoId(), false, true);
00071 if(!kernelDataCacheFilename.empty())
00072 {
00073 ILOG_INFO("Using cache from KernelData: " <<
00074 kernelDataCacheFilename);
00075
00076
00077
00078 mFeatureExternal = FeatureTable::MakeFromFile(
00079 Feature::FeatureDefinition("cache"),
00080 kernelDataCacheFilename,
00081 pathCreator->GetDatabase());
00082
00083
00084 size_t w = mFeatureExternal->GetColumn2()->GetStorage()->CW();
00085 size_t h = mFeatureExternal->GetColumn2()->GetStorage()->CH();
00086 ILOG_DEBUG("size of kernel matrix is ("<< w <<", "<< h <<")");
00087 mKernelMatrix = new Array::Array2dScalarReal64(w, h, 0, 0,
00088 mFeatureExternal->GetColumn2()->GetStorage()->mData, true);
00089 return mFeatureExternal;
00090 }
00091
00092 mUseFeatureCache = (mAnnoSet->GetDatabase()->GetDataChannel() &&
00093 !CmdOptions::GetInstance().GetBool("noFeatureCache"));
00094 if(mTrainFeatureTempNames.size() == 0)
00095 {
00096 for(int i=0 ; i<mFeatureDefs.size() ; ++i)
00097 {
00098 String path = mAnnoSet->GetFilePathFeatureIndex
00099 (mFeatureDefs[i], "", false, false);
00100 if (mUseFeatureCache)
00101 {
00102
00103
00104
00105 ILOG_INFO("buffering from train set FeatureIndex: " <<
00106 mFeatureDefs[i].AsString());
00107
00108 String tmpName = FileNameTmp();
00109 Util::IOBuffer* buffer = mAnnoSet->GetDatabase()->GetIOBuffer
00110 (path, true, false, tmpName);
00111 mTrainFeatureTempNames.push_back(tmpName);
00112 delete buffer;
00113 }
00114 else
00115 {
00116 mTrainFeatureTempNames.push_back(path);
00117 }
00118 }
00119 }
00120
00121 int sizeCheck = -1;
00122 Matrix::Mat* accumulator = 0;
00123 for(int i=0 ; i<mWeights.size() ; ++i)
00124 {
00125
00126 pathCreator->SetFeature(mFeatureDefs[i].AsString());
00127 String path = pathCreator->GetFilePathFeatureData(false, false);
00128 if(path.empty())
00129 {
00130 ILOG_WARN("FeatureData path is empty for the set to apply to:" <<
00131 " did you forget --keyframes?");
00132 }
00133 FeatureTable* feature = FeatureTable::MakeFromFile
00134 (mFeatureDefs[i], path, pathCreator->GetDatabase());
00135 if(sizeCheck == -1)
00136 sizeCheck = feature->Size();
00137
00138
00139 if(feature->Size() != sizeCheck)
00140 {
00141 ILOG_ERROR("Size mismatch in the FeatureData for the dataset to"
00142 << " apply to: have " << sizeCheck << " and " <<
00143 feature->Size());
00144 throw "bye";
00145 }
00146 ILOG_INFO("size of apply set " << mFeatureDefs[i].AsString() << " is " <<
00147 feature->Size());
00148
00149
00150 FeatureTable* trainFeature = new FeatureTable(mFeatureDefs[i]);
00151 Util::IOBuffer* buffer = 0;
00152 if(mUseFeatureCache)
00153 {
00154 buffer = new Util::IOBufferFile
00155 (mTrainFeatureTempNames[i], true, false);
00156 }
00157 else
00158 {
00159 buffer = mAnnoSet->GetDatabase()->GetIOBuffer
00160 (mTrainFeatureTempNames[i], true, false, "");
00161 }
00162
00163 Read(trainFeature, buffer);
00164 ILOG_INFO("size of train " << mFeatureDefs[i].AsString() << " is " <<
00165 trainFeature->Size());
00166 delete buffer;
00167
00168
00169 typedef Feature::FeatureTable::ColumnVectorSet VectorSet;
00170 VectorSet* vectorsV = feature->GetColumn2();
00171 VectorSet* vectorsH = trainFeature->GetColumn2();
00172 vectorsV->SetSize(feature->Size());
00173 vectorsH->SetSize(trainFeature->Size());
00174 ILOG_INFO("Compute distance matrix for feature " << i << "...");
00175 Matrix::Mat* distanceMatrix = Apply(&Training::Chi2Distance,
00176 vectorsH, vectorsV);
00177
00178 delete trainFeature;
00179
00180
00181 MulVal(distanceMatrix, distanceMatrix, mWeights[i]);
00182 DivVal(distanceMatrix, distanceMatrix, -mAverages[i]);
00183 if(accumulator == 0)
00184 Set(accumulator, distanceMatrix);
00185 else
00186 Add(accumulator, accumulator, distanceMatrix);
00187 delete distanceMatrix;
00188 if(i == 0)
00189 {
00190 mFeatureExternal = feature;
00191 }
00192 else
00193 delete feature;
00194 }
00195
00196 DivVal(accumulator, accumulator, mTotalWeight);
00197 Exp(accumulator, accumulator);
00198 mKernelMatrix = accumulator;
00199 if(CmdOptions::GetInstance().GetBool("storeKernelData"))
00200 {
00201 FeatureTable* storage = new FeatureTable(mKernelMatrixName, sizeCheck, Matrix::MatNrCol(mKernelMatrix));
00202 Column::Copy(storage->GetColumn1(), mFeatureExternal->GetColumn1(), sizeCheck);
00203
00204
00205
00206 Array::Array2dScalarReal64* wrapper =
00207 new Array::Array2dScalarReal64(mKernelMatrix->CW(), mKernelMatrix->CH(), 0, 0, mKernelMatrix->mData, true);
00208 storage->GetColumn2()->SetStorage(wrapper);
00209 storage->GetColumn2()->SetSize(sizeCheck);
00210 storage->SetSize(sizeCheck);
00211 String storageFilename =
00212 pathCreator->GetDataSet()->GetFilePathKernelData(
00213 pathCreator->GetWalkType(),
00214 mAnnoSet->GetSetName(),
00215 mKernelMatrixName,
00216 pathCreator->GetFolderOrVideoId(), true, false);
00217 Write(storage, storageFilename, pathCreator->GetDatabase(), true);
00218 delete storage;
00219 }
00220 return mFeatureExternal;
00221 }
00222
00223 Table::ScoreTable*
00224 Predict(String modelName)
00225 {
00226 if(!mInitialized)
00227 ILOG_ERROR("not initialized");
00228 Training::Svm svm;
00229 ILOG_DEBUG("loading model")
00230 svm.LoadModel(modelName, mAnnoSet->GetDatabase());
00231 Util::PropertySet ps;
00232 ps.Add("kernel","precomputed");
00233 svm.OverrideModelOptions(&ps);
00234
00235 if(!mKernelMatrix)
00236 {
00237 ILOG_ERROR("Error: kernel matrix is NULL in Predict");
00238 }
00239
00241 ILOG_DEBUG("creating datasrc");
00242 Training::TrainDataSrcKernelMatrix dataSrc(mKernelMatrix);
00243 ILOG_DEBUG("calling svm.predict");
00244 return svm.Predict(&dataSrc);
00245 }
00246
00247 void
00248 UnloadFeatures()
00249 {
00250 if(mFeatureExternal)
00251 {
00252 delete mFeatureExternal;
00253 mFeatureExternal = 0;
00254 }
00255 if(mKernelMatrix)
00256 {
00257 delete mKernelMatrix;
00258 mKernelMatrix = 0;
00259 }
00260 }
00261
00262 private:
00263 void
00264 LoadInfoFile()
00265 {
00266 ILOG_INFO("loading featuredefs, weights and averages ");
00267 ILOG_DEBUG_NODE("annoset = " << (void*)mAnnoSet << " name = " <<
00268 mKernelMatrixName);
00269 String path = mAnnoSet->GetFilePathPrecomputedKernels
00270 (mKernelMatrixName + ".input.txt", "", false, false);
00271 ILOG_DEBUG_NODE("reading input.txt");
00272 std::vector<String> lines;
00273 Util::IOBuffer* buf2 = mAnnoSet->GetDatabase()->GetIOBuffer
00274 (path, true, false, "");
00275 if(!buf2)
00276 {
00277 ILOG_ERROR("Could not read input.txt:" << path);
00278 }
00279 while(buf2->Available())
00280 {
00281 lines.push_back(buf2->ReadLine());
00282 }
00283 delete buf2;
00284 ILOG_DEBUG_NODE("read input.txt");
00285 if(lines.size() == 0)
00286 ILOG_ERROR(String("Could not load .input.txt file or file empty...\n")+
00287 " Perhaps because the precomputed kernel is made with an"+
00288 " old version? you can make this file by hand: each line"+
00289 " has a weight and a featureDef just as the input to"+
00290 " precomputekernelmatrix");
00291
00292 mTotalWeight = 0;
00293 for(int i=0 ; i<lines.size() ; ++i)
00294 {
00295 ILOG_DEBUG_NODE("parsing line " << lines[i]);
00296 Util::StringParser p(lines[i]);
00297 Real64 weight = p.GetDouble();
00298 if(weight == 0)
00299 ILOG_ERROR("parse error in .input.txt on line " << i <<
00300 ": no weight found at beginning, or weight == 0");
00301 mTotalWeight += weight;
00302 mWeights.push_back(weight);
00303 String feature = p.GetString(' ', false);
00304 if(feature == "")
00305 ILOG_ERROR("parse error in .input.txt on line " << i <<
00306 ": no featureDef found");
00307 mFeatureDefs.push_back(Feature::FeatureDefinition(feature));
00308 }
00309
00310 ILOG_DEBUG_NODE("done parsing " << mWeights.size() << " entries");
00312 String path2 = mAnnoSet->GetFilePathPrecomputedKernels
00313 (mKernelMatrixName + ".averages.raw", "", false, false);
00314 Util::IOBuffer* buf = mAnnoSet->GetDatabase()->GetIOBuffer
00315 (path2, true, false, "");
00316 if (buf && buf->Valid())
00317 {
00318 ILOG_DEBUG_NODE("loading averages");
00319 for(int i=0 ; i<mWeights.size() ; ++i)
00320 {
00321 Real64 d;
00322 buf->Read(&d, sizeof(Real64));
00323 mAverages.push_back(d);
00324 }
00325 }
00326 delete buf;
00327
00328 int size = mWeights.size();
00329 if((mAverages.size() != size) ||
00330 (mFeatureDefs.size() != size))
00331 ILOG_ERROR("trouble after loading: sizes do not match: weights="
00332 << mWeights.size() << "; averages=" << mAverages.size() <<
00333 "; featuredefs=" << mFeatureDefs.size());
00334 }
00335
00336 Real64 mTotalWeight;
00337 std::vector<Real64> mWeights;
00338 std::vector<Real64> mAverages;
00339 Database::RawDataSet* mAnnoSet;
00340 FeatureTable* mFeatureExternal;
00341 std::vector<String> mTrainFeatureTempNames;
00342 std::vector<Feature::FeatureDefinition> mFeatureDefs;
00343 std::string mKernelMatrixName;
00344 Matrix::Mat* mKernelMatrix;
00345 bool mInitialized;
00346 bool mUseFeatureCache;
00347
00348 ILOG_VAR_DEC;
00349 };
00350
00351 ILOG_VAR_INIT(ApplyConceptsHelperKernels, Impala.Core.Training);
00352
00353 }
00354 }
00355 }
00356
00357 #endif