00001 #ifndef Impala_Core_Training_ApplyConceptsHelperKernels_h
00002 #define Impala_Core_Training_ApplyConceptsHelperKernels_h
00003
00004 #include "Core/Training/ApplyConceptsHelper.h"
00005 #include "Util/StringParser.h"
00006 #include "Core/Array/Set.h"
00007 #include "Core/Array/Add.h"
00008 #include "Core/Array/DivVal.h"
00009 #include "Core/Array/MulVal.h"
00010 #include "Core/Array/Exp.h"
00011 #include "Core/Vector/Apply.h"
00012 #include "Core/Vector/Chi2Distance.h"
00013 #include "Core/Training/TrainDataSrcKernelMatrix.h"
00014 #include "Persistency/FeatureTableRepository.h"
00015 #include "Persistency/DistributedAccessRepository.h"
00016 #include "Persistency/KernelMatrixRepository.h"
00017 #include "Persistency/SvmRepository.h"
00018
00019 namespace Impala
00020 {
00021 namespace Core
00022 {
00023 namespace Training
00024 {
00025
00026
00027 class ApplyConceptsHelperKernels : public ApplyConceptsHelper
00028 {
00029 public:
00030
00031 ApplyConceptsHelperKernels()
00032 {
00033 mKernelMatrix = 0;
00034 mFeatureExternal = 0;
00035 }
00036
00037 virtual
00038 ~ApplyConceptsHelperKernels()
00039 {
00040 }
00041
00042 void
00043 Initialise(Database::RawDataSet* annoSet, const ModelLocator& modelLoc)
00044 {
00045 CmdOptions& options = CmdOptions::GetInstance();
00046 mFeatureIndexCat = options.GetString("featureIndexCat");
00047 mAnnoSet = annoSet;
00048 mModel = modelLoc.GetModel();
00049 mKernelMatrixName = modelLoc.GetFeature();
00050 typedef Persistency::KernelMatrixLocator KernelMatrixLocator;
00051 KernelMatrixLocator loc(mAnnoSet->GetLocator(), true, "", "",
00052 mModel, mKernelMatrixName, "");
00053 loc.SetFeatureIndexCat(mFeatureIndexCat);
00054 loc.SetStartNode(0);
00055 loc.SetNodeCount(Link::Mpi::NrProcs());
00056 loc.SetDoParts(0);
00057 mDA = Persistency::DistributedAccessRepository().Get(loc);
00058 }
00059
00060 FeatureTable*
00061 LoadFeatures(const FeatureLocator& featLoc)
00062 {
00063 typedef Persistency::KernelMatrixLocator KernelMatrixLocator;
00064 typedef Persistency::KernelMatrixRepository KernelMatrixRepository;
00065 typedef Persistency::FeatureTableRepository FeatureTableRepository;
00066 CmdOptions& options = CmdOptions::GetInstance();
00067 if (mFeatureExternal)
00068 ILOG_ERROR("features not empty");
00069
00070 KernelMatrixLocator kernelDataLoc
00071 (featLoc, false, featLoc.GetWalkType(), mAnnoSet->GetSetName(),
00072 mModel, featLoc.GetFeatureString(), featLoc.GetContainer());
00073 kernelDataLoc.SetFeatureIndexCat(mFeatureIndexCat);
00074 if (KernelMatrixRepository().Exists(kernelDataLoc))
00075 {
00076 ILOG_INFO("Using cache from KernelData: " << kernelDataLoc);
00077
00078
00079
00080
00081 KernelMatrix* kerData = KernelMatrixRepository().Get(kernelDataLoc);
00082 mFeatureExternal = kerData->GetFeatureTab();
00083
00084
00085
00086 size_t w = mFeatureExternal->GetColumn2()->GetStorage()->CW();
00087 size_t h = mFeatureExternal->GetColumn2()->GetStorage()->CH();
00088 ILOG_DEBUG("size of kernel matrix is ("<< w <<", "<< h <<")");
00089 mKernelMatrix = new Array::Array2dScalarReal64(w, h, 0, 0,
00090 mFeatureExternal->GetColumn2()->GetStorage()->mData, true);
00091 return mFeatureExternal;
00092 }
00093
00094 int sizeCheck = -1;
00095 Matrix::Mat* accumulator = 0;
00096 bool isHik = StringStartsWith(mModel, "hik");
00097 for (int i=0 ; i<mDA->GetNrFeatures() ; i++)
00098 {
00099
00100 FeatureLocator fLoc = featLoc;
00101 fLoc.SetFeatureString(mDA->GetFeature(i));
00102 FeatureTable* feature = FeatureTableRepository().Get(fLoc);
00103 if (sizeCheck == -1)
00104 sizeCheck = feature->Size();
00105 if (feature->Size() != sizeCheck)
00106 {
00107 ILOG_ERROR("Size mismatch in the FeatureData for the dataset to"
00108 << " apply to: have " << sizeCheck << " and " <<
00109 feature->Size());
00110 throw "bye";
00111 }
00112 ILOG_INFO("size of apply set " << mDA->GetFeature(i) <<
00113 " is " << feature->Size());
00114
00115
00116 FeatureLocator loc(mAnnoSet->GetLocator(), false, true,
00117 mFeatureIndexCat, mDA->GetFeature(i), "");
00118 FeatureTable* trainFeature =
00119 Persistency::FeatureTableRepository().Get(loc);
00120 ILOG_INFO("size of train " << mDA->GetFeature(i) << " is "
00121 << trainFeature->Size());
00122
00123
00124 typedef Feature::FeatureTable::ColumnVectorSet VectorSet;
00125 VectorSet* vectorsV = feature->GetColumn2();
00126 VectorSet* vectorsH = trainFeature->GetColumn2();
00127 vectorsV->SetSize(feature->Size());
00128 vectorsH->SetSize(trainFeature->Size());
00129 ILOG_INFO("Compute distance matrix for feature " << i << "...");
00130 Matrix::Mat* distanceMatrix = 0;
00131 if (isHik)
00132 {
00133 distanceMatrix = Apply(&Core::Vector::HistogramIntersectionDouble,
00134 vectorsH, vectorsV);
00135 }
00136 else
00137 {
00138 distanceMatrix = Apply(&Core::Vector::Chi2Distance,
00139 vectorsH, vectorsV);
00140 }
00141 delete trainFeature;
00142
00143
00144 MulVal(distanceMatrix, distanceMatrix, mDA->GetWeight(i));
00145 if (!isHik)
00146 DivVal(distanceMatrix, distanceMatrix, -mDA->GetAverage(i));
00147 if (accumulator == 0)
00148 Set(accumulator, distanceMatrix);
00149 else
00150 Add(accumulator, accumulator, distanceMatrix);
00151 delete distanceMatrix;
00152 if (i == 0)
00153 {
00154 mFeatureExternal = feature;
00155 }
00156 else
00157 {
00158 delete feature;
00159 }
00160 }
00161
00162 DivVal(accumulator, accumulator, mDA->GetTotalWeight());
00163 if (!isHik)
00164 Exp(accumulator, accumulator);
00165 mKernelMatrix = accumulator;
00166 if (options.GetBool("storeKernelData"))
00167 {
00168 FeatureTable* storage =
00169 new FeatureTable(mKernelMatrixName, sizeCheck,
00170 Matrix::MatNrCol(mKernelMatrix));
00171 Column::Copy(storage->GetColumn1(), mFeatureExternal->GetColumn1(),
00172 sizeCheck);
00173
00174
00175
00176 Array::Array2dScalarReal64* wrapper =
00177 new Array::Array2dScalarReal64(mKernelMatrix->CW(),
00178 mKernelMatrix->CH(), 0, 0,
00179 mKernelMatrix->mData, true);
00180 storage->GetColumn2()->SetStorage(wrapper);
00181 storage->GetColumn2()->SetSize(sizeCheck);
00182 storage->SetSize(sizeCheck);
00183 KernelMatrix kerData(storage);
00184 Persistency::KernelMatrixRepository().Add(kernelDataLoc, &kerData);
00185 }
00186 return mFeatureExternal;
00187 }
00188
00189 Table::ScoreTable*
00190 Predict(const ModelLocator& loc)
00191 {
00192 Training::Classifier* classifier = Persistency::SvmRepository().Get(loc);
00193 Util::PropertySet ps;
00194 ps.Add("kernel","precomputed");
00195 classifier->OverrideModelOptions(&ps);
00196
00197 if (!mKernelMatrix)
00198 {
00199 ILOG_ERROR("Error: kernel matrix is NULL in Predict");
00200 }
00201
00203 Training::TrainDataSrcKernelMatrix dataSrc(mKernelMatrix);
00204 Table::ScoreTable* scores = classifier->Predict(&dataSrc);
00205 delete classifier;
00206 return scores;
00207 }
00208
00209 void
00210 UnloadFeatures()
00211 {
00212 if (mFeatureExternal)
00213 {
00214 delete mFeatureExternal;
00215 mFeatureExternal = 0;
00216 }
00217 if (mKernelMatrix)
00218 {
00219 delete mKernelMatrix;
00220 mKernelMatrix = 0;
00221 }
00222 }
00223
00224 private:
00225
00226 Matrix::DistributedAccess* mDA;
00227 Database::RawDataSet* mAnnoSet;
00228 FeatureTable* mFeatureExternal;
00229 String mFeatureIndexCat;
00230 String mModel;
00231 String mKernelMatrixName;
00232 Matrix::Mat* mKernelMatrix;
00233
00234 ILOG_VAR_DEC;
00235 };
00236
00237 ILOG_VAR_INIT(ApplyConceptsHelperKernels, Impala.Core.Training);
00238
00239 }
00240 }
00241 }
00242
00243 #endif