00001 #ifndef Impala_Core_Training_PrecomputeTask_h
00002 #define Impala_Core_Training_PrecomputeTask_h
00003
00004 #include "Persistency/FeatureTableRepository.h"
00005 #include "Persistency/KernelMatrixRepository.h"
00006 #include "Link/Cuda/Cuda.h"
00007 #include "Core/Feature/WeightedFeatureList.h"
00008 #include "Core/Training/ComputeKernelMatrix.h"
00009
00010 namespace Impala
00011 {
00012 namespace Core
00013 {
00014 namespace Training
00015 {
00016
00017
00020 class PrecomputeTask
00021 {
00022 public:
00023
00024 typedef Matrix::DistributedAccess DistributedAccess;
00025 typedef Matrix::VirtualMatrix VirtualMatrix;
00026 typedef Feature::VirtualFeatureTable VirtualFeatureTable;
00027 typedef Feature::VirtualFeatureTableFactory VFTFactory;
00028 typedef Persistency::File File;
00029 typedef Persistency::FeatureLocator FeatureLocator;
00030 typedef Persistency::DistributedAccessRepository DistributedAccessRepository;
00031 typedef Persistency::FeatureTableRepository FeatureTableRepository;
00032 typedef Persistency::KernelMatrixLocator KernelMatrixLocator;
00033 typedef Persistency::KernelMatrixRepository KernelMatrixRepository;
00034
00035 static void
00036 ComputeDevelKernel(Database::RawDataSet* dataSet,
00037 const KernelMatrixLocator& kerLoc,
00038 CString inputFeatures)
00039 {
00040 bool override = CmdOptions::GetInstance().GetBool("override");
00041 if (DistributedAccessRepository().Exists(kerLoc) && !override)
00042 {
00043 ILOG_INFO("Skipping, Kernel already exists: " << kerLoc);
00044 return;
00045 }
00046
00047 Feature::WeightedFeatureList fList(inputFeatures);
00048 if (fList.Size() == 0)
00049 {
00050 ILOG_ERROR("Empty feature list");
00051 return;
00052 }
00053
00054 FeatureLocator featLoc(dataSet->GetLocator(), false, true,
00055 kerLoc.GetFeatureIndexCat(),
00056 fList.GetFeature(0), "");
00057 VirtualFeatureTable* features =
00058 VFTFactory::GetInstance().ConstructIOBufferReader(featLoc, true);
00059 int mSize = features->Size();
00060 DistributedAccess* kernel = new DistributedAccess(mSize, mSize, 1, 1,
00061 0, 1);
00062 features->GetQuids(kernel->GetRowQuids());
00063 features->GetQuids(kernel->GetColumnQuids());
00064 delete features;
00065
00066 bool isChi2 = (kerLoc.GetModel() == "chi2");
00067 for (int i=0 ; i<fList.Size() ; i++)
00068 {
00069 Real64 average = -1;
00070 if (isChi2)
00071 {
00072 FeatureLocator loc = featLoc;
00073 loc.SetSuffix(".average.raw");
00074 loc.SetFeatureString(fList.GetFeature(i));
00075 File file = FeatureTableRepository().ExposeFile(loc, false);
00076 if (file.Valid())
00077 {
00078 std::vector<double> v;
00079 file.ReadNative(std::back_inserter(v), true);
00080 average = v[0];
00081 kernel->SetHasOwnAverages(true);
00082 }
00083 else
00084 {
00085 ILOG_ERROR("Could not load average for " << loc);
00086 delete kernel;
00087 return;
00088 }
00089 }
00090 kernel->AddFeature(fList.GetFeature(i), fList.GetWeight(i), average);
00091 }
00092
00093 PrecomputeTask* pt = new PrecomputeTask(false, kerLoc, kernel, featLoc,
00094 featLoc);
00095 Execute(pt);
00096 }
00097
00098 static void
00099 ComputeTestKernel(const Persistency::Locator& dstLoc, CString walkType,
00100 CString container, Database::RawDataSet* develSet,
00101 DistributedAccess* develKernel, CString featureIndexCat,
00102 CString model, CString kernelMatrixName)
00103 {
00104 ILOG_VAR(Impala.Core.Training.PrecomputeTask.ComputeTestKernel);
00105 KernelMatrixLocator thisKerLoc(dstLoc, false, walkType,
00106 develSet->GetSetName(), model,
00107 kernelMatrixName, container);
00108 thisKerLoc.SetFeatureIndexCat(featureIndexCat);
00109
00110 bool override = CmdOptions::GetInstance().GetBool("override");
00111 if (KernelMatrixRepository().Exists(thisKerLoc) && !override)
00112 {
00113 ILOG_INFO("Skipping, KernelData already exists: " << thisKerLoc);
00114 return;
00115 }
00116
00117 if (develKernel->GetNrFeatures() == 0)
00118 {
00119 ILOG_ERROR("No features");
00120 return;
00121 }
00122
00123 FeatureLocator thisFeatLoc(dstLoc, false, false, walkType,
00124 "empty", container);
00125 FeatureLocator develFeatLoc(develSet->GetLocator(), false, true, "",
00126 "empty", "");
00127
00128 PrecomputeTask* pt = new Training::PrecomputeTask
00129 (true, thisKerLoc, develKernel, thisFeatLoc, develFeatLoc);
00130 Execute(pt);
00131 }
00132
00133 static void
00134 Execute(PrecomputeTask* pt)
00135 {
00136
00137
00138
00139 typedef float CPUFLOAT;
00140
00141
00142 bool GPU = Link::Cuda::CudaUsed();
00143 ILOG_INFO("GPU mode: " << GPU);
00144 int slabWidth = 1024;
00145 using Training::ComputeKernelMatrix;
00146 if (GPU)
00147 {
00148 ComputeKernelMatrix<float>(pt, slabWidth, GPU);
00149 }
00150 else
00151 {
00152 #ifdef CUDA
00153 ComputeKernelMatrix<CPUFLOAT>(pt, slabWidth, GPU);
00154 #else
00155 ComputeKernelMatrix<double>(pt, slabWidth, GPU);
00156 #endif
00157 }
00158 delete pt;
00159 }
00160
00161
00162 PrecomputeTask(bool computingTestKernel,
00163 const KernelMatrixLocator& thisKerLoc,
00164 DistributedAccess* develKernel,
00165 const FeatureLocator& thisFeatLoc,
00166 const FeatureLocator& develFeatLoc)
00167 {
00168 mComputingTestKernel = computingTestKernel;
00169 mDevelKernel = develKernel;
00170 mThisKerLoc = thisKerLoc;
00171 mThisFeatLoc = thisFeatLoc;
00172 mDevelFeatLoc = develFeatLoc;
00173 mMatrix = 0;
00174 mWriter = 0;
00175 for (int i=0 ; i<NrFeatures() ; i++)
00176 {
00177 ILOG_INFO(i << " " << GetFeatureAverage(i) << " (" <<
00178 GetFeatureWeight(i) << "*" << GetFeatureName(i) << ")");
00179 }
00180 }
00181
00182 virtual
00183 ~PrecomputeTask()
00184 {
00185 if (!mComputingTestKernel)
00186 delete mDevelKernel;
00187 if (mWriter)
00188 delete mWriter;
00189 }
00190
00191 size_t
00192 NrFeatures() const
00193 {
00194 return mDevelKernel->GetNrFeatures();
00195 }
00196
00197 String
00198 GetFeatureName(int feature) const
00199 {
00200 return mDevelKernel->GetFeature(feature);
00201 }
00202
00203 Real64
00204 GetFeatureWeight(int feature) const
00205 {
00206 return mDevelKernel->GetWeight(feature);
00207 }
00208
00209 Real64
00210 GetTotalFeatureWeight() const
00211 {
00212 return mDevelKernel->GetTotalWeight();
00213 }
00214
00215 Real64
00216 GetFeatureAverage(int feature) const
00217 {
00218 return mDevelKernel->GetAverage(feature);
00219 }
00220
00221 bool
00222 IsSymmetric() const
00223 {
00224 return (!mComputingTestKernel);
00225 }
00226
00227 bool
00228 IsChi2() const
00229 {
00230 return (mThisKerLoc.GetModel() == "chi2");
00231 }
00232
00233 FeatureLocator
00234 GetFeatureLocatorA(int index)
00235 {
00236 FeatureLocator loc = mDevelFeatLoc;
00237 loc.SetFeatureString(mDevelKernel->GetFeature(index));
00238 return loc;
00239 }
00240
00241 FeatureLocator
00242 GetFeatureLocatorB(int index)
00243 {
00244 FeatureLocator loc = mThisFeatLoc;
00245 loc.SetFeatureString(mDevelKernel->GetFeature(index));
00246 return loc;
00247 }
00248
00249 VirtualMatrix*
00250 GetWritableMatrix(int nrRow, int nrCol)
00251 {
00252 typedef Core::Matrix::VirtualMatrixFactory VMFactory;
00253 using Persistency::RepositoryGetFile;
00254
00255 String path = KernelMatrixRepository().ExposeFilePath(mThisKerLoc, true);
00256 if (path.empty())
00257 {
00258 ILOG_ERROR("Couldn't get writable file for kernel " << mThisKerLoc);
00259 return 0;
00260 }
00261 String suffix = (mComputingTestKernel) ? ".tab"
00262 : ".precomputed.part-R0-C0.raw";
00263 String fName = FileNamePath(path) + FileNameBase(path) + suffix;
00264 Persistency::FileLocator fileLoc(mThisKerLoc, fName);
00265 File file = RepositoryGetFile(fileLoc, true, false);
00266 if (mComputingTestKernel)
00267 {
00268 mWriter = VFTFactory::GetInstance().ConstructIOBufferWriter
00269 (nrRow, nrCol, file, false);
00270 return mWriter->GetMatrix();
00271 }
00272 else
00273 {
00274 mMatrix = VMFactory::GetInstance().ConstructIOBufferWriter
00275 (nrRow, nrCol, file, true, true);
00276 return mMatrix;
00277 }
00278 }
00279
00280 void
00281 Finalize()
00282 {
00283 if (mComputingTestKernel)
00284 {
00285 FeatureLocator loc = GetFeatureLocatorB(0);
00286 VirtualFeatureTable* features =
00287 VFTFactory::GetInstance().ConstructIOBufferReader(loc, false);
00288 Table::QuidTable quids(features->Size());
00289 features->GetQuids(&quids);
00290 delete features;
00291 mWriter->AddQuids(&quids);
00292 mWriter->SetCompleted();
00293 }
00294 else
00295 {
00296 delete mMatrix;
00297 KernelMatrixLocator loc = mThisKerLoc;
00298 loc.SetDoParts(0);
00299 Persistency::DistributedAccessRepository().Add(loc, mDevelKernel);
00300 }
00301 }
00302
00303 private:
00304
00305 bool mComputingTestKernel;
00306 KernelMatrixLocator mThisKerLoc;
00307 DistributedAccess* mDevelKernel;
00308 FeatureLocator mThisFeatLoc;
00309 FeatureLocator mDevelFeatLoc;
00310 VirtualMatrix* mMatrix;
00311 Core::Feature::VirtualFeatureTableIOBufferWriter* mWriter;
00312
00313 ILOG_VAR_DEC;
00314 };
00315
// Counterpart of the ILOG_VAR_DEC member declared inside PrecomputeTask.
// NOTE(review): presumably defines that logger under the
// "Impala.Core.Training" category - confirm against the ILOG macro definitions.
00316 ILOG_VAR_INIT(PrecomputeTask, Impala.Core.Training);
00317
00318 }
00319 }
00320 }
00321
00322 #endif