Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

PrecomputeTask.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Training_PrecomputeTask_h
00002 #define Impala_Core_Training_PrecomputeTask_h
00003 
00004 #include "Persistency/FeatureTableRepository.h"
00005 #include "Persistency/KernelMatrixRepository.h"
00006 #include "Link/Cuda/Cuda.h"
00007 #include "Core/Feature/WeightedFeatureList.h"
00008 #include "Core/Training/ComputeKernelMatrix.h"
00009 
00010 namespace Impala
00011 {
00012 namespace Core
00013 {
00014 namespace Training
00015 {
00016 
00017 
00020 class PrecomputeTask
00021 {
00022 public:
00023 
00024     typedef Matrix::DistributedAccess DistributedAccess;
00025     typedef Matrix::VirtualMatrix VirtualMatrix;
00026     typedef Feature::VirtualFeatureTable VirtualFeatureTable;
00027     typedef Feature::VirtualFeatureTableFactory VFTFactory;
00028     typedef Persistency::File File;
00029     typedef Persistency::FeatureLocator FeatureLocator;
00030     typedef Persistency::DistributedAccessRepository DistributedAccessRepository;
00031     typedef Persistency::FeatureTableRepository FeatureTableRepository;
00032     typedef Persistency::KernelMatrixLocator KernelMatrixLocator;
00033     typedef Persistency::KernelMatrixRepository KernelMatrixRepository;
00034 
00035     static void
00036     ComputeDevelKernel(Database::RawDataSet* dataSet,
00037                        const KernelMatrixLocator& kerLoc,
00038                        CString inputFeatures)
00039     {
00040         bool override = CmdOptions::GetInstance().GetBool("override");
00041         if (DistributedAccessRepository().Exists(kerLoc) && !override)
00042         {
00043             ILOG_INFO("Skipping, Kernel already exists: " << kerLoc);
00044             return;
00045         }
00046 
00047         Feature::WeightedFeatureList fList(inputFeatures);
00048         if (fList.Size() == 0)
00049         {
00050             ILOG_ERROR("Empty feature list");
00051             return;
00052         }
00053 
00054         FeatureLocator featLoc(dataSet->GetLocator(), false, true,
00055                                kerLoc.GetFeatureIndexCat(),
00056                                fList.GetFeature(0), "");
00057         VirtualFeatureTable* features =
00058             VFTFactory::GetInstance().ConstructIOBufferReader(featLoc, true);
00059         int mSize = features->Size();
00060         DistributedAccess* kernel = new DistributedAccess(mSize, mSize, 1, 1,
00061                                                           0, 1);
00062         features->GetQuids(kernel->GetRowQuids());
00063         features->GetQuids(kernel->GetColumnQuids());
00064         delete features;
00065 
00066         bool isChi2 = (kerLoc.GetModel() == "chi2");
00067         for (int i=0 ; i<fList.Size() ; i++)
00068         {
00069             Real64 average = -1;
00070             if (isChi2)
00071             {
00072                 FeatureLocator loc = featLoc;
00073                 loc.SetSuffix(".average.raw");
00074                 loc.SetFeatureString(fList.GetFeature(i));
00075                 File file = FeatureTableRepository().ExposeFile(loc, false);
00076                 if (file.Valid())
00077                 {
00078                     std::vector<double> v;
00079                     file.ReadNative(std::back_inserter(v), true);
00080                     average = v[0];
00081                     kernel->SetHasOwnAverages(true);
00082                 }
00083                 else
00084                 {
00085                     ILOG_ERROR("Could not load average for " << loc);
00086                     delete kernel;
00087                     return;
00088                 }
00089             }
00090             kernel->AddFeature(fList.GetFeature(i), fList.GetWeight(i), average);
00091         }
00092 
00093         PrecomputeTask* pt = new PrecomputeTask(false, kerLoc, kernel, featLoc,
00094                                                 featLoc);
00095         Execute(pt);
00096     }
00097 
00098     static void
00099     ComputeTestKernel(const Persistency::Locator& dstLoc, CString walkType,
00100                       CString container, Database::RawDataSet* develSet,
00101                       DistributedAccess* develKernel, CString featureIndexCat,
00102                       CString model, CString kernelMatrixName)
00103     {
00104         ILOG_VAR(Impala.Core.Training.PrecomputeTask.ComputeTestKernel);
00105         KernelMatrixLocator thisKerLoc(dstLoc, false, walkType,
00106                                        develSet->GetSetName(), model,
00107                                        kernelMatrixName, container);
00108         thisKerLoc.SetFeatureIndexCat(featureIndexCat);
00109 
00110         bool override = CmdOptions::GetInstance().GetBool("override");
00111         if (KernelMatrixRepository().Exists(thisKerLoc) && !override)
00112         {
00113             ILOG_INFO("Skipping, KernelData already exists: " << thisKerLoc);
00114             return;
00115         }
00116 
00117         if (develKernel->GetNrFeatures() == 0)
00118         {
00119             ILOG_ERROR("No features");
00120             return;
00121         }
00122 
00123         FeatureLocator thisFeatLoc(dstLoc, false, false, walkType,
00124                                    "empty", container);
00125         FeatureLocator develFeatLoc(develSet->GetLocator(), false, true, "",
00126                                     "empty", "");
00127 
00128         PrecomputeTask* pt = new Training::PrecomputeTask
00129             (true, thisKerLoc, develKernel, thisFeatLoc, develFeatLoc);
00130         Execute(pt);
00131     }
00132 
00133     static void
00134     Execute(PrecomputeTask* pt)
00135     {
00136 //#ifdef KOENFIX
00137 //#define CPUFLOAT double
00138 //#else
00139         typedef float CPUFLOAT;
00140 //#endif
00141 
00142         bool GPU = Link::Cuda::CudaUsed();
00143         ILOG_INFO("GPU mode: " << GPU);
00144         int slabWidth = 1024;
00145         using Training::ComputeKernelMatrix;
00146         if (GPU)
00147         {
00148             ComputeKernelMatrix<float>(pt, slabWidth, GPU);
00149         }
00150         else
00151         {
00152 #ifdef CUDA
00153             ComputeKernelMatrix<CPUFLOAT>(pt, slabWidth, GPU);
00154 #else
00155             ComputeKernelMatrix<double>(pt, slabWidth, GPU);
00156 #endif
00157         }
00158         delete pt;
00159     }
00160 
00161     // Takes ownership of develKernel only when not computing a test kernel
00162     PrecomputeTask(bool computingTestKernel,
00163                    const KernelMatrixLocator& thisKerLoc,
00164                    DistributedAccess* develKernel,
00165                    const FeatureLocator& thisFeatLoc,
00166                    const FeatureLocator& develFeatLoc)
00167     {
00168         mComputingTestKernel = computingTestKernel;
00169         mDevelKernel = develKernel;
00170         mThisKerLoc = thisKerLoc;
00171         mThisFeatLoc = thisFeatLoc;
00172         mDevelFeatLoc = develFeatLoc;
00173         mMatrix = 0;
00174         mWriter = 0;
00175         for (int i=0 ; i<NrFeatures() ; i++)
00176         {
00177             ILOG_INFO(i << " " << GetFeatureAverage(i) << " (" <<
00178                       GetFeatureWeight(i) << "*" << GetFeatureName(i) << ")");
00179         }
00180     }
00181 
00182     virtual
00183     ~PrecomputeTask()
00184     {
00185         if (!mComputingTestKernel)
00186             delete mDevelKernel;
00187         if (mWriter)
00188             delete mWriter;
00189     }
00190 
00191     size_t
00192     NrFeatures() const
00193     {
00194         return mDevelKernel->GetNrFeatures();
00195     }
00196 
00197     String
00198     GetFeatureName(int feature) const
00199     {
00200         return mDevelKernel->GetFeature(feature);
00201     }
00202 
00203     Real64
00204     GetFeatureWeight(int feature) const
00205     {
00206         return mDevelKernel->GetWeight(feature);
00207     }
00208 
00209     Real64
00210     GetTotalFeatureWeight() const
00211     {
00212         return mDevelKernel->GetTotalWeight();
00213     }
00214 
00215     Real64
00216     GetFeatureAverage(int feature) const
00217     {
00218         return mDevelKernel->GetAverage(feature);
00219     }
00220 
00221     bool
00222     IsSymmetric() const
00223     {
00224         return (!mComputingTestKernel);
00225     }
00226 
00227     bool
00228     IsChi2() const
00229     {
00230         return (mThisKerLoc.GetModel() == "chi2");
00231     }
00232 
00233     FeatureLocator
00234     GetFeatureLocatorA(int index)
00235     {
00236         FeatureLocator loc = mDevelFeatLoc;
00237         loc.SetFeatureString(mDevelKernel->GetFeature(index));
00238         return loc;
00239     }
00240 
00241     FeatureLocator
00242     GetFeatureLocatorB(int index)
00243     {
00244         FeatureLocator loc = mThisFeatLoc;
00245         loc.SetFeatureString(mDevelKernel->GetFeature(index));
00246         return loc;
00247     }
00248 
00249     VirtualMatrix*
00250     GetWritableMatrix(int nrRow, int nrCol)
00251     {
00252         typedef Core::Matrix::VirtualMatrixFactory VMFactory;
00253         using Persistency::RepositoryGetFile;
00254 
00255         String path = KernelMatrixRepository().ExposeFilePath(mThisKerLoc, true);
00256         if (path.empty())
00257         {
00258             ILOG_ERROR("Couldn't get writable file for kernel " << mThisKerLoc);
00259             return 0;
00260         }
00261         String suffix = (mComputingTestKernel) ? ".tab"
00262                                                : ".precomputed.part-R0-C0.raw";
00263         String fName = FileNamePath(path) + FileNameBase(path) + suffix;
00264         Persistency::FileLocator fileLoc(mThisKerLoc, fName);
00265         File file = RepositoryGetFile(fileLoc, true, false);
00266         if (mComputingTestKernel)
00267         {
00268             mWriter = VFTFactory::GetInstance().ConstructIOBufferWriter
00269                                              (nrRow, nrCol, file, false);
00270             return mWriter->GetMatrix();
00271         }
00272         else
00273         {
00274             mMatrix = VMFactory::GetInstance().ConstructIOBufferWriter
00275                                              (nrRow, nrCol, file, true, true);
00276             return mMatrix;
00277         }
00278     }
00279 
00280     void
00281     Finalize()
00282     {
00283         if (mComputingTestKernel)
00284         {
00285             FeatureLocator loc = GetFeatureLocatorB(0);
00286             VirtualFeatureTable* features =
00287                 VFTFactory::GetInstance().ConstructIOBufferReader(loc, false);
00288             Table::QuidTable quids(features->Size());
00289             features->GetQuids(&quids);
00290             delete features;
00291             mWriter->AddQuids(&quids);
00292             mWriter->SetCompleted();
00293         }
00294         else
00295         {
00296             delete mMatrix;
00297             KernelMatrixLocator loc = mThisKerLoc;
00298             loc.SetDoParts(0);
00299             Persistency::DistributedAccessRepository().Add(loc, mDevelKernel);
00300         }
00301     }
00302 
00303 private:
00304 
00305     bool                mComputingTestKernel;
00306     KernelMatrixLocator mThisKerLoc;
00307     DistributedAccess*  mDevelKernel;
00308     FeatureLocator      mThisFeatLoc;
00309     FeatureLocator      mDevelFeatLoc;
00310     VirtualMatrix*      mMatrix; // used in computation of devel kernel
00311     Core::Feature::VirtualFeatureTableIOBufferWriter* mWriter;// for test kernel
00312 
00313     ILOG_VAR_DEC;
00314 };
00315 
00316 ILOG_VAR_INIT(PrecomputeTask, Impala.Core.Training);
00317 
00318 } // namespace Training
00319 } // namespace Core
00320 } // namespace Impala
00321 
00322 #endif

Generated on Thu Jan 13 09:04:41 2011 for ImpalaSrc by  doxygen 1.5.1