Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

mainPrecomputeKernelMatrix.cpp

Go to the documentation of this file.
00001 #include "Basis/CmdOptions.h"
00002 #include "Link/Mpi/MpiFuncs.h"
00003 
00004 #include "Persistency/DistributedAccessRepository.h"
00005 #include "Persistency/FeatureTableRepository.h"
00006 #include "Core/Matrix/MatFunc.h"
00007 #include "Core/Matrix/VirtualMatrixFactory.h"
00008 #include "Core/Array/Pattern/PatMPixOp.h"
00009 #include "Core/Array/PixSum.h"
00010 #include "Core/Array/Set.h"
00011 #include "Core/Array/Add.h"
00012 #include "Core/Array/DivVal.h"
00013 #include "Core/Array/MulVal.h"
00014 #include "Core/Array/Exp.h"
00015 #include "Core/Array/WriteRaw.h"
00016 #include "Core/Array/MakeFromValue.h"
00017 #include "Core/Array/PrintData.h"
00018 #include "Core/Feature/FeatureTable.h"
00019 #include "Core/Feature/FeatureDefinition.h"
00020 #include "Core/Feature/FeatureTable.h"
00021 #include "Core/Table/Write.h"
00022 #include "Core/Table/Equals.h"
00023 #include "Core/Table/Select.h"
00024 #ifdef SSE_USED
00025 #include "Core/Vector/Chi2DistanceSSE.h"
00026 #endif
00027 #include "Core/Vector/Apply.h"
00028 #include "Core/VideoSet/MakeVideoSet.h"
00029 #include "Core/ImageSet/MakeImageSet.h"
00030 #include "Core/Vector/Chi2Distance.h"
00031 #include "Core/Vector/HistogramIntersection.h"
00032 #include "Core/Vector/DotProduct.h"
00033 #include "Util/PropertySet.h"
00034 #include "Core/Database/MakeRawDataSet.h"
00035 #include "Util/TimeStats.h"
00036 
00037 namespace Impala
00038 {
00039 namespace Application
00040 {
00041 namespace Precompute
00042 {
00043 
00044 
00045 using namespace Core;
00046 using namespace Persistency;
00047 using Core::Database::RawDataSet;
00048 using Core::Matrix::DistributedAccess;
00049 
00053 bool
00054 CheckFeatures(CmdOptions& options, RawDataSet* thisSet, DistributedAccess* da)
00055 {
00056     ILOG_VAR(Impala.Application.Precompute.CheckFeatures);
00057 
00058     /* add input features through the INI file */
00059     String inputFeatures = options.GetString("inputFeatures");
00060     if (!inputFeatures.empty())
00061     {
00062         ILOG_DEBUG("imput features: "<< inputFeatures);
00063         Util::StringParser p(inputFeatures);
00064         while (!p.TheEnd())
00065         {
00066             double weight = p.GetDouble();
00067             ILOG_DEBUG("w: "<<weight);
00068             if (weight == 0.0)
00069             {
00070                 ILOG_ERROR_HEADNODE("\"" << inputFeatures <<
00071                                     "\" contains a zero weight");
00072                 return false;
00073             }
00074             String name = p.GetString(' ', false);
00075             if (name.empty())
00076             {
00077                 ILOG_ERROR_HEADNODE("inputFeatures: no name for weight");
00078                 return false;
00079             }
00080             ILOG_DEBUG("n: "<< name);
00081             String indexCat = options.GetString("featureIndexCat");
00082             FeatureLocator loc(thisSet->GetLocator(), false, true, indexCat,
00083                                name, "");
00084             if (!FeatureTableRepository().Exists(loc))
00085             {
00086                 ILOG_ERROR_HEADNODE("Could not open \"" << name << "\"");
00087                 return false;
00088             }
00089             da->AddFeature(name, weight, -1);
00090         }
00091     }
00092     return true;
00093 }
00094 
00099 Feature::FeatureTable*
00100 OpenFeatureTable(Feature::FeatureDefinition& featureDef, String indexCat,
00101                  RawDataSet* dataset)
00102 {
00103     ILOG_VAR(Impala.Application.Precompute.OpenFeatureTable);
00104     FeatureLocator loc(dataset->GetLocator(), false, true, indexCat,
00105                        featureDef.AsString(), "");
00106     loc.SetUseBroadcast(true);
00107     Feature::FeatureTable* f = FeatureTableRepository().Get(loc);
00108     ILOG_INFO_HEADNODE(featureDef.AsString() << ", size = " << f->Size() <<
00109                        "; length = " << f->GetFeatureVectorLength());
00110     return f;
00111 }
00112 
00119 Feature::FeatureTable*
00120 GetPartial(Feature::FeatureTable* f, int partnumber, int partcount)
00121 {
00122     ILOG_VAR(Impala.Application.Precompute.GetPartial);
00123     Feature::FeatureTable* part = 
00124         new Feature::FeatureTable(f->GetFeatureDefinition(), 0,
00125                                   f->GetFeatureVectorLength());
00126     DistributedAccess::IndexConverter indexConvert(f->Size(), partcount);
00127     int from = indexConvert.PartToIndex(partnumber);
00128     int to   = indexConvert.PartToIndex(partnumber + 1);
00129     Table::Select(part, f, from, to, true);
00130     return part;
00131 }
00132 
00133 
00148 Matrix::Mat*
00149 ComputeMatrix(DistributedAccess* da, Feature::FeatureTable* devel,
00150               Feature::FeatureTable* test, String distanceFunction)
00151 {
00152     ILOG_VAR(Impala.Application.Precompute.ComputeMatrix);
00153     Feature::FeatureTable* horizontal;
00154     Feature::FeatureTable* vertical;
00155     if (da->GetTotalPartCount() == 1)
00156     {
00157         horizontal = devel;
00158         vertical = test;
00159     }
00160     else
00161     {
00162         int myPart = Link::Mpi::MyId();
00163         int column = da->GetColumnPartOfPart(myPart);
00164         int row = da->GetRowPartOfPart(myPart);
00165         horizontal = GetPartial(devel, column, da->GetColumnPartCount());
00166         vertical = GetPartial(test, row, da->GetRowPartCount());
00167         delete devel;
00168         if (test != devel)
00169             delete test;
00170     }
00171 
00172     // fill a matrix with chi2 distance
00173     typedef Feature::FeatureTable::ColumnVectorSet VectorSet;
00174     VectorSet* vectorsH = horizontal->GetColumn2();
00175     VectorSet* vectorsV = vertical->GetColumn2();
00176     vectorsH->SetSize(horizontal->Size());
00177     vectorsV->SetSize(vertical->Size());
00178     Matrix::Mat* distanceMatrix = 0;
00179     if (distanceFunction == "dot")
00180     {
00181         distanceMatrix = Apply(&Vector::DotProduct, vectorsH, vectorsV);
00182     }
00183     else if (distanceFunction == "histogramintersection")
00184     {
00185         distanceMatrix = Apply(&Vector::HistogramIntersectionDouble,
00186                                vectorsH, vectorsV);
00187     }
00188     else
00189     {
00190 #ifdef SSE_USED
00191         distanceMatrix = Apply(&Vector::Chi2DistanceSSE, vectorsH, vectorsV);
00192 #else
00193         distanceMatrix = Apply(&Vector::Chi2Distance, vectorsH, vectorsV);
00194 #endif
00195     }
00196     delete horizontal;
00197     if (horizontal != vertical)
00198         delete vertical;
00199     return distanceMatrix;
00200 }
00201 
00202 
00205 double
00206 GetAverage(Matrix::Mat *distanceMatrix)
00207 {
00208     double average;
00209     average = Array::PixSum(distanceMatrix);
00210     average = Link::Mpi::AllReduceSum(average);
00211     int pixcount = distanceMatrix->W() * distanceMatrix->H();
00212     pixcount = Link::Mpi::AllReduceSum(pixcount);
00213     average /= pixcount;
00214     return average;
00215 }
00216 
00217 int
00218 Precompute(CmdOptions& options)
00219 {
00220     ILOG_VAR(Impala.Application.Precompute.Precompute);
00221     RawDataSet* thisSet = Core::Database::MakeRawDataSet(options.GetArg(0));
00222     if (!thisSet)
00223     {
00224         ILOG_INFO_HEADNODE("Failed to open thisSet " << options.GetArg(0));
00225         return 1;
00226     }
00227     
00228     String develSetName = options.GetArg(1);
00229     ILOG_INFO_HEADNODE("DevelSetName = [" << develSetName << "]");
00230     RawDataSet* develSet = 0;
00231     if (!develSetName.empty())
00232     {
00233         develSet = Core::Database::MakeRawDataSet(develSetName);
00234         if (!develSet)
00235         {
00236             ILOG_INFO_HEADNODE("Failed to open develSet " << develSetName);
00237             return 1;
00238         }
00239     }
00240     String model = options.GetArg(2);
00241     String kernel = options.GetArg(3);
00242     String indexCat = options.GetString("featureIndexCat");
00243     KernelMatrixLocator daLoc(thisSet->GetLocator(), true, "",
00244                               develSetName, model, kernel, "");
00245     daLoc.SetFeatureIndexCat(indexCat);
00246     if (!options.GetBool("override") &&
00247         DistributedAccessRepository().Exists(daLoc))
00248     {
00249         ILOG_INFO("Kernel already exists.");
00250         return 0;
00251     }
00252 
00253     int startFeature = options.GetInt("startFeature");
00254     DistributedAccess* da = 0;
00255     if (startFeature == 0)
00256     {
00257         da = new DistributedAccess();
00258         if (!da->Valid(true, false))
00259             return 1;
00260 
00261         ILOG_INFO_HEADNODE("Using a " << da->GetRowPartCount() << "x" <<
00262                            da->GetColumnPartCount() << "grid");
00263         if (!CheckFeatures(options, thisSet, da))
00264             return 1;
00265     }
00266     else
00267     {
00268         daLoc.SetIncrement(startFeature);
00269         daLoc.SetStartNode(0);
00270         daLoc.SetNodeCount(Link::Mpi::NrProcs());
00271         da = DistributedAccessRepository().Get(daLoc);
00272     }
00273 
00274 
00275     if (develSet)
00276     {
00277         KernelMatrixLocator devLoc(develSet->GetLocator(), true, "", "",
00278                                    model, kernel, "");
00279         devLoc.SetFeatureIndexCat(indexCat);
00280         devLoc.SetDoParts(0);
00281         devLoc.SetStartNode(0);
00282         devLoc.SetNodeCount(Link::Mpi::NrProcs());
00283         DistributedAccess* devDA = DistributedAccessRepository().Get(devLoc);
00284         for (int i = 0; i < devDA->GetNrFeatures(); i++)
00285         {
00286             double average = devDA->GetAverage(i);
00287             ILOG_INFO_HEADNODE("average " << i << " " << average);
00288             da->SetAverage(i, average);
00289         }
00290         // Could do some more checking here...
00291         delete devDA;
00292     }
00293     else
00294     {
00295         if (model == "chi2")
00296             da->SetHasOwnAverages(true);
00297     }
00298 
00299     String distanceFunction = options.GetString("distanceFunction");
00300     int numberFeatures = options.GetInt("numberFeatures");
00301     if (numberFeatures == -1)
00302         numberFeatures = da->GetNrFeatures() - startFeature;
00303     if (startFeature + numberFeatures >= da->GetNrFeatures())
00304         numberFeatures = da->GetNrFeatures() - startFeature;
00305     Matrix::Mat* accumulator = 0;
00306     if (startFeature != 0)
00307         accumulator = da->StealPart();
00308     double totalweight = 0;
00309     for (int i=0 ; i<startFeature ; i++)
00310         totalweight += da->GetWeight(i);
00311     Util::TimeStats statsCompute;
00312     statsCompute.AddGroup("read features");
00313     statsCompute.AddGroup("compute");
00314     Util::TimeStats statsOverall;
00315     statsOverall.AddGroupsFromSub(&statsCompute);
00316     statsOverall.AddGroup("final exp");
00317     statsOverall.AddGroup("write");
00318     statsOverall.MeasureFirst();
00319     for (int i=startFeature ; i<startFeature+numberFeatures ; i++)
00320     {
00321         statsCompute.MeasureFirst();
00322         // open feature tables
00323         double weight = da->GetWeight(i);
00324         Feature::FeatureDefinition fDef(da->GetFeature(i));
00325         Feature::FeatureTable* f2 = OpenFeatureTable(fDef, indexCat, thisSet);
00326         Feature::FeatureTable* f1 = f2;
00327         if (develSet)
00328             f1 = OpenFeatureTable(fDef, indexCat, develSet);
00329 
00330         if ((Link::Mpi::MyId() == 0) && (i == 0))
00331         {
00332             Table::Copy(da->GetColumnQuids(), f1);
00333             da->SetColumns(f1->Size());
00334             Table::Copy(da->GetRowQuids(), f2);
00335             da->SetRows(f2->Size());
00336         }
00337 
00338         statsCompute.MeasureNext();
00339         // compute kernel distances between features
00340         Matrix::Mat *distanceMatrix = ComputeMatrix(da, f1, f2,
00341                                                     distanceFunction);
00342         // f1 & f2 are deleted by ComputeMatrix
00343         
00344         // if this is the 'learn' kernel matrix we compute the averages and
00345         // communicate between nodes to make sure everyone has access to them
00346         // otherwise, the averages are already loaded into the vector
00347         // Note : hik kernel does not use averages
00348         if (da->GetHasOwnAverages())
00349         {
00350             if (distanceFunction == "dot")
00351             {
00352                 da->SetAverage(i, 0.0);
00353             }
00354             else
00355             {
00356                 double average = GetAverage(distanceMatrix);
00357                 da->SetAverage(i, average);
00358             }
00359         }
00360 
00361         // accumulate; this is the part inside the 'exp' in the kernel function
00362         MulVal(distanceMatrix, distanceMatrix, weight);
00363         if (distanceFunction == "chi2")
00364         {
00365             DivVal(distanceMatrix, distanceMatrix, -da->GetAverage(i));
00366         }
00367         if (accumulator == 0)
00368             Set(accumulator, distanceMatrix);
00369         else
00370             Add(accumulator, accumulator, distanceMatrix);
00371         delete distanceMatrix;
00372 
00373         totalweight += weight;
00374         statsCompute.MeasureLast();
00375         ILOG_INFO_HEADNODE("Compute " << statsCompute.AsString());
00376         ILOG_INFO_HEADNODE("Memory " << Process::MemoryInfo::GetUsageString());
00377     }
00378     statsOverall.MeasureFromSub(&statsCompute);
00379 
00380     ILOG_INFO_HEADNODE("finalising...");
00381     if (startFeature + numberFeatures == da->GetNrFeatures())
00382     {
00383         // only thing to do is weigh and exp
00384         DivVal(accumulator, accumulator, totalweight);
00385         if (distanceFunction == "chi2")
00386         {
00387             Exp(accumulator, accumulator);
00388         }
00389         daLoc.SetIncrement(0);
00390         daLoc.SetWriteReal32(true);
00391     }
00392     else
00393     {
00394         daLoc.SetIncrement(startFeature + numberFeatures);
00395     }
00396 
00397     statsOverall.MeasureNext();
00398     typedef Matrix::VirtualMatrixFactory VirtualMatrixFactory;
00399     VirtualMatrixFactory& vmf = VirtualMatrixFactory::GetInstance();
00400     Matrix::VirtualMatrix* vm = vmf.ConstructMemory(accumulator);
00401     da->AddPart(vm);
00402     DistributedAccessRepository().Add(daLoc, da);
00403     statsOverall.MeasureLast();
00404     ILOG_INFO_HEADNODE("Overall " << statsOverall.AsString());
00405     delete da;
00406     return 0;
00407 }
00408 
00409 int
00410 ChopMatrix(CmdOptions& options)
00411 {
00412     ILOG_VAR(Impala.Application.Precompute.ChopMatrix);
00413     RawDataSet* thisSet = Core::Database::MakeRawDataSet(options.GetArg(0));
00414     if (!thisSet)
00415     {
00416         ILOG_INFO_HEADNODE("failed to open dataset " << options.GetArg(0));
00417         return 1;
00418     }
00419     RawDataSet* develSet = 0;
00420     String develSetName = options.GetArg(1);
00421     String model = options.GetArg(2);
00422     String kernel = options.GetArg(3);
00423     int nrParts = atol(options.GetArg(5));
00424     KernelMatrixLocator srcLoc(thisSet->GetLocator(), true, "",
00425                                develSetName, model, kernel, "");
00426     srcLoc.SetFeatureIndexCat(options.GetString("featureIndexCat"));
00427 
00428     srcLoc.SetStartNode(0);
00429     srcLoc.SetNodeCount(Link::Mpi::NrProcs());
00430     srcLoc.SetDoParts(2);
00431     DistributedAccess* srcDA = DistributedAccessRepository().Get(srcLoc);
00432     srcDA->Dump();
00433 
00434     Real64* lineBuf = new Real64[srcDA->NrCol()];
00435     typedef Matrix::Mat32 Mat32;
00436     DistributedAccess dstDA(srcDA->NrRow(), srcDA->NrCol(), nrParts, nrParts,
00437                             0, 1);
00438     dstDA.CopyQuidsFrom(srcDA);
00439     dstDA.CopyFeaturesFrom(srcDA);
00440     KernelMatrixLocator dstLoc = srcLoc;
00441     dstLoc.SetWalkType("chopped");
00442     dstLoc.SetDoParts(0);
00443     DistributedAccessRepository().Add(dstLoc, &dstDA);
00444 
00445     for (int row=0 ; row<nrParts ; row++)
00446     {
00447         for (int col=0 ; col<nrParts ; col++)
00448         {
00449             int startY = dstDA.GetRowStartOfPart(row);
00450             int endY = dstDA.GetRowEndOfPart(row);
00451             int startX = dstDA.GetColumnStartOfPart(col);
00452             int endX = dstDA.GetColumnEndOfPart(col);
00453             ILOG_INFO("row=" << row << ", col=" << col << ", startX=" << startX
00454                       << ", endX=" << endX << ", startY=:" << startY
00455                       << ", endY=" << endY);
00456             Mat32* mat = Matrix::MatCreate<Mat32>(endY - startY, endX - startX);
00457             for (int y=startY ; y<endY ; y++)
00458             {
00459                 srcDA->GetRow(y, lineBuf, srcDA->NrCol());
00460                 float* ptr = mat->CPB(0, y-startY);
00461                 for (int x=startX ; x<endX ; x++)
00462                     ptr[x-startX] = lineBuf[x];
00463             }
00464             Array::PrintDataCorners(mat, 2, 2);
00465 
00466             String filename = "PrecomputedKernels/chopped/" + model + "/"
00467                 + kernel + "/" + kernel + ".precomputed.part-R" + MakeString(row)
00468                 + "-C" + MakeString(col) + ".raw" ;
00469             Persistency::File file = RepositoryInFileSystem::GetInstance().
00470                 GetFile(dstLoc, "", filename, true, false);
00471             ILOG_INFO("Saving part in " << file.GetPath());
00472             Array::WriteRaw(mat, file, 1);
00473             delete mat;
00474         }
00475     }
00476 
00477     delete lineBuf;
00478     delete srcDA;
00479     return 0;
00480 }
00481 
00482 int
00483 MainPrecompute(int argc, char* argv[])
00484 {
00485     Link::Mpi::Init(&argc, &argv);
00486     CmdOptions& options = Impala::CmdOptions::GetInstance();
00487     options.Initialise(false, false, true);
00488     options.AddOption(0, "startFeature", "idx", "0");
00489     options.AddOption(0, "numberFeatures", "nr", "-1");
00490     options.AddOption(0, "featureIndexCat", "name", "");
00491     options.AddOption(0, "distanceFunction", "dot|chi2|histogramintersection", "chi2");
00492 
00493     if (! options.ParseArgs(argc, argv, "<dataset> <develset> <model> <kernel>", 4))
00494     {
00495         Link::Mpi::Finalize();
00496         return 1;
00497     }
00498         
00499     ILOG_VAR(Impala.Application.Precompute.Main);
00500 
00501     Timer timer;
00502     if ((options.GetNrArg() == 6) && (options.GetArg(4) == "chop"))
00503         ChopMatrix(options);
00504     else
00505         Precompute(options);
00506 
00507     ILOG_INFO_HEADNODE("Total execution time = " << timer.SplitTimeStr());
00508     int nrOfErrors = ILOG_ERROR_COUNT;
00509     nrOfErrors = Link::Mpi::ReduceSum(nrOfErrors);
00510     ILOG_INFO_HEADNODE("Root: total nr error = " << nrOfErrors);
00511 
00512     Link::Mpi::Finalize();
00513     return nrOfErrors;
00514 }
00515 
00516 } // namespace Precompute
00517 } // namespace Application
00518 } // namespace Impala
00519 
00520 int
00521 main(int argc, char* argv[])
00522 {
00523     return Impala::Application::Precompute::MainPrecompute(argc, argv);
00524 }

Generated on Thu Jan 13 09:03:43 2011 for ImpalaSrc by  doxygen 1.5.1