00001 #include "Basis/CmdOptions.h"
00002 #include "Link/Mpi/MpiFuncs.h"
00003
00004 #include "Persistency/DistributedAccessRepository.h"
00005 #include "Persistency/FeatureTableRepository.h"
00006 #include "Core/Matrix/MatFunc.h"
00007 #include "Core/Matrix/VirtualMatrixFactory.h"
00008 #include "Core/Array/Pattern/PatMPixOp.h"
00009 #include "Core/Array/PixSum.h"
00010 #include "Core/Array/Set.h"
00011 #include "Core/Array/Add.h"
00012 #include "Core/Array/DivVal.h"
00013 #include "Core/Array/MulVal.h"
00014 #include "Core/Array/Exp.h"
00015 #include "Core/Array/WriteRaw.h"
00016 #include "Core/Array/MakeFromValue.h"
00017 #include "Core/Array/PrintData.h"
00018 #include "Core/Feature/FeatureTable.h"
00019 #include "Core/Feature/FeatureDefinition.h"
00020 #include "Core/Feature/FeatureTable.h"
00021 #include "Core/Table/Write.h"
00022 #include "Core/Table/Equals.h"
00023 #include "Core/Table/Select.h"
00024 #ifdef SSE_USED
00025 #include "Core/Vector/Chi2DistanceSSE.h"
00026 #endif
00027 #include "Core/Vector/Apply.h"
00028 #include "Core/VideoSet/MakeVideoSet.h"
00029 #include "Core/ImageSet/MakeImageSet.h"
00030 #include "Core/Vector/Chi2Distance.h"
00031 #include "Core/Vector/HistogramIntersection.h"
00032 #include "Core/Vector/DotProduct.h"
00033 #include "Util/PropertySet.h"
00034 #include "Core/Database/MakeRawDataSet.h"
00035 #include "Util/TimeStats.h"
00036
00037 namespace Impala
00038 {
00039 namespace Application
00040 {
00041 namespace Precompute
00042 {
00043
00044
00045 using namespace Core;
00046 using namespace Persistency;
00047 using Core::Database::RawDataSet;
00048 using Core::Matrix::DistributedAccess;
00049
00053 bool
00054 CheckFeatures(CmdOptions& options, RawDataSet* thisSet, DistributedAccess* da)
00055 {
00056 ILOG_VAR(Impala.Application.Precompute.CheckFeatures);
00057
00058
00059 String inputFeatures = options.GetString("inputFeatures");
00060 if (!inputFeatures.empty())
00061 {
00062 ILOG_DEBUG("imput features: "<< inputFeatures);
00063 Util::StringParser p(inputFeatures);
00064 while (!p.TheEnd())
00065 {
00066 double weight = p.GetDouble();
00067 ILOG_DEBUG("w: "<<weight);
00068 if (weight == 0.0)
00069 {
00070 ILOG_ERROR_HEADNODE("\"" << inputFeatures <<
00071 "\" contains a zero weight");
00072 return false;
00073 }
00074 String name = p.GetString(' ', false);
00075 if (name.empty())
00076 {
00077 ILOG_ERROR_HEADNODE("inputFeatures: no name for weight");
00078 return false;
00079 }
00080 ILOG_DEBUG("n: "<< name);
00081 String indexCat = options.GetString("featureIndexCat");
00082 FeatureLocator loc(thisSet->GetLocator(), false, true, indexCat,
00083 name, "");
00084 if (!FeatureTableRepository().Exists(loc))
00085 {
00086 ILOG_ERROR_HEADNODE("Could not open \"" << name << "\"");
00087 return false;
00088 }
00089 da->AddFeature(name, weight, -1);
00090 }
00091 }
00092 return true;
00093 }
00094
00099 Feature::FeatureTable*
00100 OpenFeatureTable(Feature::FeatureDefinition& featureDef, String indexCat,
00101 RawDataSet* dataset)
00102 {
00103 ILOG_VAR(Impala.Application.Precompute.OpenFeatureTable);
00104 FeatureLocator loc(dataset->GetLocator(), false, true, indexCat,
00105 featureDef.AsString(), "");
00106 loc.SetUseBroadcast(true);
00107 Feature::FeatureTable* f = FeatureTableRepository().Get(loc);
00108 ILOG_INFO_HEADNODE(featureDef.AsString() << ", size = " << f->Size() <<
00109 "; length = " << f->GetFeatureVectorLength());
00110 return f;
00111 }
00112
00119 Feature::FeatureTable*
00120 GetPartial(Feature::FeatureTable* f, int partnumber, int partcount)
00121 {
00122 ILOG_VAR(Impala.Application.Precompute.GetPartial);
00123 Feature::FeatureTable* part =
00124 new Feature::FeatureTable(f->GetFeatureDefinition(), 0,
00125 f->GetFeatureVectorLength());
00126 DistributedAccess::IndexConverter indexConvert(f->Size(), partcount);
00127 int from = indexConvert.PartToIndex(partnumber);
00128 int to = indexConvert.PartToIndex(partnumber + 1);
00129 Table::Select(part, f, from, to, true);
00130 return part;
00131 }
00132
00133
00148 Matrix::Mat*
00149 ComputeMatrix(DistributedAccess* da, Feature::FeatureTable* devel,
00150 Feature::FeatureTable* test, String distanceFunction)
00151 {
00152 ILOG_VAR(Impala.Application.Precompute.ComputeMatrix);
00153 Feature::FeatureTable* horizontal;
00154 Feature::FeatureTable* vertical;
00155 if (da->GetTotalPartCount() == 1)
00156 {
00157 horizontal = devel;
00158 vertical = test;
00159 }
00160 else
00161 {
00162 int myPart = Link::Mpi::MyId();
00163 int column = da->GetColumnPartOfPart(myPart);
00164 int row = da->GetRowPartOfPart(myPart);
00165 horizontal = GetPartial(devel, column, da->GetColumnPartCount());
00166 vertical = GetPartial(test, row, da->GetRowPartCount());
00167 delete devel;
00168 if (test != devel)
00169 delete test;
00170 }
00171
00172
00173 typedef Feature::FeatureTable::ColumnVectorSet VectorSet;
00174 VectorSet* vectorsH = horizontal->GetColumn2();
00175 VectorSet* vectorsV = vertical->GetColumn2();
00176 vectorsH->SetSize(horizontal->Size());
00177 vectorsV->SetSize(vertical->Size());
00178 Matrix::Mat* distanceMatrix = 0;
00179 if (distanceFunction == "dot")
00180 {
00181 distanceMatrix = Apply(&Vector::DotProduct, vectorsH, vectorsV);
00182 }
00183 else if (distanceFunction == "histogramintersection")
00184 {
00185 distanceMatrix = Apply(&Vector::HistogramIntersectionDouble,
00186 vectorsH, vectorsV);
00187 }
00188 else
00189 {
00190 #ifdef SSE_USED
00191 distanceMatrix = Apply(&Vector::Chi2DistanceSSE, vectorsH, vectorsV);
00192 #else
00193 distanceMatrix = Apply(&Vector::Chi2Distance, vectorsH, vectorsV);
00194 #endif
00195 }
00196 delete horizontal;
00197 if (horizontal != vertical)
00198 delete vertical;
00199 return distanceMatrix;
00200 }
00201
00202
00205 double
00206 GetAverage(Matrix::Mat *distanceMatrix)
00207 {
00208 double average;
00209 average = Array::PixSum(distanceMatrix);
00210 average = Link::Mpi::AllReduceSum(average);
00211 int pixcount = distanceMatrix->W() * distanceMatrix->H();
00212 pixcount = Link::Mpi::AllReduceSum(pixcount);
00213 average /= pixcount;
00214 return average;
00215 }
00216
00217 int
00218 Precompute(CmdOptions& options)
00219 {
00220 ILOG_VAR(Impala.Application.Precompute.Precompute);
00221 RawDataSet* thisSet = Core::Database::MakeRawDataSet(options.GetArg(0));
00222 if (!thisSet)
00223 {
00224 ILOG_INFO_HEADNODE("Failed to open thisSet " << options.GetArg(0));
00225 return 1;
00226 }
00227
00228 String develSetName = options.GetArg(1);
00229 ILOG_INFO_HEADNODE("DevelSetName = [" << develSetName << "]");
00230 RawDataSet* develSet = 0;
00231 if (!develSetName.empty())
00232 {
00233 develSet = Core::Database::MakeRawDataSet(develSetName);
00234 if (!develSet)
00235 {
00236 ILOG_INFO_HEADNODE("Failed to open develSet " << develSetName);
00237 return 1;
00238 }
00239 }
00240 String model = options.GetArg(2);
00241 String kernel = options.GetArg(3);
00242 String indexCat = options.GetString("featureIndexCat");
00243 KernelMatrixLocator daLoc(thisSet->GetLocator(), true, "",
00244 develSetName, model, kernel, "");
00245 daLoc.SetFeatureIndexCat(indexCat);
00246 if (!options.GetBool("override") &&
00247 DistributedAccessRepository().Exists(daLoc))
00248 {
00249 ILOG_INFO("Kernel already exists.");
00250 return 0;
00251 }
00252
00253 int startFeature = options.GetInt("startFeature");
00254 DistributedAccess* da = 0;
00255 if (startFeature == 0)
00256 {
00257 da = new DistributedAccess();
00258 if (!da->Valid(true, false))
00259 return 1;
00260
00261 ILOG_INFO_HEADNODE("Using a " << da->GetRowPartCount() << "x" <<
00262 da->GetColumnPartCount() << "grid");
00263 if (!CheckFeatures(options, thisSet, da))
00264 return 1;
00265 }
00266 else
00267 {
00268 daLoc.SetIncrement(startFeature);
00269 daLoc.SetStartNode(0);
00270 daLoc.SetNodeCount(Link::Mpi::NrProcs());
00271 da = DistributedAccessRepository().Get(daLoc);
00272 }
00273
00274
00275 if (develSet)
00276 {
00277 KernelMatrixLocator devLoc(develSet->GetLocator(), true, "", "",
00278 model, kernel, "");
00279 devLoc.SetFeatureIndexCat(indexCat);
00280 devLoc.SetDoParts(0);
00281 devLoc.SetStartNode(0);
00282 devLoc.SetNodeCount(Link::Mpi::NrProcs());
00283 DistributedAccess* devDA = DistributedAccessRepository().Get(devLoc);
00284 for (int i = 0; i < devDA->GetNrFeatures(); i++)
00285 {
00286 double average = devDA->GetAverage(i);
00287 ILOG_INFO_HEADNODE("average " << i << " " << average);
00288 da->SetAverage(i, average);
00289 }
00290
00291 delete devDA;
00292 }
00293 else
00294 {
00295 if (model == "chi2")
00296 da->SetHasOwnAverages(true);
00297 }
00298
00299 String distanceFunction = options.GetString("distanceFunction");
00300 int numberFeatures = options.GetInt("numberFeatures");
00301 if (numberFeatures == -1)
00302 numberFeatures = da->GetNrFeatures() - startFeature;
00303 if (startFeature + numberFeatures >= da->GetNrFeatures())
00304 numberFeatures = da->GetNrFeatures() - startFeature;
00305 Matrix::Mat* accumulator = 0;
00306 if (startFeature != 0)
00307 accumulator = da->StealPart();
00308 double totalweight = 0;
00309 for (int i=0 ; i<startFeature ; i++)
00310 totalweight += da->GetWeight(i);
00311 Util::TimeStats statsCompute;
00312 statsCompute.AddGroup("read features");
00313 statsCompute.AddGroup("compute");
00314 Util::TimeStats statsOverall;
00315 statsOverall.AddGroupsFromSub(&statsCompute);
00316 statsOverall.AddGroup("final exp");
00317 statsOverall.AddGroup("write");
00318 statsOverall.MeasureFirst();
00319 for (int i=startFeature ; i<startFeature+numberFeatures ; i++)
00320 {
00321 statsCompute.MeasureFirst();
00322
00323 double weight = da->GetWeight(i);
00324 Feature::FeatureDefinition fDef(da->GetFeature(i));
00325 Feature::FeatureTable* f2 = OpenFeatureTable(fDef, indexCat, thisSet);
00326 Feature::FeatureTable* f1 = f2;
00327 if (develSet)
00328 f1 = OpenFeatureTable(fDef, indexCat, develSet);
00329
00330 if ((Link::Mpi::MyId() == 0) && (i == 0))
00331 {
00332 Table::Copy(da->GetColumnQuids(), f1);
00333 da->SetColumns(f1->Size());
00334 Table::Copy(da->GetRowQuids(), f2);
00335 da->SetRows(f2->Size());
00336 }
00337
00338 statsCompute.MeasureNext();
00339
00340 Matrix::Mat *distanceMatrix = ComputeMatrix(da, f1, f2,
00341 distanceFunction);
00342
00343
00344
00345
00346
00347
00348 if (da->GetHasOwnAverages())
00349 {
00350 if (distanceFunction == "dot")
00351 {
00352 da->SetAverage(i, 0.0);
00353 }
00354 else
00355 {
00356 double average = GetAverage(distanceMatrix);
00357 da->SetAverage(i, average);
00358 }
00359 }
00360
00361
00362 MulVal(distanceMatrix, distanceMatrix, weight);
00363 if (distanceFunction == "chi2")
00364 {
00365 DivVal(distanceMatrix, distanceMatrix, -da->GetAverage(i));
00366 }
00367 if (accumulator == 0)
00368 Set(accumulator, distanceMatrix);
00369 else
00370 Add(accumulator, accumulator, distanceMatrix);
00371 delete distanceMatrix;
00372
00373 totalweight += weight;
00374 statsCompute.MeasureLast();
00375 ILOG_INFO_HEADNODE("Compute " << statsCompute.AsString());
00376 ILOG_INFO_HEADNODE("Memory " << Process::MemoryInfo::GetUsageString());
00377 }
00378 statsOverall.MeasureFromSub(&statsCompute);
00379
00380 ILOG_INFO_HEADNODE("finalising...");
00381 if (startFeature + numberFeatures == da->GetNrFeatures())
00382 {
00383
00384 DivVal(accumulator, accumulator, totalweight);
00385 if (distanceFunction == "chi2")
00386 {
00387 Exp(accumulator, accumulator);
00388 }
00389 daLoc.SetIncrement(0);
00390 daLoc.SetWriteReal32(true);
00391 }
00392 else
00393 {
00394 daLoc.SetIncrement(startFeature + numberFeatures);
00395 }
00396
00397 statsOverall.MeasureNext();
00398 typedef Matrix::VirtualMatrixFactory VirtualMatrixFactory;
00399 VirtualMatrixFactory& vmf = VirtualMatrixFactory::GetInstance();
00400 Matrix::VirtualMatrix* vm = vmf.ConstructMemory(accumulator);
00401 da->AddPart(vm);
00402 DistributedAccessRepository().Add(daLoc, da);
00403 statsOverall.MeasureLast();
00404 ILOG_INFO_HEADNODE("Overall " << statsOverall.AsString());
00405 delete da;
00406 return 0;
00407 }
00408
00409 int
00410 ChopMatrix(CmdOptions& options)
00411 {
00412 ILOG_VAR(Impala.Application.Precompute.ChopMatrix);
00413 RawDataSet* thisSet = Core::Database::MakeRawDataSet(options.GetArg(0));
00414 if (!thisSet)
00415 {
00416 ILOG_INFO_HEADNODE("failed to open dataset " << options.GetArg(0));
00417 return 1;
00418 }
00419 RawDataSet* develSet = 0;
00420 String develSetName = options.GetArg(1);
00421 String model = options.GetArg(2);
00422 String kernel = options.GetArg(3);
00423 int nrParts = atol(options.GetArg(5));
00424 KernelMatrixLocator srcLoc(thisSet->GetLocator(), true, "",
00425 develSetName, model, kernel, "");
00426 srcLoc.SetFeatureIndexCat(options.GetString("featureIndexCat"));
00427
00428 srcLoc.SetStartNode(0);
00429 srcLoc.SetNodeCount(Link::Mpi::NrProcs());
00430 srcLoc.SetDoParts(2);
00431 DistributedAccess* srcDA = DistributedAccessRepository().Get(srcLoc);
00432 srcDA->Dump();
00433
00434 Real64* lineBuf = new Real64[srcDA->NrCol()];
00435 typedef Matrix::Mat32 Mat32;
00436 DistributedAccess dstDA(srcDA->NrRow(), srcDA->NrCol(), nrParts, nrParts,
00437 0, 1);
00438 dstDA.CopyQuidsFrom(srcDA);
00439 dstDA.CopyFeaturesFrom(srcDA);
00440 KernelMatrixLocator dstLoc = srcLoc;
00441 dstLoc.SetWalkType("chopped");
00442 dstLoc.SetDoParts(0);
00443 DistributedAccessRepository().Add(dstLoc, &dstDA);
00444
00445 for (int row=0 ; row<nrParts ; row++)
00446 {
00447 for (int col=0 ; col<nrParts ; col++)
00448 {
00449 int startY = dstDA.GetRowStartOfPart(row);
00450 int endY = dstDA.GetRowEndOfPart(row);
00451 int startX = dstDA.GetColumnStartOfPart(col);
00452 int endX = dstDA.GetColumnEndOfPart(col);
00453 ILOG_INFO("row=" << row << ", col=" << col << ", startX=" << startX
00454 << ", endX=" << endX << ", startY=:" << startY
00455 << ", endY=" << endY);
00456 Mat32* mat = Matrix::MatCreate<Mat32>(endY - startY, endX - startX);
00457 for (int y=startY ; y<endY ; y++)
00458 {
00459 srcDA->GetRow(y, lineBuf, srcDA->NrCol());
00460 float* ptr = mat->CPB(0, y-startY);
00461 for (int x=startX ; x<endX ; x++)
00462 ptr[x-startX] = lineBuf[x];
00463 }
00464 Array::PrintDataCorners(mat, 2, 2);
00465
00466 String filename = "PrecomputedKernels/chopped/" + model + "/"
00467 + kernel + "/" + kernel + ".precomputed.part-R" + MakeString(row)
00468 + "-C" + MakeString(col) + ".raw" ;
00469 Persistency::File file = RepositoryInFileSystem::GetInstance().
00470 GetFile(dstLoc, "", filename, true, false);
00471 ILOG_INFO("Saving part in " << file.GetPath());
00472 Array::WriteRaw(mat, file, 1);
00473 delete mat;
00474 }
00475 }
00476
00477 delete lineBuf;
00478 delete srcDA;
00479 return 0;
00480 }
00481
00482 int
00483 MainPrecompute(int argc, char* argv[])
00484 {
00485 Link::Mpi::Init(&argc, &argv);
00486 CmdOptions& options = Impala::CmdOptions::GetInstance();
00487 options.Initialise(false, false, true);
00488 options.AddOption(0, "startFeature", "idx", "0");
00489 options.AddOption(0, "numberFeatures", "nr", "-1");
00490 options.AddOption(0, "featureIndexCat", "name", "");
00491 options.AddOption(0, "distanceFunction", "dot|chi2|histogramintersection", "chi2");
00492
00493 if (! options.ParseArgs(argc, argv, "<dataset> <develset> <model> <kernel>", 4))
00494 {
00495 Link::Mpi::Finalize();
00496 return 1;
00497 }
00498
00499 ILOG_VAR(Impala.Application.Precompute.Main);
00500
00501 Timer timer;
00502 if ((options.GetNrArg() == 6) && (options.GetArg(4) == "chop"))
00503 ChopMatrix(options);
00504 else
00505 Precompute(options);
00506
00507 ILOG_INFO_HEADNODE("Total execution time = " << timer.SplitTimeStr());
00508 int nrOfErrors = ILOG_ERROR_COUNT;
00509 nrOfErrors = Link::Mpi::ReduceSum(nrOfErrors);
00510 ILOG_INFO_HEADNODE("Root: total nr error = " << nrOfErrors);
00511
00512 Link::Mpi::Finalize();
00513 return nrOfErrors;
00514 }
00515
00516 }
00517 }
00518 }
00519
00520 int
00521 main(int argc, char* argv[])
00522 {
00523 return Impala::Application::Precompute::MainPrecompute(argc, argv);
00524 }