00001 #include "Basis/CmdOptions.h"
00002 #include "Link/Mpi/MpiFuncs.h"
00003
00004 #include "Core/Matrix/DistributedAccess.h"
00005 #include "Core/Array/Pattern/PatMPixOp.h"
00006 #include "Core/Array/PixSum.h"
00007 #include "Core/Array/Set.h"
00008 #include "Core/Array/Add.h"
00009 #include "Core/Array/DivVal.h"
00010 #include "Core/Array/MulVal.h"
00011 #include "Core/Array/Exp.h"
00012 #include "Core/Array/WriteRaw.h"
00013 #include "Core/Array/MakeFromValue.h"
00014 #include "Core/Array/PrintData.h"
00015 #include "Core/Feature/FeatureTable.h"
00016 #include "Core/Feature/FeatureDefinition.h"
00017 #include "Core/Feature/FeatureTable.h"
00018 #include "Core/Table/Write.h"
00019 #include "Core/Table/Equals.h"
00020 #include "Core/Table/Select.h"
00021 #include "Core/Vector/Apply.h"
00022 #include "Core/VideoSet/MakeVideoSet.h"
00023 #include "Core/ImageSet/MakeImageSet.h"
00024 #include "Core/Training/KernelFunctions.h"
00025 #include "Util/PropertySet.h"
00026 #include "Core/Database/MakeRawDataSet.h"
00027
00028 namespace Impala
00029 {
00030 namespace Application
00031 {
00032
00033 using namespace Core;
00034 using Core::Database::RawDataSet;
00035
00041 bool CheckParameteres(CmdOptions& options,
00042 RawDataSet* dataset, RawDataSet* dataset2,
00043 std::vector<Feature::FeatureDefinition>& featureDefs,
00044 std::vector<double>& weights,
00045 String resultname)
00046 {
00047
00048
00049
00050
00051 ILOG_VAR(Application.mainPrecomputeKernelMatrix.CheckParameters);
00052
00053
00054 String inputFeatures = options.GetString("inputFeatures");
00055 if(!inputFeatures.empty())
00056 {
00057 ILOG_DEBUG("imput features: "<< inputFeatures);
00058 Util::StringParser p(inputFeatures);
00059 while(!p.TheEnd())
00060 {
00061 double weight = p.GetDouble();
00062 ILOG_DEBUG("w: "<<weight);
00063 if(weight == 0.0)
00064 {
00065 ILOG_ERROR_ONCE("\"" << inputFeatures << "\" contains a zero weight");
00066 return false;
00067 }
00068 String name = p.GetString(' ', false);
00069 if(name.empty())
00070 {
00071 ILOG_ERROR_ONCE("While parsing --inputFeatures: unbalanced length, empty name");
00072 return false;
00073 }
00074 ILOG_DEBUG("n: "<< name);
00075 weights.push_back(weight);
00076 Feature::FeatureDefinition fdef(name);
00077 name = dataset->GetFilePathFeatureIndex(fdef, "", false, false);
00078 if(name.empty())
00079 {
00080 ILOG_ERROR_ONCE("could not open \"" << fdef.AsString() << "\"");
00081 return false;
00082 }
00083 featureDefs.push_back(fdef);
00084 }
00085 }
00086
00087 if(Link::Mpi::MyId() == 0)
00088 {
00089 String filename;
00090 if (dataset2)
00091 filename = dataset2->GetFilePathPrecomputedKernels
00092 (resultname+".input.txt", dataset->GetSetName(),
00093 true, false);
00094 else
00095 filename = dataset->GetFilePathPrecomputedKernels
00096 (resultname+".input.txt", "", true, false);
00097
00098 Util::Database* db = (dataset2) ? dataset2->GetDatabase()
00099 : dataset->GetDatabase();
00100 Util::IOBuffer* buf = db->GetIOBuffer(filename, false, false, "tmp");
00101 if(buf)
00102 {
00103 for(int i=0 ; i<weights.size() ; ++i)
00104 {
00105 String s = MakeString(weights[i]) + " " + featureDefs[i].AsString();
00106 buf->Puts(s);
00107 }
00108 delete buf;
00109 }
00110 else
00111 ILOG_ERROR_ONCE("could not open input.txt");
00112 }
00113
00114 return true;
00115 }
00116
00117
00122 Feature::FeatureTable* OpenFeatureTable(Feature::FeatureDefinition& featureDef,
00123 RawDataSet* dataset)
00124 {
00125
00126
00127
00128
00129
00130 ILOG_VAR(Application.mainPrecomputeKernelMatrix.OpenFeatureTable);
00131 Feature::FeatureTable* f = 0;
00132 std::string filename =
00133 dataset->GetFilePathFeatureIndex(featureDef, "", false, false);
00134 ILOG_INFO_ONCE("opening " << featureDef.AsString());
00135 #ifndef MPI_USED
00136 f = Feature::FeatureTable::MakeFromFile(featureDef, filename,
00137 dataset->GetDatabase());
00138
00139
00140 #else
00141
00146 Util::IOBuffer* buf;
00147 int myId = Link::Mpi::MyId();
00148 if(myId == 0)
00149 {
00150
00151 Util::Database* db = dataset->GetDatabase();
00152 buf = db->GetIOBuffer(filename, true, true, "");
00153 if(buf == 0 || !buf->Valid())
00154 {
00155 ILOG_ERROR("node0 couldn't open file");
00156 }
00157 }
00158 else
00159 {
00160 buf = new Util::IOBuffer();
00161 }
00162 Broadcast(buf);
00163 if (buf && buf->Valid())
00164 {
00165 f = new Feature::FeatureTable(featureDef);
00166 Read(f, buf);
00167 delete buf;
00168 }
00169 else
00170 ILOG_ERROR(myId << ": could not read file from buffer");
00171 Link::Mpi::Barrier();
00172 #endif
00173 ILOG_INFO_ONCE("size = " << f->Size() << "; length = " << f->GetFeatureVectorLength());
00174
00175 return f;
00176 }
00177
00186 void GetPartialTask(int& partcount, int& row, int& column)
00187 {
00188 ILOG_VAR(Application.mainPrecomputeKernelMatrix.GetPartialTask);
00189 if(Link::Mpi::NrProcs() == 1)
00190 {
00191 partcount = 1;
00192 row = 0;
00193 column = 0;
00194 return;
00195 }
00196
00197 int cpuCount = Link::Mpi::NrProcs();
00198 partcount = sqrt((double)cpuCount);
00199 int myId = Link::Mpi::MyId();
00200 column = myId % partcount;
00201 row = myId / partcount;
00202 if(row >= partcount)
00203 {
00204 partcount = 0;
00205 row = 0;
00206 column = 0;
00207 }
00208 }
00209
00216 void CheckQuids(Feature::FeatureTable* f, RawDataSet* set, RawDataSet* set2,
00217 String resultname, int part, int partcount)
00218 {
00219 ILOG_VAR(Application.mainPrecomputeKernelMatrix.CheckQuids);
00220 bool equal = true;
00221 bool fileExists = true;
00222
00223 String basename = resultname + MakeString(part) + "of" +
00224 MakeString(partcount) + ".quids";
00225 String filename;
00226 if (set2)
00227 {
00228
00229 filename = set2->GetFilePathPrecomputedKernels
00230 (basename, set->GetSetName(), false, true);
00231 if (filename.empty())
00232 {
00233 fileExists = false;
00234 filename = set2->GetFilePathPrecomputedKernels
00235 (basename, set->GetSetName(), true, false);
00236 }
00237 }
00238 else
00239 {
00240
00241 filename = set->GetFilePathPrecomputedKernels(basename, "", false,
00242 true);
00243 if (filename.empty())
00244 {
00245 fileExists = false;
00246 filename = set->GetFilePathPrecomputedKernels(basename, "", true,
00247 false);
00248 }
00249 }
00250 Table::QuidTable* quids = f->GetQuidTable();
00251 Table::QuidTable* prevquids = new Table::QuidTable;
00252 if (fileExists)
00253 {
00254 Read(prevquids, filename, set->GetDatabase());
00255 equal = Equals(quids, prevquids);
00256 }
00257 else
00258 {
00259 if (!Write(quids, filename, set->GetDatabase(), true))
00260 ILOG_ERROR("could neither read nor write " << filename);
00261 }
00262 delete prevquids;
00263 delete quids;
00264 if(!equal)
00265 {
00266 ILOG_ERROR("the input tables do not have the same Quids");
00267 exit(0);
00268 }
00269 }
00270
00271
00272
00279 Feature::FeatureTable* GetPartial(Feature::FeatureTable* f,
00280 int partnumber, int partcount)
00281 {
00282 ILOG_VAR(Application.mainPrecomputeKernelMatrix.GetPartial);
00283 Feature::FeatureTable* part =
00284 new Feature::FeatureTable(f->GetFeatureDefinition(),0,
00285 f->GetFeatureVectorLength());
00286 int s = f->Size();
00287 Matrix::DistributedAccess::IndexConverter indexConvert(f->Size(), partcount);
00288 int from = indexConvert.PartToIndex(partnumber);
00289 int to = indexConvert.PartToIndex(partnumber + 1);
00290
00291
00292 Table::Select(part, f, from, to, true);
00293 return part;
00294 }
00295
00296
00309 Matrix::Mat* ComputeMatrix(Feature::FeatureTable *devel,
00310 Feature::FeatureTable *test, String resultname,
00311 RawDataSet* set, RawDataSet* set2)
00312 {
00313 ILOG_VAR(Application.mainPrecomputeKernelMatrix.ComputeMatrix);
00314 int partcount, row, column;
00315 GetPartialTask(partcount, row, column);
00316 if(partcount == 0)
00317 ILOG_ERROR_NODE("partcount == 0: shouldn't reach this point in code");
00318 Feature::FeatureTable* horizontal;
00319 Feature::FeatureTable* vertical;
00320 if(partcount == 1)
00321 {
00322 horizontal = devel;
00323 vertical = test;
00324 }
00325 else
00326 {
00327 horizontal = GetPartial(devel, column, partcount);
00328 vertical = GetPartial(test, row, partcount);
00329 delete devel;
00330 if(test != devel)
00331 delete test;
00332 }
00333 if(column == 0)
00334 CheckQuids(vertical, set, set2, resultname + ".rowindices", row, partcount);
00335 if(row == 0)
00336 CheckQuids(horizontal, set, set2, resultname + ".columnindices", column, partcount);
00337
00338
00339
00340 typedef Feature::FeatureTable::ColumnVectorSet VectorSet;
00341 VectorSet* vectorsH = horizontal->GetColumn2();
00342 VectorSet* vectorsV = vertical->GetColumn2();
00343 vectorsH->SetSize(horizontal->Size());
00344 vectorsV->SetSize(vertical->Size());
00345 Matrix::Mat* distanceMatrix = Apply(&Training::Chi2Distance, vectorsH, vectorsV);
00346 delete horizontal;
00347 if(horizontal != vertical)
00348 delete vertical;
00349 return distanceMatrix;
00350 }
00351
00352
00355 double GetAverage(Matrix::Mat *distanceMatrix)
00356 {
00357 double average;
00358 average = Array::PixSum(distanceMatrix);
00359 average = Link::Mpi::AllReduceSum(average);
00360 int pixcount = distanceMatrix->W() * distanceMatrix->H();
00361 pixcount = Link::Mpi::AllReduceSum(pixcount);
00362 average /= pixcount;
00363 return average;
00364 }
00365
00366 void
00367 WriteInfoFile(int columns, int rows, int partcount, String filepathname,
00368 Util::Database* db)
00369 {
00370 if(Link::Mpi::MyId() != 0)
00371 return;
00372 ILOG_VAR(Application.mainPrecomputeKernelMatrix.CheckQuids);
00373
00374 Util::IOBuffer* buf = db->GetIOBuffer(filepathname, false, false, "tmp");
00375 Util::PropertySet ps;
00376 ps.Add("totalrows", rows);
00377 ps.Add("totalcolumns", columns);
00378 ps.Add("rowparts", partcount);
00379 ps.Add("columnparts", partcount);
00380 ps.Print(buf);
00381 delete buf;
00382 }
00383
00384 void
00385 WriteAverages(String filepathname, Util::Database* db,
00386 std::vector<double> averages)
00387 {
00388 if(Link::Mpi::MyId() == 0)
00389 {
00390 Util::IOBuffer* buf = db->GetIOBuffer(filepathname, false, false, "tmp");
00391 if (buf)
00392 {
00393 for(int i=0 ; i<averages.size() ; ++i)
00394 buf->Write(&averages[i], sizeof(double));
00395 }
00396 delete buf;
00397 }
00398 }
00399
00400 void
00401 LoadAverages(RawDataSet* set, String filepathname, std::vector<double>& averages)
00402 {
00403 Util::Database* db = set->GetDatabase();
00404 Util::IOBuffer* buf = db->GetIOBuffer(filepathname, true, false, "");
00405 if (buf)
00406 {
00407 for(int i=0 ; i<averages.size() ; ++i)
00408 buf->Read(&averages[i], sizeof(double));
00409 }
00410 delete buf;
00411 }
00412
00413
00414 void
00415 WriteResult(String resultname, Util::Database* db, Matrix::Mat *accumulator)
00416 {
00417 int partcount, row, column;
00418 GetPartialTask(partcount, row, column);
00419 std::ostringstream filename;
00420 filename << resultname << ".precomputed";
00421 filename << ".part-R" << row << "-C" << column;
00422 filename << ".raw";
00423 WriteRaw(accumulator, filename.str(), db, true);
00424 }
00425
00426 int mainPrecomputeKernelMatrix(CmdOptions& options)
00427 {
00428
00429
00430
00431 ILOG_VAR(Application.mainPrecomputeKernelMatrix);
00432 RawDataSet* dataset = Core::Database::MakeRawDataSet(options.GetArg(0));
00433 if(!dataset)
00434 {
00435 ILOG_INFO_ONCE("failed to open dataset " << options.GetArg(0));
00436 return 1;
00437 }
00438 String name2 = options.GetArg(1);
00439 RawDataSet* dataset2 = 0;
00440 if((name2 != "0") && (atof(name2) == 0.0))
00441 {
00442 dataset2 = Core::Database::MakeRawDataSet(name2, true);
00443 }
00444 if(dataset2)
00445 ILOG_INFO_ONCE("2 sets loaded: " << dataset2->GetSetName());
00446
00447
00448
00449
00450 int partcount, row, column;
00451 GetPartialTask(partcount, row, column);
00452 ILOG_INFO_ONCE("Using a " << partcount << "x" << partcount << "grid");
00453 int cpuCount = Link::Mpi::NrProcs();
00454 ILOG_INFO_ONCE("total nodes = " << cpuCount << " unused nodes = "
00455 << cpuCount - partcount*partcount);
00456 if(partcount == 0)
00457 {
00458 ILOG_ERROR("unsupported number of nodes, number of nodes MUST be"
00459 << " sqare of a natural number");
00460 exit(0);
00461 }
00462 else
00463 {
00464 ILOG_DEBUG_NODE("col = " << column << " row = " << row);
00465 }
00466
00467 std::vector<Feature::FeatureDefinition> featureDefs;
00468 std::vector<double> weights;
00469 String resultname = options.GetArg(options.GetNrArg()-1);
00470 if(!CheckParameteres(options, dataset, dataset2, featureDefs, weights,
00471 resultname))
00472 return 0;
00473 std::vector<double> averages;
00474 averages.resize(weights.size());
00475 if(dataset2)
00476 {
00477 String filename = resultname + ".averages.raw";
00478 filename = dataset->GetFilePathPrecomputedKernels(filename, "", false, false);
00479 if(filename == "")
00480 {
00481 ILOG_ERROR("couldn't read averages");
00482 exit(0);
00483 }
00484 LoadAverages(dataset, filename, averages);
00485 for (int i = 0; i < averages.size(); i++)
00486 {
00487 ILOG_INFO_ONCE("average " << i << " " << averages[i]);
00488 }
00489 }
00490
00491 if(!dataset2)
00492 {
00493 String tmp = dataset->GetFilePathPrecomputedKernels(resultname+".info",
00494 "", true, true);
00495 String tmp2 = dataset->GetFilePathPrecomputedKernels(resultname+".averages.raw",
00496 "", true, true);
00497 if(tmp.empty() && tmp2.empty())
00498 {
00499 ILOG_INFO("A complete kernel already exists, nothing to do.");
00500 return 0;
00501 }
00502 if(tmp.empty())
00503 {
00504 ILOG_ERROR("Incomplete kernel exists! Cleanup files first.");
00505 return 1;
00506 }
00507 }
00508
00509 Util::Database* db = (dataset2) ? dataset2->GetDatabase()
00510 : dataset->GetDatabase();
00511 Matrix::Mat* accumulator = 0;
00512 double totalweight = 0;
00513 for(int i=0 ; i<weights.size() ; ++i)
00514 {
00515
00516 double weight = weights[i];
00517 Feature::FeatureTable* f1 = OpenFeatureTable(featureDefs[i], dataset);
00518 Feature::FeatureTable* f2 = f1;
00519 if(dataset2)
00520 f2 = OpenFeatureTable(featureDefs[i], dataset2);
00521 if(Link::Mpi::MyId() == 0 && i == 0)
00522 {
00523
00524 String filename = resultname+".info";
00525 if(dataset2)
00526 filename = dataset2->GetFilePathPrecomputedKernels(filename,
00527 dataset->GetSetName(), true, false);
00528 else
00529 filename = dataset->GetFilePathPrecomputedKernels(filename,
00530 "", true, false);
00531 if(filename == "")
00532 {
00533 ILOG_ERROR("could not save .info file");
00534 exit(0);
00535 }
00536 ILOG_INFO("Saving info in " << filename);
00537 WriteInfoFile(f1->Size(), f2->Size(), partcount, filename, db);
00538
00539
00540 if (dataset2)
00541 filename = dataset2->GetFilePathPrecomputedKernels(
00542 resultname + ".columns.tab", dataset->GetSetName(),
00543 true, false);
00544 else
00545 filename = dataset->GetFilePathPrecomputedKernels
00546 (resultname + ".columns.tab", "", true, false);
00547 ILOG_INFO("Saving columns in " << filename);
00548 if (!filename.empty())
00549 Write(f1->GetQuidTable(), filename, dataset->GetDatabase(),
00550 true);
00551 else
00552 ILOG_ERROR("Unable to write columns.tab");
00553
00554 }
00555
00556 Matrix::Mat *distanceMatrix = ComputeMatrix(f1, f2, resultname,
00557 dataset, dataset2);
00558
00559
00560
00561
00562
00563 if(dataset2 == 0)
00564 {
00565 double average = GetAverage(distanceMatrix);
00566 averages[i] = average;
00567 }
00568
00569
00570 MulVal(distanceMatrix, distanceMatrix, weight);
00571 DivVal(distanceMatrix, distanceMatrix, -averages[i]);
00572 if(accumulator == 0)
00573 Set(accumulator, distanceMatrix);
00574 else
00575 Add(accumulator, accumulator, distanceMatrix);
00576 delete distanceMatrix;
00577
00578 totalweight += weight;
00579 }
00580
00581 ILOG_INFO_ONCE("finalising...");
00582
00583 DivVal(accumulator, accumulator, totalweight);
00584 Exp(accumulator, accumulator);
00585
00586 if(dataset2 == 0)
00587 {
00588 if(Link::Mpi::MyId() == 0)
00589 {
00590 String filename = resultname + ".averages.raw";
00591 filename = dataset->GetFilePathPrecomputedKernels(filename, "", true, false);
00592 WriteAverages(filename, db, averages);
00593 }
00594 }
00595
00596 if (dataset2)
00597 {
00598 resultname = dataset2->GetFilePathPrecomputedKernels
00599 (resultname, dataset->GetSetName(), true, false);
00600 }
00601 else
00602 {
00603 resultname = dataset->GetFilePathPrecomputedKernels
00604 (resultname, "", true, false);
00605 }
00606 WriteResult(resultname, db, accumulator);
00607 return 0;
00608 }
00609
00610 Matrix::Mat* CreateTestMat(int row, int column, int partCount, int totalSize)
00611 {
00612 int xbegin = (totalSize*column)/partCount;
00613 int xend = (totalSize*(column+1))/partCount;
00614 int ybegin = (totalSize*row)/partCount;
00615 int yend = (totalSize*(row+1))/partCount;
00616 Matrix::Mat* m = new Matrix::Mat(xend - xbegin, yend - ybegin, 0, 0);
00617 for(int y=ybegin ; y<yend ; ++y)
00618 {
00619 for(int x=xbegin ; x<xend ; ++x)
00620 *(m->CPB(x-xbegin,y-ybegin)) = (x+1.) / (y+1.);
00621 }
00622 return m;
00623
00624 }
00625
00626 void
00627 CreateTestQuids(String resultname, Util::Database* db, int part, int partCount,
00628 int totalSize)
00629 {
00630 ILOG_VAR(Application.CreateTestQuids);
00631 String filename = resultname + MakeString(part) + "of" + MakeString(partCount) + ".quids";
00632 int begin = (totalSize*part)/partCount;
00633 int end = (totalSize*(part+1))/partCount;
00634 Table::QuidTable* quids = new Table::QuidTable(end-begin);
00635 for(int i=begin ; i<end ; ++i)
00636 quids->Add(i);
00637
00638 if (!Write(quids, filename, db, false))
00639 ILOG_ERROR("could not write " << filename);
00640 delete quids;
00641 }
00642
00643 int makeTestMatrix(CmdOptions& options)
00644 {
00645 ILOG_VAR(Application.makeTestMatrix);
00646 int partCount = 3;
00647 int totalSize = 11;
00648 String resultname = options.GetArg(options.GetNrArg()-1);
00649 Util::Database* db = &Util::Database::GetInstance();
00650 WriteInfoFile(totalSize, totalSize, partCount, resultname, db);
00651
00652 for(int row=0 ; row<partCount ; ++row)
00653 for(int column=0 ; column<partCount ; ++column)
00654 {
00655 Matrix::Mat *testMat = CreateTestMat(row, column, partCount, totalSize);
00656 if(column == 0)
00657 CreateTestQuids(resultname + ".rowindices", db, row, partCount, totalSize);
00658 if(row == 0)
00659 CreateTestQuids(resultname + ".columnindices", db, column, partCount, totalSize);
00660
00661 std::ostringstream filename;
00662 filename << resultname << ".precomputed";
00663 filename << ".part-R" << row << "-C" << column;
00664 filename << ".raw";
00665 WriteRaw(testMat, filename.str(), db, true);
00666 delete testMat;
00667 }
00668 return 1;
00669 }
00670
00671 }
00672 }
00673
00674 int
00675 main(int argc, char* argv[])
00676 {
00677 Impala::Link::Mpi::Init(&argc, &argv);
00678 Impala::CmdOptions& options = Impala::CmdOptions::GetInstance();
00679 options.Initialise(false, false, true);
00680 options.AddOption(0, "inputFeatures", "specify the input features and their weights (INI file only, because prun messes up the command line)", "");
00681
00682 int code = 1;
00683 if (options.ParseArgs(argc, argv, "<dataset> <result name>", 2))
00684 {
00685 if(options.GetArg(0) == "maketest")
00686 code = Impala::Application::makeTestMatrix(options);
00687 else
00688 code = Impala::Application::mainPrecomputeKernelMatrix(options);
00689 }
00690
00691 Impala::Link::Mpi::Finalize();
00692 return code;
00693 }