Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

int Impala::Application::mainPrecomputeKernelMatrix(CmdOptions& options)

Definition at line 426 of file mainPrecomputeKernelMatrix.cpp.

References Impala::Core::Array::Add(), Impala::atof(), CheckParameteres(), ComputeMatrix(), Impala::Core::Array::DivVal(), Impala::Core::Array::Exp(), Impala::CmdOptions::GetArg(), GetAverage(), Impala::CmdOptions::GetNrArg(), GetPartialTask(), Impala::Core::Feature::FeatureTable::GetQuidTable(), ILOG_ERROR, ILOG_INFO, ILOG_VAR, LoadAverages(), Impala::Core::Database::MakeRawDataSet(), Impala::Core::Array::MulVal(), OpenFeatureTable(), Impala::Core::Array::Set(), Impala::Core::Table::Table::Size(), Impala::Core::Table::Write(), WriteAverages(), WriteInfoFile(), and WriteResult().

Referenced by main().

00427 {   // Combines per-feature distance matrices (weighted, normalised, exponentiated) into one matrix and writes it via WriteResult().
00428     // This name is 26 characters. If you are tracing unexplainable C++ string
00429     // exceptions, check your log4cpp.properties to see if you are limiting
00430     // this name to just 25 characters...
00431     ILOG_VAR(Application.mainPrecomputeKernelMatrix);
00432     RawDataSet* dataset = Core::Database::MakeRawDataSet(options.GetArg(0));   // argument 0 names the first dataset
00433     if(!dataset)
00434     {
00435         ILOG_INFO_ONCE("failed to open dataset " << options.GetArg(0));
00436         return 1;   // the first dataset is mandatory
00437     }
00438     String name2 = options.GetArg(1);   // argument 1 may name an optional second dataset
00439     RawDataSet* dataset2 = 0;
00440     if((name2 != "0") && (atof(name2) == 0.0))    // not the literal "0" and atof() yields 0.0, so treat it as a dataset name
00441     {
00442         dataset2 = Core::Database::MakeRawDataSet(name2, true);   // meaning of the `true` flag is not visible here — TODO confirm
00443     }
00444     if(dataset2)
00445         ILOG_INFO_ONCE("2 sets loaded: " << dataset2->GetSetName());
00446 
00447     // for distribution we compute what part of the matrix we must compute
00448     // in single process case partcount == 1
00449     // in multi process case it is sqrt(NrProcs()) (or 0 if this node doesn't compute anything)
00450     int partcount, row, column;   // passed uninitialised to GetPartialTask, which presumably fills all three
00451     GetPartialTask(partcount, row, column);
00452     ILOG_INFO_ONCE("Using a " << partcount << "x" << partcount << "grid");   // NOTE(review): message has no space before "grid"
00453     int cpuCount = Link::Mpi::NrProcs();
00454     ILOG_INFO_ONCE("total nodes = " << cpuCount << " unused nodes = " 
00455                    << cpuCount - partcount*partcount);   // nodes left over outside the partcount x partcount grid
00456     if(partcount == 0)   // a partcount of 0 is treated as a fatal configuration error
00457     {
00458         ILOG_ERROR("unsupported number of nodes, number of nodes MUST be"
00459                    << " sqare of a natural number");   // NOTE(review): "sqare" is a typo for "square" in the emitted message
00460         exit(0);   // NOTE(review): terminates with status 0 on an error path — confirm this is intended
00461     }
00462     else
00463     {
00464         ILOG_DEBUG_NODE("col = " << column << " row = " << row);
00465     }
00466     // featureDefs and weights are handed to CheckParameteres and afterwards indexed in parallel
00467     std::vector<Feature::FeatureDefinition> featureDefs;
00468     std::vector<double> weights;
00469     String resultname = options.GetArg(options.GetNrArg()-1);   // the last argument is the result name
00470     if(!CheckParameteres(options, dataset, dataset2, featureDefs, weights, 
00471                          resultname))
00472         return 0;   // NOTE(review): returns 0 although the parameter check failed — confirm intended
00473     std::vector<double> averages;
00474     averages.resize(weights.size());   // one average per weighted feature
00475     if(dataset2)
00476     {
00477         String filename = resultname + ".averages.raw";
00478         filename = dataset->GetFilePathPrecomputedKernels(filename, "", false, false);   // resolved via the FIRST dataset; flag semantics not visible here
00479         if(filename == "")
00480         {
00481             ILOG_ERROR("couldn't read averages");
00482             exit(0);   // NOTE(review): status 0 on an error path — confirm
00483         }
00484         LoadAverages(dataset, filename, averages);
00485         for (int i = 0; i < averages.size(); i++)   // NOTE(review): signed/unsigned comparison
00486         {
00487             ILOG_INFO_ONCE("average " << i << " " << averages[i]);
00488         }
00489     }
00490     // single-dataset case: decide from the two path lookups below whether a kernel is already present
00491     if(!dataset2)
00492     {
00493         String tmp = dataset->GetFilePathPrecomputedKernels(resultname+".info",
00494                        "", true, true);
00495         String tmp2 = dataset->GetFilePathPrecomputedKernels(resultname+".averages.raw",
00496                        "", true, true);
00497         if(tmp.empty() && tmp2.empty())   // both lookups came back empty; presumably that means both files already exist — TODO confirm
00498         {
00499             ILOG_INFO("A complete kernel already exists, nothing to do.");
00500             return 0;
00501         }
00502         if(tmp.empty())   // NOTE(review): only the .info lookup is tested; if only tmp2 is empty, execution continues
00503         {
00504             ILOG_ERROR("Incomplete kernel exists! Cleanup files first.");
00505             return 1;
00506         }
00507     }
00508     // the database handle comes from the second dataset when there is one, otherwise from the first
00509     Util::Database* db = (dataset2) ? dataset2->GetDatabase()
00510                                     : dataset->GetDatabase();
00511     Matrix::Mat* accumulator = 0;   // running sum of the scaled per-feature matrices; set on the first iteration
00512     double totalweight = 0;
00513     for(int i=0 ; i<weights.size() ; ++i)   // NOTE(review): signed/unsigned comparison; assumes featureDefs has at least weights.size() entries
00514     {
00515         // open feature tables
00516         double weight = weights[i];
00517         Feature::FeatureTable* f1 = OpenFeatureTable(featureDefs[i], dataset);
00518         Feature::FeatureTable* f2 = f1;   // with a single dataset both sides use the same table
00519         if(dataset2)
00520             f2 = OpenFeatureTable(featureDefs[i], dataset2);
00521         if(Link::Mpi::MyId() == 0 && i == 0)   // side files are written once, by MPI id 0, during the first feature
00522         {
00523             // create a .info file once
00524             String filename = resultname+".info";
00525             if(dataset2)
00526                 filename = dataset2->GetFilePathPrecomputedKernels(filename,
00527                     dataset->GetSetName(), true, false);
00528             else
00529                 filename = dataset->GetFilePathPrecomputedKernels(filename,
00530                     "", true, false);
00531             if(filename == "")
00532             {
00533                 ILOG_ERROR("could not save .info file");
00534                 exit(0);   // NOTE(review): status 0 on an error path — confirm
00535             }
00536             ILOG_INFO("Saving info in " << filename);
00537             WriteInfoFile(f1->Size(), f2->Size(), partcount, filename, db);
00538 
00539             // write the total list of devel-quids once
00540             if (dataset2)
00541                 filename = dataset2->GetFilePathPrecomputedKernels(
00542                           resultname + ".columns.tab", dataset->GetSetName(),
00543                           true, false);
00544             else
00545                 filename = dataset->GetFilePathPrecomputedKernels
00546                     (resultname + ".columns.tab", "", true, false);
00547             ILOG_INFO("Saving columns in " << filename);   // logged before the emptiness check below
00548             if (!filename.empty())
00549                 Write(f1->GetQuidTable(), filename, dataset->GetDatabase(),   // NOTE(review): uses dataset's database, not `db`, even when dataset2 is set — confirm
00550                       true);
00551             else
00552                 ILOG_ERROR("Unable to write columns.tab");   // unlike the .info failure above, this one is not fatal
00553 
00554         }
00555         // compute kernel distances between features
00556         Matrix::Mat *distanceMatrix = ComputeMatrix(f1, f2, resultname, 
00557                                                     dataset, dataset2);
00558         // f1 & f2 are deleted by ComputeMatrix
00559         
00560         // if this is the 'learn' kernel matrix we compute the averages and
00561         // communicate between nodes to make sure everyone has access to them
00562         // otherwise, the averages are already loaded into the vector
00563         if(dataset2 == 0)
00564         {
00565             double average = GetAverage(distanceMatrix);   // presumably GetAverage performs the inter-node exchange mentioned above — TODO confirm
00566             averages[i] = average;
00567         }
00568 
00569         // accumulate; this is the part inside the 'exp' in the kernel function
00570         MulVal(distanceMatrix, distanceMatrix, weight);          // scale by this feature's weight
00571         DivVal(distanceMatrix, distanceMatrix, -averages[i]);    // divide by the negated average
00572         if(accumulator == 0)
00573             Set(accumulator, distanceMatrix);   // presumably allocates a copy, since distanceMatrix is deleted below — TODO confirm
00574         else
00575             Add(accumulator, accumulator, distanceMatrix);
00576         delete distanceMatrix;
00577 
00578         totalweight += weight;
00579     }
00580 
00581     ILOG_INFO_ONCE("finalising...");
00582     // only thing to do is weigh and exp
00583     DivVal(accumulator, accumulator, totalweight);   // NOTE(review): accumulator is still 0 if weights is empty; presumably CheckParameteres rules that out
00584     Exp(accumulator, accumulator);
00585     // single-dataset case: MPI id 0 stores the averages computed in the loop above
00586     if(dataset2 == 0)
00587     {
00588         if(Link::Mpi::MyId() == 0)
00589         {
00590             String filename = resultname + ".averages.raw";
00591             filename = dataset->GetFilePathPrecomputedKernels(filename, "", true, false);
00592             WriteAverages(filename, db, averages);   // NOTE(review): filename is not checked for "" here, unlike the .info path
00593         }
00594     }
00595     // write the matrix
00596     if (dataset2)
00597     {
00598         resultname = dataset2->GetFilePathPrecomputedKernels
00599             (resultname, dataset->GetSetName(), true, false);
00600     }
00601     else
00602     {
00603         resultname = dataset->GetFilePathPrecomputedKernels
00604             (resultname, "", true, false);
00605     }
00606     WriteResult(resultname, db, accumulator);   // accumulator and the datasets are not deleted in this function; ownership is not visible here
00607     return 0;
00608 }

Here is the call graph for this function:


Generated on Fri Mar 19 10:36:38 2010 for ImpalaSrc by  doxygen 1.5.1