Definition at line 426 of file mainPrecomputeKernelMatrix.cpp. References Impala::Core::Array::Add(), Impala::atof(), CheckParameteres(), ComputeMatrix(), Impala::Core::Array::DivVal(), Impala::Core::Array::Exp(), Impala::CmdOptions::GetArg(), GetAverage(), Impala::CmdOptions::GetNrArg(), GetPartialTask(), Impala::Core::Feature::FeatureTable::GetQuidTable(), ILOG_ERROR, ILOG_INFO, ILOG_VAR, LoadAverages(), Impala::Core::Database::MakeRawDataSet(), Impala::Core::Array::MulVal(), OpenFeatureTable(), Impala::Core::Array::Set(), Impala::Core::Table::Table::Size(), Impala::Core::Table::Write(), WriteAverages(), WriteInfoFile(), and WriteResult(). Referenced by main(). 00427 { 00428 // this name is 26 characters. if you are tracing unexplainable C++ string 00429 // exceptions, check your log4cpp.properties to see if you are limiting 00430 // this name to just 25 characters... 00431 ILOG_VAR(Application.mainPrecomputeKernelMatrix); 00432 RawDataSet* dataset = Core::Database::MakeRawDataSet(options.GetArg(0)); 00433 if(!dataset) 00434 { 00435 ILOG_INFO_ONCE("failed to open dataset " << options.GetArg(0)); 00436 return 1; 00437 } 00438 String name2 = options.GetArg(1); 00439 RawDataSet* dataset2 = 0; 00440 if((name2 != "0") && (atof(name2) == 0.0)) // it is not a number, or 0 00441 { 00442 dataset2 = Core::Database::MakeRawDataSet(name2, true); 00443 } 00444 if(dataset2) 00445 ILOG_INFO_ONCE("2 sets loaded: " << dataset2->GetSetName()); 00446 00447 // for distribution we compute what part of the matrix we must compute 00448 // in single process case partcount == 1 00449 // in multi process case it is sqrt(NrProcs()) (or 0 if this node doesn't compute anything) 00450 int partcount, row, column; 00451 GetPartialTask(partcount, row, column); 00452 ILOG_INFO_ONCE("Using a " << partcount << "x" << partcount << "grid"); 00453 int cpuCount = Link::Mpi::NrProcs(); 00454 ILOG_INFO_ONCE("total nodes = " << cpuCount << " unused nodes = " 00455 << cpuCount - partcount*partcount); 00456 if(partcount == 0) 00457 { 00458 ILOG_ERROR("unsupported number of nodes, number of nodes MUST be" 00459 << " sqare of a natural number"); 00460 exit(0); 00461 } 00462 else 00463 { 00464 ILOG_DEBUG_NODE("col = " << column << " row = " << row); 00465 } 00466 00467 std::vector<Feature::FeatureDefinition> featureDefs; 00468 std::vector<double> weights; 00469 String resultname = options.GetArg(options.GetNrArg()-1); 00470 if(!CheckParameteres(options, dataset, dataset2, featureDefs, weights, 00471 resultname)) 00472 return 0; 00473 std::vector<double> averages; 00474 averages.resize(weights.size()); 00475 if(dataset2) 00476 { 00477 String filename = resultname + ".averages.raw"; 00478 filename = dataset->GetFilePathPrecomputedKernels(filename, "", false, false); 00479 if(filename == "") 00480 { 00481 ILOG_ERROR("couldn't read averages"); 00482 exit(0); 00483 } 00484 LoadAverages(dataset, filename, averages); 00485 for (int i = 0; i < averages.size(); i++) 00486 { 00487 ILOG_INFO_ONCE("average " << i << " " << averages[i]); 00488 } 00489 } 00490 00491 if(!dataset2) 00492 { 00493 String tmp = dataset->GetFilePathPrecomputedKernels(resultname+".info", 00494 "", true, true); 00495 String tmp2 = dataset->GetFilePathPrecomputedKernels(resultname+".averages.raw", 00496 "", true, true); 00497 if(tmp.empty() && tmp2.empty()) 00498 { 00499 ILOG_INFO("A complete kernel already exists, nothing to do."); 00500 return 0; 00501 } 00502 if(tmp.empty()) 00503 { 00504 ILOG_ERROR("Incomplete kernel exists! Cleanup files first."); 00505 return 1; 00506 } 00507 } 00508 00509 Util::Database* db = (dataset2) ? dataset2->GetDatabase() 00510 : dataset->GetDatabase(); 00511 Matrix::Mat* accumulator = 0; 00512 double totalweight = 0; 00513 for(int i=0 ; i<weights.size() ; ++i) 00514 { 00515 // open feature tables 00516 double weight = weights[i]; 00517 Feature::FeatureTable* f1 = OpenFeatureTable(featureDefs[i], dataset); 00518 Feature::FeatureTable* f2 = f1; 00519 if(dataset2) 00520 f2 = OpenFeatureTable(featureDefs[i], dataset2); 00521 if(Link::Mpi::MyId() == 0 && i == 0) 00522 { 00523 // create a .info file once 00524 String filename = resultname+".info"; 00525 if(dataset2) 00526 filename = dataset2->GetFilePathPrecomputedKernels(filename, 00527 dataset->GetSetName(), true, false); 00528 else 00529 filename = dataset->GetFilePathPrecomputedKernels(filename, 00530 "", true, false); 00531 if(filename == "") 00532 { 00533 ILOG_ERROR("could not save .info file"); 00534 exit(0); 00535 } 00536 ILOG_INFO("Saving info in " << filename); 00537 WriteInfoFile(f1->Size(), f2->Size(), partcount, filename, db); 00538 00539 // write the total list of devel-quids once 00540 if (dataset2) 00541 filename = dataset2->GetFilePathPrecomputedKernels( 00542 resultname + ".columns.tab", dataset->GetSetName(), 00543 true, false); 00544 else 00545 filename = dataset->GetFilePathPrecomputedKernels 00546 (resultname + ".columns.tab", "", true, false); 00547 ILOG_INFO("Saving columns in " << filename); 00548 if (!filename.empty()) 00549 Write(f1->GetQuidTable(), filename, dataset->GetDatabase(), 00550 true); 00551 else 00552 ILOG_ERROR("Unable to write columns.tab"); 00553 00554 } 00555 // compute kernel distances between features 00556 Matrix::Mat *distanceMatrix = ComputeMatrix(f1, f2, resultname, 00557 dataset, dataset2); 00558 // f1 & f2 are deleted by ComputeMatrix 00559 00560 // if this is the 'learn' kernel matrix we compute the averages and 00561 // communicate between nodes to make sure everyone has access to them 00562 // otherwise, the averages are already loaded into the vector 00563 if(dataset2 == 0) 00564 { 00565 double average = GetAverage(distanceMatrix); 00566 averages[i] = average; 00567 } 00568 00569 // accumulate; this is the part inside the 'exp' in the kernel function 00570 MulVal(distanceMatrix, distanceMatrix, weight); 00571 DivVal(distanceMatrix, distanceMatrix, -averages[i]); 00572 if(accumulator == 0) 00573 Set(accumulator, distanceMatrix); 00574 else 00575 Add(accumulator, accumulator, distanceMatrix); 00576 delete distanceMatrix; 00577 00578 totalweight += weight; 00579 } 00580 00581 ILOG_INFO_ONCE("finalising..."); 00582 // only thing to do is weigh and exp 00583 DivVal(accumulator, accumulator, totalweight); 00584 Exp(accumulator, accumulator); 00585 00586 if(dataset2 == 0) 00587 { 00588 if(Link::Mpi::MyId() == 0) 00589 { 00590 String filename = resultname + ".averages.raw"; 00591 filename = dataset->GetFilePathPrecomputedKernels(filename, "", true, false); 00592 WriteAverages(filename, db, averages); 00593 } 00594 } 00595 // write the matrix 00596 if (dataset2) 00597 { 00598 resultname = dataset2->GetFilePathPrecomputedKernels 00599 (resultname, dataset->GetSetName(), true, false); 00600 } 00601 else 00602 { 00603 resultname = dataset->GetFilePathPrecomputedKernels 00604 (resultname, "", true, false); 00605 } 00606 WriteResult(resultname, db, accumulator); 00607 return 0; 00608 }
Here is the call graph for this function:
|