Definition at line 218 of file mainPrecomputeKernelMatrix.cpp. References Impala::Core::Array::Add(), Impala::Util::TimeStats::AddGroup(), Impala::Util::TimeStats::AddGroupsFromSub(), Impala::Util::TimeStats::AsString(), CheckFeatures(), ComputeMatrix(), Impala::Core::Matrix::VirtualMatrixFactory::ConstructMemory(), Impala::Core::Table::Copy(), Impala::Core::Array::DivVal(), Impala::Core::Array::Exp(), Impala::CmdOptions::GetArg(), GetAverage(), Impala::CmdOptions::GetBool(), Impala::Core::Matrix::VirtualMatrixFactory::GetInstance(), Impala::CmdOptions::GetInt(), Impala::CmdOptions::GetString(), Impala::Process::MemoryInfo::GetUsageString(), ILOG_INFO, ILOG_VAR, Impala::Core::Database::MakeRawDataSet(), Impala::Util::TimeStats::MeasureFirst(), Impala::Util::TimeStats::MeasureFromSub(), Impala::Util::TimeStats::MeasureLast(), Impala::Util::TimeStats::MeasureNext(), Impala::Core::Array::MulVal(), OpenFeatureTable(), Impala::Core::Array::Set(), and Impala::Core::Table::Table::Size(). Referenced by MainPrecompute(). 
00219 { 00220 ILOG_VAR(Impala.Application.Precompute.Precompute); 00221 RawDataSet* thisSet = Core::Database::MakeRawDataSet(options.GetArg(0)); 00222 if (!thisSet) 00223 { 00224 ILOG_INFO_HEADNODE("Failed to open thisSet " << options.GetArg(0)); 00225 return 1; 00226 } 00227 00228 String develSetName = options.GetArg(1); 00229 ILOG_INFO_HEADNODE("DevelSetName = [" << develSetName << "]"); 00230 RawDataSet* develSet = 0; 00231 if (!develSetName.empty()) 00232 { 00233 develSet = Core::Database::MakeRawDataSet(develSetName); 00234 if (!develSet) 00235 { 00236 ILOG_INFO_HEADNODE("Failed to open develSet " << develSetName); 00237 return 1; 00238 } 00239 } 00240 String model = options.GetArg(2); 00241 String kernel = options.GetArg(3); 00242 String indexCat = options.GetString("featureIndexCat"); 00243 KernelMatrixLocator daLoc(thisSet->GetLocator(), true, "", 00244 develSetName, model, kernel, ""); 00245 daLoc.SetFeatureIndexCat(indexCat); 00246 if (!options.GetBool("override") && 00247 DistributedAccessRepository().Exists(daLoc)) 00248 { 00249 ILOG_INFO("Kernel already exists."); 00250 return 0; 00251 } 00252 00253 int startFeature = options.GetInt("startFeature"); 00254 DistributedAccess* da = 0; 00255 if (startFeature == 0) 00256 { 00257 da = new DistributedAccess(); 00258 if (!da->Valid(true, false)) 00259 return 1; 00260 00261 ILOG_INFO_HEADNODE("Using a " << da->GetRowPartCount() << "x" << 00262 da->GetColumnPartCount() << "grid"); 00263 if (!CheckFeatures(options, thisSet, da)) 00264 return 1; 00265 } 00266 else 00267 { 00268 daLoc.SetIncrement(startFeature); 00269 daLoc.SetStartNode(0); 00270 daLoc.SetNodeCount(Link::Mpi::NrProcs()); 00271 da = DistributedAccessRepository().Get(daLoc); 00272 } 00273 00274 00275 if (develSet) 00276 { 00277 KernelMatrixLocator devLoc(develSet->GetLocator(), true, "", "", 00278 model, kernel, ""); 00279 devLoc.SetFeatureIndexCat(indexCat); 00280 devLoc.SetDoParts(0); 00281 devLoc.SetStartNode(0); 00282 
devLoc.SetNodeCount(Link::Mpi::NrProcs()); 00283 DistributedAccess* devDA = DistributedAccessRepository().Get(devLoc); 00284 for (int i = 0; i < devDA->GetNrFeatures(); i++) 00285 { 00286 double average = devDA->GetAverage(i); 00287 ILOG_INFO_HEADNODE("average " << i << " " << average); 00288 da->SetAverage(i, average); 00289 } 00290 // Could do some more checking here... 00291 delete devDA; 00292 } 00293 else 00294 { 00295 if (model == "chi2") 00296 da->SetHasOwnAverages(true); 00297 } 00298 00299 String distanceFunction = options.GetString("distanceFunction"); 00300 int numberFeatures = options.GetInt("numberFeatures"); 00301 if (numberFeatures == -1) 00302 numberFeatures = da->GetNrFeatures() - startFeature; 00303 if (startFeature + numberFeatures >= da->GetNrFeatures()) 00304 numberFeatures = da->GetNrFeatures() - startFeature; 00305 Matrix::Mat* accumulator = 0; 00306 if (startFeature != 0) 00307 accumulator = da->StealPart(); 00308 double totalweight = 0; 00309 for (int i=0 ; i<startFeature ; i++) 00310 totalweight += da->GetWeight(i); 00311 Util::TimeStats statsCompute; 00312 statsCompute.AddGroup("read features"); 00313 statsCompute.AddGroup("compute"); 00314 Util::TimeStats statsOverall; 00315 statsOverall.AddGroupsFromSub(&statsCompute); 00316 statsOverall.AddGroup("final exp"); 00317 statsOverall.AddGroup("write"); 00318 statsOverall.MeasureFirst(); 00319 for (int i=startFeature ; i<startFeature+numberFeatures ; i++) 00320 { 00321 statsCompute.MeasureFirst(); 00322 // open feature tables 00323 double weight = da->GetWeight(i); 00324 Feature::FeatureDefinition fDef(da->GetFeature(i)); 00325 Feature::FeatureTable* f2 = OpenFeatureTable(fDef, indexCat, thisSet); 00326 Feature::FeatureTable* f1 = f2; 00327 if (develSet) 00328 f1 = OpenFeatureTable(fDef, indexCat, develSet); 00329 00330 if ((Link::Mpi::MyId() == 0) && (i == 0)) 00331 { 00332 Table::Copy(da->GetColumnQuids(), f1); 00333 da->SetColumns(f1->Size()); 00334 Table::Copy(da->GetRowQuids(), f2); 
00335 da->SetRows(f2->Size()); 00336 } 00337 00338 statsCompute.MeasureNext(); 00339 // compute kernel distances between features 00340 Matrix::Mat *distanceMatrix = ComputeMatrix(da, f1, f2, 00341 distanceFunction); 00342 // f1 & f2 are deleted by ComputeMatrix 00343 00344 // if this is the 'learn' kernel matrix we compute the averages and 00345 // communicate between nodes to make sure everyone has access to them 00346 // otherwise, the averages are already loaded into the vector 00347 // Note : hik kernel does not use averages 00348 if (da->GetHasOwnAverages()) 00349 { 00350 if (distanceFunction == "dot") 00351 { 00352 da->SetAverage(i, 0.0); 00353 } 00354 else 00355 { 00356 double average = GetAverage(distanceMatrix); 00357 da->SetAverage(i, average); 00358 } 00359 } 00360 00361 // accumulate; this is the part inside the 'exp' in the kernel function 00362 MulVal(distanceMatrix, distanceMatrix, weight); 00363 if (distanceFunction == "chi2") 00364 { 00365 DivVal(distanceMatrix, distanceMatrix, -da->GetAverage(i)); 00366 } 00367 if (accumulator == 0) 00368 Set(accumulator, distanceMatrix); 00369 else 00370 Add(accumulator, accumulator, distanceMatrix); 00371 delete distanceMatrix; 00372 00373 totalweight += weight; 00374 statsCompute.MeasureLast(); 00375 ILOG_INFO_HEADNODE("Compute " << statsCompute.AsString()); 00376 ILOG_INFO_HEADNODE("Memory " << Process::MemoryInfo::GetUsageString()); 00377 } 00378 statsOverall.MeasureFromSub(&statsCompute); 00379 00380 ILOG_INFO_HEADNODE("finalising..."); 00381 if (startFeature + numberFeatures == da->GetNrFeatures()) 00382 { 00383 // only thing to do is weigh and exp 00384 DivVal(accumulator, accumulator, totalweight); 00385 if (distanceFunction == "chi2") 00386 { 00387 Exp(accumulator, accumulator); 00388 } 00389 daLoc.SetIncrement(0); 00390 daLoc.SetWriteReal32(true); 00391 } 00392 else 00393 { 00394 daLoc.SetIncrement(startFeature + numberFeatures); 00395 } 00396 00397 statsOverall.MeasureNext(); 00398 typedef 
Matrix::VirtualMatrixFactory VirtualMatrixFactory; 00399 VirtualMatrixFactory& vmf = VirtualMatrixFactory::GetInstance(); 00400 Matrix::VirtualMatrix* vm = vmf.ConstructMemory(accumulator); 00401 da->AddPart(vm); 00402 DistributedAccessRepository().Add(daLoc, da); 00403 statsOverall.MeasureLast(); 00404 ILOG_INFO_HEADNODE("Overall " << statsOverall.AsString()); 00405 delete da; 00406 return 0; 00407 }
Here is the call graph for this function: (call graph image not available in this text extract)
|