Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

mainConceptSet.cpp

Go to the documentation of this file.
00001 #include "Persistency/FikSvmRepository.h"
00002 #include "Persistency/KeywordListRepository.h"
00003 #include "Persistency/DistributedAccessRepository.h"
00004 #include "Persistency/SvmRepository.h"
00005 #include "Persistency/FeatureTableRepository.h"
00006 #include "Core/Database/MakeRawDataSet.h"
00007 #include "Core/Matrix/SetColumn.h"
00008 
00009 namespace Impala
00010 {
00011 namespace Application
00012 {
00013 namespace ConceptSet
00014 {
00015 
00016 
00017 using namespace Impala::Core::Database;
00018 using namespace Impala::Core::Table;
00019 using namespace Impala::Core::Matrix;
00020 using namespace Impala::Core::Feature;
00021 using namespace Impala::Core::Training;
00022 using namespace Impala::Persistency;
00023 
00024 
00025 void
00026 DoApproxFikModel(RawDataSet* dataSet, CString conceptSet, CString concept,
00027                  CmdOptions& options, int curArg)
00028 {
00029     ILOG_VAR(Impala.Application.ConceptSet.DoApproxFikModel);
00030     if (options.GetNrArg() < curArg + 3)
00031     {
00032         ILOG_ERROR("Need more arguments");
00033         return;
00034     }
00035     String model = options.GetArg(curArg++);
00036     String kernel = options.GetArg(curArg++);
00037     int nrBins = atol(options.GetArg(curArg++));
00038     String indexCat = options.GetString("featureIndexCat");
00039 
00040     String fikModel = model + "-approx-" + MakeString(nrBins);
00041     ModelLocator fikLoc(dataSet->GetLocator(), conceptSet, fikModel, kernel,
00042                         concept);
00043     if (FikSvmRepository().Exists(fikLoc))
00044     {
00045         ILOG_INFO("Model for " << concept << " already exists, skipping.");
00046         return;
00047     }
00048 
00049     KernelMatrixLocator daLoc(dataSet->GetLocator(), true, "", "", model,
00050                               kernel, "");
00051     daLoc.SetStartNode(0);
00052     daLoc.SetNodeCount(Link::Mpi::NrProcs());
00053     daLoc.SetDoParts(0);
00054     daLoc.SetFeatureIndexCat(indexCat);
00055     DistributedAccess* da = DistributedAccessRepository().Get(daLoc);
00056     ModelLocator mLoc(dataSet->GetLocator(), conceptSet, model, kernel, concept);
00057     Svm* svm = SvmRepository().Get(mLoc);
00058     const svm_model* svmModel = svm->GetModel();
00059     int nrSV = svmModel->l;
00060     double rho = svmModel->rho[0];
00061     ILOG_INFO("nr_sv = " << nrSV);
00062     double* coeffs = svmModel->sv_coef[0];
00063     QuidTable* kernelQuids = da->GetColumnQuids();
00064     QuidTable svQuids;
00065     for (int i=0 ; i<nrSV ; i++)
00066     {
00067         svm_node* node = svmModel->SV[i];
00068         int idx = node->value - 1;
00069         Quid q = kernelQuids->Get1(idx);
00070         svQuids.Add(q);
00071     }
00072 
00073     Mat* mat = 0;
00074     int nrFeatures = da->GetNrFeatures();
00075     WeightedFeatureList features;
00076     for (int i=0 ; i<nrFeatures ; i++)
00077         features.Add(da->GetFeature(i), da->GetWeight(i));
00078     Real64 probA = svmModel->probA[0];
00079     Real64 probB = svmModel->probB[0];
00080     int probIndex = (svmModel->label[1] == 1) ? 1 : 0;
00081     FikSvm fikSvm(features, nrBins, nrSV, rho, probA, probB, probIndex);
00082     for (int i=0 ; i<nrFeatures ; i++)
00083     {
00084         ILOG_INFO("feature = " << da->GetFeature(i));
00085         FeatureLocator fLoc(dataSet->GetLocator(), false, true, indexCat,
00086                             da->GetFeature(i), "");
00087         FeatureTable* tab = FeatureTableRepository().Get(fLoc);
00088         int featDim = tab->GetFeatureVectorLength();
00089         if ((mat == 0) || (mat->CH() != featDim))
00090         {
00091             ILOG_INFO("featDim = " << featDim);
00092             if (mat)
00093                 delete mat;
00094             mat = MatCreate<Mat>(featDim, nrSV);
00095         }
00096         for (int sv=0 ; sv<nrSV ; sv++)
00097         {
00098             Quid q = svQuids.Get1(sv);
00099             SetColumn(mat, sv, tab->FindFeature(q));
00100         }
00101         fikSvm.CreateSvm(featDim, coeffs, mat->CPB());
00102         //fikSvm.Dump(i, 3);
00103         delete tab;
00104     }
00105     ILOG_INFO("Saving in " << fikLoc);
00106     FikSvmRepository().Add(fikLoc, &fikSvm);
00107     delete mat;
00108     delete svm;
00109     delete da;
00110 }
00111 
00112 int
00113 mainConceptSet(int argc, char* argv[])
00114 {
00115     Link::Mpi::Init(&argc, &argv);
00116 
00117     CmdOptions& options = CmdOptions::GetInstance();
00118     options.Initialise(false, false, true);
00119     options.AddOption(0, "start", "index of concept to start with", "0");
00120     options.AddOption(0, "number", "number of concepts", "-1");
00121     options.AddOption(0, "concept", "name", "");
00122     options.AddOption(0, "featureIndexCat", "name", "");
00123 
00124     if (! options.ParseArgs(argc, argv, "cmd dataSet conceptSet [cmdArgs]", 3))
00125     {
00126         Link::Mpi::Finalize();
00127         return 1;
00128     }
00129 
00130     ILOG_VAR(Impala.Application.ConceptSet.mainConceptSet);
00131 
00132     int curArg = 0;
00133     String cmd = options.GetArg(curArg++);
00134     String dataSetName = options.GetArg(curArg++);
00135     String conceptSet = options.GetArg(curArg++);
00136 
00137     RawDataSet* dataSet = MakeRawDataSet(dataSetName);
00138     KeywordListLocator loc(dataSet->GetLocator(), conceptSet);
00139     KeywordList concepts = *(KeywordListRepository().Get(loc));
00140 
00141 
00142     int start = options.GetInt("start");
00143     int number = options.GetInt("number");
00144     String concept = options.GetString("concept");
00145     if (!concept.empty())
00146     {
00147         for (int i=0 ; i<concepts.size() ; i++)
00148         {
00149             if (concepts[i] == concept)
00150             {
00151                 start = i;
00152                 number = 1;
00153             }
00154         }
00155     }
00156     if (number == -1)
00157         number = concepts.size();
00158     if (start + number >= concepts.size())
00159         number = concepts.size() - start;
00160     int last = start + number;
00161     int numProcs = Link::Mpi::NrProcs();
00162     int myId = Link::Mpi::MyId();
00163     int taskSize = number / numProcs;
00164     int restSize = number % numProcs;
00165     int cur = start;
00166     for (int i=0 ; i<=myId ; i++)
00167     {
00168         start = cur;
00169         number = taskSize;
00170         if (--restSize >= 0)
00171             number++;
00172         if (start + number > last)
00173             number = last - start;
00174         cur += number;
00175     }
00176     ILOG_INFO("Process " << myId << " of " << numProcs << " is doing concepts "
00177               << start << " till " << start + number);
00178 
00179     ILOG_INFO_HEADNODE("conceptset command : " << cmd);
00180     for (int i=start ; i<start+number ; i++)
00181     {
00182         ILOG_INFO("doing concept " << concepts[i]);
00183         ILOG_NDC_PUSH(concepts[i]);
00184         if (cmd == "approxfikmodel")
00185             DoApproxFikModel(dataSet, conceptSet, concepts[i], options, curArg);
00186         else
00187         {
00188             ILOG_ERROR("Unknown command : " << cmd);
00189             ILOG_NDC_POP;
00190             break;
00191         }
00192         ILOG_NDC_POP;
00193     }
00194 
00195     int nrOfErrors = ILOG_ERROR_COUNT;
00196     ILOG_DEBUG("nr of errors collected = " << nrOfErrors);
00197 
00198     nrOfErrors = Link::Mpi::ReduceSum(nrOfErrors);
00199     ILOG_INFO_HEADNODE("Root: total nr error = " << nrOfErrors);
00200     Link::Mpi::Finalize();
00201 
00202     return nrOfErrors;
00203 }
00204 
00205 } // namespace ConceptSet
00206 } // namespace Application
00207 } // namespace Impala
00208 
00209 int
00210 main(int argc, char* argv[])
00211 {
00212 
00213 #ifdef NO_DIALOGONCRASH
00214 #ifdef WIN32
00215     DWORD oldMode = ::SetErrorMode(SEM_NOGPFAULTERRORBOX);
00216     ::SetErrorMode(oldMode | SEM_NOGPFAULTERRORBOX);
00217 #endif
00218 #endif
00219 
00220     return Impala::Application::ConceptSet::mainConceptSet(argc, argv);
00221 }

Generated on Thu Jan 13 09:03:41 2011 for ImpalaSrc by  doxygen 1.5.1