00001 #include "Persistency/FikSvmRepository.h"
00002 #include "Persistency/KeywordListRepository.h"
00003 #include "Persistency/DistributedAccessRepository.h"
00004 #include "Persistency/SvmRepository.h"
00005 #include "Persistency/FeatureTableRepository.h"
00006 #include "Core/Database/MakeRawDataSet.h"
00007 #include "Core/Matrix/SetColumn.h"
00008
00009 namespace Impala
00010 {
00011 namespace Application
00012 {
00013 namespace ConceptSet
00014 {
00015
00016
00017 using namespace Impala::Core::Database;
00018 using namespace Impala::Core::Table;
00019 using namespace Impala::Core::Matrix;
00020 using namespace Impala::Core::Feature;
00021 using namespace Impala::Core::Training;
00022 using namespace Impala::Persistency;
00023
00024
00025 void
00026 DoApproxFikModel(RawDataSet* dataSet, CString conceptSet, CString concept,
00027 CmdOptions& options, int curArg)
00028 {
00029 ILOG_VAR(Impala.Application.ConceptSet.DoApproxFikModel);
00030 if (options.GetNrArg() < curArg + 3)
00031 {
00032 ILOG_ERROR("Need more arguments");
00033 return;
00034 }
00035 String model = options.GetArg(curArg++);
00036 String kernel = options.GetArg(curArg++);
00037 int nrBins = atol(options.GetArg(curArg++));
00038 String indexCat = options.GetString("featureIndexCat");
00039
00040 String fikModel = model + "-approx-" + MakeString(nrBins);
00041 ModelLocator fikLoc(dataSet->GetLocator(), conceptSet, fikModel, kernel,
00042 concept);
00043 if (FikSvmRepository().Exists(fikLoc))
00044 {
00045 ILOG_INFO("Model for " << concept << " already exists, skipping.");
00046 return;
00047 }
00048
00049 KernelMatrixLocator daLoc(dataSet->GetLocator(), true, "", "", model,
00050 kernel, "");
00051 daLoc.SetStartNode(0);
00052 daLoc.SetNodeCount(Link::Mpi::NrProcs());
00053 daLoc.SetDoParts(0);
00054 daLoc.SetFeatureIndexCat(indexCat);
00055 DistributedAccess* da = DistributedAccessRepository().Get(daLoc);
00056 ModelLocator mLoc(dataSet->GetLocator(), conceptSet, model, kernel, concept);
00057 Svm* svm = SvmRepository().Get(mLoc);
00058 const svm_model* svmModel = svm->GetModel();
00059 int nrSV = svmModel->l;
00060 double rho = svmModel->rho[0];
00061 ILOG_INFO("nr_sv = " << nrSV);
00062 double* coeffs = svmModel->sv_coef[0];
00063 QuidTable* kernelQuids = da->GetColumnQuids();
00064 QuidTable svQuids;
00065 for (int i=0 ; i<nrSV ; i++)
00066 {
00067 svm_node* node = svmModel->SV[i];
00068 int idx = node->value - 1;
00069 Quid q = kernelQuids->Get1(idx);
00070 svQuids.Add(q);
00071 }
00072
00073 Mat* mat = 0;
00074 int nrFeatures = da->GetNrFeatures();
00075 WeightedFeatureList features;
00076 for (int i=0 ; i<nrFeatures ; i++)
00077 features.Add(da->GetFeature(i), da->GetWeight(i));
00078 Real64 probA = svmModel->probA[0];
00079 Real64 probB = svmModel->probB[0];
00080 int probIndex = (svmModel->label[1] == 1) ? 1 : 0;
00081 FikSvm fikSvm(features, nrBins, nrSV, rho, probA, probB, probIndex);
00082 for (int i=0 ; i<nrFeatures ; i++)
00083 {
00084 ILOG_INFO("feature = " << da->GetFeature(i));
00085 FeatureLocator fLoc(dataSet->GetLocator(), false, true, indexCat,
00086 da->GetFeature(i), "");
00087 FeatureTable* tab = FeatureTableRepository().Get(fLoc);
00088 int featDim = tab->GetFeatureVectorLength();
00089 if ((mat == 0) || (mat->CH() != featDim))
00090 {
00091 ILOG_INFO("featDim = " << featDim);
00092 if (mat)
00093 delete mat;
00094 mat = MatCreate<Mat>(featDim, nrSV);
00095 }
00096 for (int sv=0 ; sv<nrSV ; sv++)
00097 {
00098 Quid q = svQuids.Get1(sv);
00099 SetColumn(mat, sv, tab->FindFeature(q));
00100 }
00101 fikSvm.CreateSvm(featDim, coeffs, mat->CPB());
00102
00103 delete tab;
00104 }
00105 ILOG_INFO("Saving in " << fikLoc);
00106 FikSvmRepository().Add(fikLoc, &fikSvm);
00107 delete mat;
00108 delete svm;
00109 delete da;
00110 }
00111
00112 int
00113 mainConceptSet(int argc, char* argv[])
00114 {
00115 Link::Mpi::Init(&argc, &argv);
00116
00117 CmdOptions& options = CmdOptions::GetInstance();
00118 options.Initialise(false, false, true);
00119 options.AddOption(0, "start", "index of concept to start with", "0");
00120 options.AddOption(0, "number", "number of concepts", "-1");
00121 options.AddOption(0, "concept", "name", "");
00122 options.AddOption(0, "featureIndexCat", "name", "");
00123
00124 if (! options.ParseArgs(argc, argv, "cmd dataSet conceptSet [cmdArgs]", 3))
00125 {
00126 Link::Mpi::Finalize();
00127 return 1;
00128 }
00129
00130 ILOG_VAR(Impala.Application.ConceptSet.mainConceptSet);
00131
00132 int curArg = 0;
00133 String cmd = options.GetArg(curArg++);
00134 String dataSetName = options.GetArg(curArg++);
00135 String conceptSet = options.GetArg(curArg++);
00136
00137 RawDataSet* dataSet = MakeRawDataSet(dataSetName);
00138 KeywordListLocator loc(dataSet->GetLocator(), conceptSet);
00139 KeywordList concepts = *(KeywordListRepository().Get(loc));
00140
00141
00142 int start = options.GetInt("start");
00143 int number = options.GetInt("number");
00144 String concept = options.GetString("concept");
00145 if (!concept.empty())
00146 {
00147 for (int i=0 ; i<concepts.size() ; i++)
00148 {
00149 if (concepts[i] == concept)
00150 {
00151 start = i;
00152 number = 1;
00153 }
00154 }
00155 }
00156 if (number == -1)
00157 number = concepts.size();
00158 if (start + number >= concepts.size())
00159 number = concepts.size() - start;
00160 int last = start + number;
00161 int numProcs = Link::Mpi::NrProcs();
00162 int myId = Link::Mpi::MyId();
00163 int taskSize = number / numProcs;
00164 int restSize = number % numProcs;
00165 int cur = start;
00166 for (int i=0 ; i<=myId ; i++)
00167 {
00168 start = cur;
00169 number = taskSize;
00170 if (--restSize >= 0)
00171 number++;
00172 if (start + number > last)
00173 number = last - start;
00174 cur += number;
00175 }
00176 ILOG_INFO("Process " << myId << " of " << numProcs << " is doing concepts "
00177 << start << " till " << start + number);
00178
00179 ILOG_INFO_HEADNODE("conceptset command : " << cmd);
00180 for (int i=start ; i<start+number ; i++)
00181 {
00182 ILOG_INFO("doing concept " << concepts[i]);
00183 ILOG_NDC_PUSH(concepts[i]);
00184 if (cmd == "approxfikmodel")
00185 DoApproxFikModel(dataSet, conceptSet, concepts[i], options, curArg);
00186 else
00187 {
00188 ILOG_ERROR("Unknown command : " << cmd);
00189 ILOG_NDC_POP;
00190 break;
00191 }
00192 ILOG_NDC_POP;
00193 }
00194
00195 int nrOfErrors = ILOG_ERROR_COUNT;
00196 ILOG_DEBUG("nr of errors collected = " << nrOfErrors);
00197
00198 nrOfErrors = Link::Mpi::ReduceSum(nrOfErrors);
00199 ILOG_INFO_HEADNODE("Root: total nr error = " << nrOfErrors);
00200 Link::Mpi::Finalize();
00201
00202 return nrOfErrors;
00203 }
00204
00205 }
00206 }
00207 }
00208
00209 int
00210 main(int argc, char* argv[])
00211 {
00212
00213 #ifdef NO_DIALOGONCRASH
00214 #ifdef WIN32
00215 DWORD oldMode = ::SetErrorMode(SEM_NOGPFAULTERRORBOX);
00216 ::SetErrorMode(oldMode | SEM_NOGPFAULTERRORBOX);
00217 #endif
00218 #endif
00219
00220 return Impala::Application::ConceptSet::mainConceptSet(argc, argv);
00221 }