00001 #ifndef Impala_Core_DataFactory_h
00002 #define Impala_Core_DataFactory_h
00003
#include <stdexcept>

#include "Util/StlHelpers.h"
#include "Core/Table/Read.h"
#include "Core/Table/Write.h"
#include "Core/Table/AnnotationTable.h"
#include "Core/VideoSet/MakeVideoSet.h"
#include "Core/ImageSet/MakeImageSet.h"
#include "Core/VideoSet/FilterRkf.h"
#include "Core/Feature/FeatureDefinition.h"
#include "Core/Feature/FeatureTable.h"
#include "Core/Matrix/DistributedAccess.h"
#include "Core/Training/Classifier.h"
#include "Core/Feature/RandomTree.h"
00016
00017 namespace Impala
00018 {
00019 namespace Core
00020 {
00021
00029 class DataFactory
00030 {
00031 public:
00033 DataFactory(Database::RawDataSet* set, Util::Database* db, int quidClass,
00034 String conceptSet, String modelType,
00035 Feature::FeatureDefinition fdef,
00036 String kernelMatrixName, int conceptStart, int conceptNumber)
00037 {
00038 mDataSet = set;
00039 mDb = db;
00040 mQuidClass = quidClass;
00041 mConceptSet = conceptSet;
00042 mModelType = modelType;
00043 mFeatureDef = fdef;
00044 mKernelMatrixName = kernelMatrixName;
00045 mConceptStart = conceptStart;
00046 mConceptNumber = conceptNumber;
00047 mDistributedAccess = 0;
00048 }
00049
00050 virtual
00051 ~DataFactory()
00052 {
00053 if(mDistributedAccess)
00054 {
00055 mDistributedAccess->Unsubscribe();
00056 delete mDistributedAccess;
00057 }
00058 }
00059
00060 Array::Array2dVec3UInt8*
00061 MakeImage(Quid q)
00062 {
00063 QuidObj qo(q);
00064 if(qo.Class() != mQuidClass)
00065 {
00066 ILOG_ERROR("MakeImage with quid of quidClass "<< qo.Class());
00067 return 0;
00068 }
00069 if(mQuidClass == QUID_CLASS_IMAGE)
00070 {
00071 return static_cast<ImageSet::ImageSet*>(mDataSet)->GetImage(qo.Id());
00072 }
00073 else
00074 {
00075 ILOG_WARNING("MakeImage not tested for videosets");
00076 Stream::RgbDataSrc* src =
00077 static_cast<VideoSet::VideoSet*>(mDataSet)->GetVideo(qo.Object());
00078 src->GotoFrame(qo.Id());
00080 Array::Array2dVec3UInt8* im = Array::ArrayCreate<Array::Array2dVec3UInt8>
00081 (src->FrameWidth(), src->FrameHeight(), 0, 0, src->DataPtr(), true);
00082 delete src;
00083 return im;
00084 }
00085 }
00086
00087 std::vector<String>
00088 MakeConceptList()
00089 {
00090 std::vector<String> conceptList;
00091 String conceptFile =
00092 mDataSet->GetFilePathAnnotation(mConceptSet, false, false);
00093 Util::DatabaseReadStrings(conceptList, conceptFile, mDb);
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114 Util::SubSelectInPlace(&conceptList, mConceptStart, mConceptNumber);
00115 return conceptList;
00116 }
00117
00118 int
00119 FeatureTableCount() const
00120 {
00121 if(mQuidClass == QUID_CLASS_IMAGE)
00122 return mDataSet->NrDirs();
00123 else
00124 return mDataSet->NrFiles();
00125 }
00126
00128 Feature::FeatureTable*
00129 MakeFeatureTable(int videoId)
00130 {
00131 String path;
00132 if(mQuidClass == QUID_CLASS_IMAGE)
00133 path = mDataSet->GetFilePathFeatureData
00134 (mFeatureDef, videoId, false, 0, false, false);
00135 else
00136 path = mDataSet->GetFilePathFeatureData
00137 ("Keyframes", mFeatureDef, videoId, false, 0, false, false);
00138 if(path.empty())
00139 {
00140 return 0;
00141 }
00142
00143 ILOG_DEBUG_ONCE("read features(" << videoId << ")");
00144 Feature::FeatureTable* features =
00145 Feature::FeatureTable::MakeFromFile(mFeatureDef, path, mDb);
00146 ILOG_DEBUG_ONCE("done read features: "<< features->Size());
00147 return features;
00148 }
00149
00150 Table::QuidTable*
00151 MakeAllFeatureQuids()
00152 {
00153 Table::QuidTable * quids = new Table::QuidTable();
00154
00155 for(int i=0 ; i<FeatureTableCount() ; ++i)
00156 {
00157 Feature::FeatureTable* f = MakeFeatureTable(i);
00158
00159 quids->Append(f->GetQuidTable());
00160 delete f;
00161 }
00162 ILOG_DEBUG("done loop: "<< quids->Size() <<" quids retrieved");
00163 return quids;
00164 }
00165
00170 Table::QuidTable*
00171 MakeTrainFeatureQuids()
00172 {
00173 Table::QuidTable * quids = MakeAllFeatureQuids();
00174 ILOG_DEBUG("MakeTrainFeatureQuids(): got "<< quids->Size() <<" quids");
00175 if(quids->Size() > 0)
00176 {
00177 int oldSetId = QuidSet(quids->Get1(0));
00178 quids->SetQuidSet(mDataSet->GetSetId());
00179 if(mQuidClass != QUID_CLASS_IMAGE)
00180 VideoSet::FilterRkf(quids, static_cast<VideoSet::VideoSet*>(mDataSet));
00181 quids->SetQuidSet(oldSetId);
00182 }
00183 return quids;
00184 }
00185
00186 Table::AnnotationTable*
00187 MakeAnnotation(String concept)
00188 {
00189
00190 String filename = mDataSet->GetFilePathAnnotation
00191 (mQuidClass, mConceptSet, concept + ".tab", false, false);
00192 if (filename.empty())
00193 {
00194 ILOG_ERROR_ONCE("could not find the annotation of " << concept);
00195 return 0;
00196 }
00197 Table::AnnotationTable* annotation = new Table::AnnotationTable;
00198 Read(annotation, filename, mDb);
00199 ILOG_DEBUG_ONCE("Annotations size: " << annotation->Size() );
00200 ILOG_DEBUG_ONCE("nr pos = "<< annotation->GetNrPositive() <<
00201 "nr neg = "<< annotation->GetNrNegative());
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215 return annotation;
00216 }
00217
00218 bool
00219 CanMakeBestFile(String concept)
00220 {
00221 String filename = mDataSet->GetFilePathConceptModel
00222 (mConceptSet, mModelType, mFeatureDef, concept+".best",true,true);
00223 if(filename == "")
00224 return false;
00225 return true;
00226 }
00227
00228 bool
00229 CanMakeConceptModel(String concept)
00230 {
00231 String filename = mDataSet->GetFilePathConceptModel
00232 (mConceptSet, mModelType, mFeatureDef, concept+".model",true,true);
00233 if(filename == "")
00234 return false;
00235 return true;
00236 }
00237
00238 Util::IOBuffer*
00239 MakeBestFile(String concept, bool toWrite)
00240 {
00241 String name = concept+".best";
00242 String filename = mDataSet->GetFilePathConceptModel
00243 (mConceptSet, mModelType, mFeatureDef, name, toWrite, true);
00244 if(filename == "")
00245 return 0;
00246 ILOG_INFO("MakeBestFile: opening this filename: "<<filename);
00247 Util::IOBuffer* buf = mDb->GetIOBuffer(filename, !toWrite, false, "tmp", 0, true);
00248
00249 return buf;
00250 }
00251
00252 Util::IOBuffer*
00253 MakeBestFileWrite(String concept)
00254 {
00255 String filename = mDataSet->GetFilePathConceptModel
00256 (mConceptSet, mModelType, mFeatureDef, concept+".best",
00257 true, false);
00258 if(filename == "")
00259 return 0;
00260 Util::IOBuffer* buf = mDb->GetIOBuffer(filename, false, false, "tmp", 0, true);
00261
00262 return buf;
00263 }
00264
00265 Util::IOBuffer*
00266 MakeAllParametersFile(String concept)
00267 {
00268 String filename = mDataSet->GetFilePathConceptModel
00269 (mConceptSet, mModelType, mFeatureDef, concept+".allparameters",
00270 true, false);
00271 if(filename == "")
00272 return 0;
00273 Util::IOBuffer* buf = mDb->GetIOBuffer(filename, false, false, "tmp", 0, true);
00274
00275 return buf;
00276 }
00277
00278 Database::RawDataSet*
00279 GetDataSet()
00280 {
00281 return mDataSet;
00282 }
00283
00287 Matrix::DistributedAccess*
00288 GetDistributedAccess()
00289 {
00290 if(mDistributedAccess == 0)
00291 {
00292 if(Link::Mpi::NrProcs() == 1)
00293 mDistributedAccess = new Matrix::DistributedAccess
00294 (mKernelMatrixName, mDataSet, 0, 0, 1);
00295 else
00296 {
00297 mDistributedAccess = new Matrix::DistributedAccess
00298 (mKernelMatrixName, mDataSet, 0, 1, Link::Mpi::NrProcs() - 1);
00299 if(Link::Mpi::MyId() == 0)
00300 mDistributedAccess->Subscribe();
00301 }
00302 }
00303 return mDistributedAccess;
00304 }
00305
00306 void
00307 ServeDistributedAccess()
00308 {
00309 Matrix::DistributedAccess* da = GetDistributedAccess();
00310 da->StartEventLoop();
00311 }
00312
00319 void
00320 WriteConceptModel(String concept, Training::Classifier* c)
00321 {
00322 String modelname = mDataSet->GetFilePathConceptModel
00323 (mConceptSet, mModelType, mFeatureDef, concept+".model", true, false);
00324 if(modelname == "")
00325 {
00326 ILOG_ERROR("Could not write " << concept << ".model");
00327 }
00328 else
00329 c->SaveModel(modelname, mDb);
00330 }
00331
00332 void
00333 WriteScoreOnSelf(String concept, double score)
00334 {
00335 String filename = mDataSet->GetFilePathConceptModel
00336 (mConceptSet, mModelType, mFeatureDef, concept+".ScoreOnSelf",
00337 true, false);
00338 if(filename.empty())
00339 {
00340 ILOG_ERROR("Could not write " << concept << ".ScoreOnSelf");
00341 throw;
00342 }
00343 else
00344 {
00345 Util::IOBuffer* buf = mDb->GetIOBuffer(filename, false,false,"tmp", 0, true);
00346 buf->Puts(MakeString(score));
00347 delete buf;
00348 }
00349 }
00350
00351 bool
00352 CanWriteCodebook()
00353 {
00354 return !mDataSet->GetFilePathCodebook(mFeatureDef, true, true).empty();
00355 }
00356
00357 void
00358 WriteRandomForest(Feature::RandomTreeTable* forest)
00359 {
00360 String filename = mDataSet->GetFilePathRandomForest
00361 (mFeatureDef, true, false);
00362 if(filename.empty())
00363 {
00364 ILOG_ERROR("Could not write random forest for " <<
00365 mFeatureDef.AsString());
00366 throw;
00367 }
00368 else
00369 {
00370 Util::IOBuffer* buf = mDb->GetIOBuffer(filename, false, false,"tmp", 0, true);
00371 Write(forest, buf, true);
00372 delete buf;
00373 }
00374 }
00375
00376 void
00377 WriteCodebook(Feature::FeatureTable* forest)
00378 {
00379 String filename = mDataSet->GetFilePathCodebook
00380 (mFeatureDef, true, false);
00381 if(filename.empty())
00382 {
00383 ILOG_ERROR("Could not write random forest for " <<
00384 mFeatureDef.AsString());
00385 throw;
00386 }
00387 else
00388 {
00389 Util::IOBuffer* buf = mDb->GetIOBuffer(filename, false, false,"tmp", 0, true);
00390 Write(forest, buf, true);
00391 delete buf;
00392 }
00393 }
00394
00395 Feature::FeatureDefinition
00396 GetFeatureDefinition()
00397 {
00398 return mFeatureDef;
00399 }
00400
00401 private:
00402 void
00403 GetPartialTask(CmdOptions &options, std::vector<String> conceptList,
00404 int &start, int &end)
00405 {
00406 ILOG_VAR(Samples.mainCrossValidate.GetPartialTask);
00407 end = 0;
00408 start = options.GetInt("start");
00409 int number = options.GetInt("number");
00410 if (!options.GetString("concept").empty())
00411 {
00412 String con = options.GetString("concept");
00413 int idx = -1;
00414 for (int i=0 ; i<conceptList.size() ; i++)
00415 if (conceptList[i] == con)
00416 idx = i;
00417 if (idx == -1)
00418 {
00419 ILOG_ERROR("Unable to find concept " << con << " in list");
00420 start = end = 0;
00421 return;
00422 }
00423 start = idx;
00424 number = 1;
00425 }
00426 end = (number == -1) ? conceptList.size() : start + number;
00427 if (end > conceptList.size())
00428 end = conceptList.size();
00429 }
00430
00431
00432 Database::RawDataSet* mDataSet;
00433 Util::Database* mDb;
00434 int mQuidClass;
00435 int mConceptStart;
00436 int mConceptNumber;
00437 String mConceptSet;
00438 String mModelType;
00439 Feature::FeatureDefinition mFeatureDef;
00440 String mKernelMatrixName;
00441 Matrix::DistributedAccess* mDistributedAccess;
00442
00443 ILOG_VAR_DECL;
00444 };
00445
// Out-of-class counterpart of the ILOG_VAR_DECL inside DataFactory;
// presumably defines the class logger under the name Impala.Core.DataFactory
// - TODO confirm against the ILOG macro definitions.
00446 ILOG_VAR_INIT(DataFactory, Impala.Core);
00447
00448 }
00449 }
00450
00451 #endif //Impala_Core_DataFactory_h