00001 #include "Persistency/FoldRepository.h"
00002 #include "Persistency/BestFileRepository.h"
00003 #include "Persistency/AllParameterFileRepository.h"
00004 #include "Link/Mpi/MpiFuncs.h"
00005 #include "Core/Training/Factory.h"
00006
00007
00008 #include "Link/Svm/LinkSvm.cpp"
00009
00010 namespace Impala
00011 {
00012 namespace Application
00013 {
00014
00015 using namespace Core;
00016 using namespace Core::Training;
00017 using namespace Persistency;
00018
00019
00020
00021
00022
00023 void
00024 TestFolds(Table::AnnotationTable* annotation, Database::RawDataSet* dataSet)
00025 {
00026 std::vector<Table::QuidTable*> folds = annotation->MakeEpisodeFolds(3, 0);
00027 for (int i=0 ; i<folds.size() ; ++i)
00028 {
00029 Table::QuidTable* fold = folds[i];
00030 Persistency::FoldLocator loc(dataSet->GetLocator(), QUID_CLASS_FRAME,
00031 "concepts.txt", annotation->GetLabel(),
00032 "episode_folds", i);
00033 Persistency::FoldRepository().Add(loc, fold);
00034
00035 delete fold;
00036 }
00037 }
00038
00039 void
00040 CrossValidate(Training::Factory* factory, bool testMode)
00041 {
00042 ILOG_VAR(Impala.Application.CrossValidate);
00043 std::vector<String> conceptList = factory->MakeConceptList();
00044 Util::PropertySet* properties = factory->GetProperties();
00045
00046 for (int i=0 ; i<conceptList.size(); ++i)
00047 {
00048 String concept = conceptList[i];
00049 properties->Add("concept", concept);
00050 ILOG_INFO_HEADNODE("starting on concept " << i << ": " << concept);
00051 ModelLocator modelLoc = factory->GetModelLocator();
00052 modelLoc.SetConcept(concept);
00053 if (BestFileRepository().Exists(modelLoc))
00054 {
00055 ILOG_INFO_HEADNODE("bestfile already exists: skipping...");
00056 continue;
00057 }
00058
00059 Table::AnnotationTable* annotation = factory->MakeAnnotation(concept);
00060 if (annotation == 0)
00061 {
00062 ILOG_INFO("no annotation found for " << concept << ", skipping...");
00063 continue;
00064 }
00065 if (testMode && (i == 0) && (Link::Mpi::MyId() == 0))
00066 {
00067 TestFolds(annotation, factory->GetDataSet());
00068 }
00069 ParameterSearcher* searcher = factory->MakeSearcher(annotation);
00070 Util::PropertySet results = searcher->Search();
00071 results.Add("value", searcher->GetBestScore());
00072
00073 if (Link::Mpi::MyId() == 0)
00074 {
00075 ILOG_INFO_HEADNODE("saving results for " << concept);
00076 BestFileRepository().Add(modelLoc, &results);
00077 std::vector<Util::PropertySet*> allP = searcher->GetAllScores();
00078 AllParameterFileRepository().Add(modelLoc, &allP);
00079 for (int i=0 ; i<allP.size() ; i++)
00080 delete allP[i];
00081 }
00082 delete searcher;
00083 delete annotation;
00084 }
00085 }
00086
00087 int
00088 mainCrossValidate(int argc, char** argv)
00089 {
00090 ILOG_VAR(Impala.Application.mainCrossValidate);
00091 Link::Mpi::Init(&argc, &argv);
00092 CmdOptions& options = CmdOptions::GetInstance();
00093 options.Initialise(false, false, true);
00094 options.AddOption(0, "start", "index of concept to start with", "0");
00095 options.AddOption(0, "number", "number of concepts", "-1");
00096 options.AddOption(0, "concept", "name", "");
00097 options.AddOption(0, "w1", "number or range", "[log-3:3/10]");
00098 options.AddOption(0, "w2", "number or range", "[log-3:3/10]");
00099 options.AddOption(0, "autoweight", "bool", "0");
00100 options.AddOption(0, "C", "number or range", "1");
00101 options.AddOption(0, "gamma", "number or range (-1 for 1/feat length)", "-1");
00102 options.AddOption('r', "repetitions", "number", "2");
00103 options.AddOption(0, "episode-constrained", "bool", "1");
00104 options.AddOption(0, "assume-shotid", "bool", "0");
00105 options.AddOption
00106 (0, "evaluator",
00107 "choose from {AP, BAP, AUC, P@N, R@N} where N is number (precision @ n)",
00108 "AP");
00109 options.AddOption('f', "folds", "number", "3");
00110 options.AddOption('m', "cache", "megabytes", "500");
00111 options.AddOption('p', "probability", "bool", "0");
00112 options.AddOption
00113 (0, "kernel",
00114 "string: [linear,poly,rbf,sigmoid,precomputed,hist,dist-precomputed]",
00115 "rbf");
00116 options.AddOption(0, "precompute-kernel", "string: [chi2]", "chi2");
00117 options.AddOption(0, "featureIndexCat", "name", "");
00118 options.AddOption(0, "maxVideoId", "index", "-1");
00119 options.AddOption(0, "maxPosPerVideo", "number", "-1");
00120 options.AddOption(0, "maxNegPerVideo", "number", "-1");
00121 options.AddOption(0, "restrictTestFoldSet", "setId", "-1");
00122 options.AddOption(0, "dumpFolds", "", "0");
00123 options.AddOption(0, "testMode", "", "0");
00124
00125 options.AddOption(0, "imCacheSize", "size", "1");
00126
00127 if (options.ParseArgs(argc, argv, "dataSet concepts model featureDef", 4))
00128 {
00129 Training::Factory factory(&options, true);
00130
00131
00132 String kernel = options.GetString("kernel");
00133 ILOG_INFO_HEADNODE("kernel = " << kernel);
00134 if ((kernel == "dist-precomputed") && (Link::Mpi::MyId() != 0))
00135 {
00136 factory.ServeDistributedAccess();
00137 }
00138 else
00139 {
00140
00141
00142 if (kernel == "dist-precomputed")
00143 factory.GetDistributedAccess();
00144 bool testMode = options.GetBool("testMode");
00145 CrossValidate(&factory, testMode);
00146 }
00147 }
00148
00149 int nrOfErrors = ILOG_ERROR_COUNT;
00150 nrOfErrors = Link::Mpi::ReduceSum(nrOfErrors);
00151 ILOG_INFO_HEADNODE("Root: total nr error = " << nrOfErrors);
00152 Link::Mpi::Finalize();
00153 return nrOfErrors;
00154 }
00155
00156 }
00157 }
00158
00159 int
00160 main(int argc, char* argv[])
00161 {
00162 return Impala::Application::mainCrossValidate(argc, argv);
00163 }