00001 #ifndef Impala_Core_Training_ParameterSearcher_h
00002 #define Impala_Core_Training_ParameterSearcher_h
00003
00004 #include "Util/PropertySet.h"
00005 #include "Util/RangeIterator.h"
00006
00007 #include "Link/Mpi/MpiFuncs.h"
00008 #include "Util/Mpi/Reduce.h"
00009 #include "Util/Mpi/JobManager.h"
00010 #include "Core/Training/ParameterEvaluator.h"
00011
00012 #include <algorithm>
00013 #include <sstream>
00014
00015 namespace Impala
00016 {
00017 namespace Core
00018 {
00019 namespace Training
00020 {
00021
00026 class ParameterSearcher
00027 {
00028 public:
00029 ParameterSearcher(Util::PropertySet* properties, ParameterEvaluator* evaluator)
00030 {
00031 #ifdef MPI_USED
00032 mParallelMode = true;
00033 #else
00034 mParallelMode = false;
00035 #endif
00036 mEvaluator = evaluator;
00037 mProperties = *properties;
00038 mBestScore = 0;
00039
00040 int i;
00041 for(i=0 ; i<mProperties.Size() ; ++i)
00042 {
00043 std::string val = mProperties.GetValue(i);
00044 ILOG_DEBUG("prop found: "<< val);
00045 if(Util::IsRangeDefinition(val))
00046 {
00047 mIterator.AddRange(mProperties.GetName(i), val);
00048 }
00049 }
00050 int r = mProperties.GetInt("repetitions");
00051 int f = mProperties.GetInt("folds");
00052 mScores.assign(mIterator.GetTotalIterations() * f * r, 0);
00053 }
00054
00055 virtual
00056 ~ParameterSearcher()
00057 {
00058 delete mEvaluator;
00059 }
00060
00061 void
00062 OverrideParallelMode(bool mode)
00063 {
00064 mParallelMode = mode;
00065 }
00066
00067 Util::PropertySet
00068 Search()
00069 {
00070 int r = mProperties.GetInt("repetitions");
00071 int f = mProperties.GetInt("folds");
00072 mScores.assign(mIterator.GetTotalIterations() * f * r, 0);
00073 mBestScore = 0;
00074
00075 if(mParallelMode)
00076 SearchParallel();
00077 else
00078 SearchSequential();
00079
00080
00081 std::vector<double> scores;
00082 int iters = mIterator.GetTotalIterations();
00083 scores.assign(iters, 0);
00084 int i;
00085 for(i=0 ; i<mScores.size() ; ++i)
00086 {
00087 scores[i%iters] += mScores[i];
00088 }
00089 double d = f*r;
00090 for(i=0 ; i<scores.size() ; ++i)
00091 {
00092 scores[i] /= d;
00093 }
00094
00095
00096 std::vector<double>::iterator best =
00097 std::max_element(scores.begin(), scores.end());
00098 int bestIteration = best - scores.begin();
00099 mIterator.SetIteration(bestIteration);
00100 mIterator.GetParameters(&mProperties);
00101 mBestScore = *best;
00102 ILOG_INFO_ONCE("best score = " << mBestScore <<
00103 " @ params: " << mProperties.GetDescription());
00104 return mProperties;
00105 }
00106
00107 double
00108 GetBestScore()
00109 {
00110 return mBestScore;
00111 }
00112
00113 void
00114 PrintAllScores(Util::IOBuffer* buf)
00115 {
00116 int index = 0;
00117 for(int r=0 ; r<mProperties.GetInt("repetitions") ; ++r)
00118 {
00119 for(int f=0 ; f<mProperties.GetInt("folds") ; ++f)
00120 {
00121 for(int i=0 ; i<mIterator.GetTotalIterations() ; ++i)
00122 {
00123 Util::PropertySet props;
00124 mIterator.SetIteration(i);
00125 mIterator.GetParameters(&props);
00126 props.Add("score", mScores[index]);
00127 ++index;
00128 props.Add("repetition", r);
00129 props.Add("fold", f);
00130 props.Print(buf);
00131 }
00132 }
00133 }
00134 }
00135
00136
00137 private:
00138 double
00139 CallEvaluator(int iteration, int index)
00140 {
00141 mIterator.SetIteration(iteration);
00142 mIterator.GetParameters(&mProperties);
00143 ILOG_DEBUG("calling evaluator::Evaluate");
00144 double score = mEvaluator->Evaluate(&mProperties);
00145 ILOG_DEBUG("evaluator::Evaluate returned");
00146 mScores[index] += score;
00147 return score;
00148 }
00149
00150 void
00151 SearchSequential()
00152 {
00153 if(mEvaluator == 0)
00154 {
00155 ILOG_ERROR("no evaluator set");
00156 return;
00157 }
00158
00159 int index = 0;
00160 int repetition, repetitionCount;
00161 repetitionCount = mProperties.GetInt("repetitions");
00162 ILOG_INFO("#repetitions " << repetitionCount)
00163 for(repetition=0 ; repetition<repetitionCount ; ++repetition)
00164 {
00165 ILOG_INFO("repetition #" << repetition)
00166 mEvaluator->SetRepetition(repetition, repetitionCount);
00167 int fold, foldCount;
00168 foldCount = mProperties.GetInt("folds");
00169 for(fold=0 ; fold<foldCount ; ++fold)
00170 {
00171 ILOG_INFO("fold #" << fold);
00172 mEvaluator->SetFold(fold, foldCount);
00173 int i;
00174 for(i=0 ; i<mIterator.GetTotalIterations() ; ++i)
00175 {
00176 double score = CallEvaluator(i, index);
00177 ++index;
00178 ILOG_INFO("it " << i << ", " << mProperties.GetDescription() <<
00179 " : " << score)
00180 }
00181 }
00182 }
00183 }
00184
00185
00186 void
00187 SearchParallel()
00188 {
00189 if(Link::Mpi::MyId() == 0)
00190 SearchServer();
00191 else
00192 SearchClient();
00193 Util::Mpi::Reduce(mScores);
00194 }
00195
00196 void
00197 SearchServer()
00198 {
00199 ILOG_DEBUG_NODE("I am the server");
00200
00201 Util::Mpi::JobManager jobManager;
00202 int f;
00203 for(f=0 ; f<mProperties.GetInt("folds") ; ++f)
00204 {
00205 int r;
00206 for(r=0 ; r<mProperties.GetInt("repetitions") ; ++r)
00207 {
00208 std::string id("r0f0");
00209 id[1] = '0'+r;
00210 id[3] = '0'+f;
00211 Util::PropertySet ps;
00212 ps.Add("repetition", r);
00213 ps.Add("fold", f);
00214
00215
00216 jobManager.CreateGroup(ps.GetDescription(), id,
00217 mIterator.GetTotalIterations());
00218 }
00219 }
00220
00221
00222 bool done=false;
00223 int runningJobs=0;
00224 while(true)
00225 {
00226
00227 int source;
00228 std::string message = Link::Mpi::ReceiveString(source);
00229 ILOG_DEBUG_NODE("server got mssg: " << message);
00230 Util::PropertySet job(message);
00231 if(job.GetString("JobManager::job-id") != "-1")
00232 --runningJobs;
00233
00234 jobManager.GetJob(&job);
00235
00236 if(job.GetString("JobManager::job-id") != "-1")
00237 ++runningJobs;
00238 std::ostringstream oss;
00239 job.Print(oss);
00240 ILOG_DEBUG_NODE("SERVER: sending job assignment " <<
00241 job.GetString("JobManager::job-id") << "," <<
00242 job.GetString("JobManager::group-id") << " to " << source);
00243 Link::Mpi::SendString(oss.str(), source);
00244 if(runningJobs == 0)
00245 break;
00246 }
00247 }
00248
00249 void
00250 SearchClient()
00251 {
00252 ILOG_DEBUG_NODE("I am a client");
00253 int id = Link::Mpi::MyId();
00254 int lastRepetition = -1;
00255 int lastFold = -1;
00256 Util::PropertySet job;
00257 job.Add("JobManager::job-id", "-1");
00258 while(true)
00259 {
00260 ILOG_DEBUG_NODE("client sends request for work");
00261
00262 Link::Mpi::SendString(job.GetDescription(),0);
00263
00264 int source;
00265 std::string message = Link::Mpi::ReceiveString(source);
00266 job.Parse(message);
00267 int repetition = job.GetInt("repetition", -1);
00268 int fold = job.GetInt("fold", -1);
00269 int iteration = job.GetInt("JobManager::job-id", -1);
00270
00271 ILOG_DEBUG_NODE("got assignment " << iteration << " from group " <<
00272 job.GetString("JobManager::group-id"));
00273 if(iteration == -1)
00274 break;
00275 if(repetition != lastRepetition)
00276 {
00277 lastFold = -1;
00278 mEvaluator->SetRepetition(repetition,
00279 mProperties.GetInt("repetitions"));
00280 lastRepetition = repetition;
00281 }
00282 if(fold != lastFold)
00283 {
00284 mEvaluator->SetFold(fold, mProperties.GetInt("folds"));
00285 lastFold = fold;
00286 }
00287 int iters = mIterator.GetTotalIterations();
00288 int folds = mProperties.GetInt("folds");
00289 int index = (((repetition * folds) + fold) * iters) + iteration;
00290 double score = CallEvaluator(iteration, index);
00291 ILOG_INFO("r" << repetition << " f" << fold << " i" << iteration <<
00292 ", " << mProperties.GetDescription() << " -> " << score);
00293 }
00294 }
00295
00296
00297 ParameterSearcher* operator=(ParameterSearcher&); // private, no definition visible here: presumably forbids assignment
00298 ParameterSearcher(ParameterSearcher&); // private, no definition visible here: presumably forbids copying
00299
00300 ParameterEvaluator *mEvaluator; // scores a parameter combination; deleted in the destructor
00301 Util::PropertySet mProperties; // copy of the constructor's properties; range parameters are overwritten during the search
00302 Util::RangeIterator mIterator; // holds the ranges registered in the constructor; positioned via SetIteration()
00303 std::vector<double> mScores; // raw scores, one slot per (repetition, fold, combination)
00304 double mBestScore; // highest mean score found by the last Search(); 0 before that
00305 int mScoreIndex; // not read by any method shown here
00306 bool mParallelMode; // true: MPI server/client search, false: sequential search
00307
00308 ILOG_VAR_DEC; // logging macro; presumably declares the logger used by the ILOG_* calls
00309 };
00310
00311 ILOG_VAR_INIT(ParameterSearcher, Impala.Core.Training);
00312
00313
00314
00315 }
00316 }
00317 }
00318
00319 #endif