00001 #ifndef Impala_Core_Training_SvmFile_h
00002 #define Impala_Core_Training_SvmFile_h
00003
00004 #include "Link/Svm/LinkSvm.h"
00005 #include "Basis/File.h"
00006 #include <sstream>
00007 #include <fstream>
00008 #include "Util/ProgressPrinter.h"
00009 #include "Link/Svm/LinkSvm.h"
00010
00011 namespace Impala
00012 {
00013 namespace Core
00014 {
00015 namespace Training
00016 {
00017
00018 const double cInvalid = 666;
00019
00020 svm_problem* ReadSvmFile(const std::string& filename)
00021 {
00022 ILOG_VAR(Impala.Core.Training.SvmFile);
00023 File f(filename, "r");
00024 if (!f.Valid())
00025 return 0;
00026
00027 int sample=0;
00028 int nrNodes=0;
00029 while (!f.Eof())
00030 {
00031 std::istringstream iss(f.ReadLine(false));
00032 double label = cInvalid;
00033 iss >> label;
00034 if(label == cInvalid)
00035 break;
00036 ++sample;
00037 while(!iss.eof())
00038 {
00039 int index;
00040 double value;
00041 char c = 0;
00042 iss >> index >> c >> value;
00043 if(c != ':')
00044 {
00045 ILOG_ERROR("parse error in svm file "<< filename <<
00046 "on line "<< sample);
00047 return 0;
00048 }
00049 ++nrNodes;
00050 }
00051 ++nrNodes;
00052 }
00053 f.Rewind();
00054
00055
00056 svm_problem* p = new svm_problem;
00057 p->l = sample;
00058 p->y = new double[sample];
00059 p->x = new svm_node*[sample];
00060 svm_node* nodes = new svm_node[nrNodes];
00061 svm_node* dst = nodes;
00062 for(int i=0 ; i<sample ; ++i)
00063 {
00064 std::istringstream iss(f.ReadLine(false));
00065 iss >> p->y[i];
00066 p->x[i] = dst;
00067 while(!iss.eof())
00068 {
00069 char c;
00070 iss >> dst->index >> c >> dst->value;
00071 ++dst;
00072 }
00073 dst->index = -1;
00074 ++dst;
00075 }
00076 f.Close();
00077 return p;
00078 }
00079
00080 void WriteSvmFile(const svm_problem* p, const std::string& filename)
00081 {
00082 ILOG_VAR(Impala.Core.Training.SvmFile);
00083 std::ofstream out(filename.c_str());
00084 if(!out.is_open())
00085 {
00086 ILOG_ERROR("could not open file " << filename);
00087 return;
00088 }
00089 for(int i=0 ; i<p->l ; ++i)
00090 {
00091 out << p->y[i];
00092 for(int j=0 ; p->x[i][j].index != -1 ; ++j)
00093 out << " " << p->x[i][j].index << ":" << p->x[i][j].value;
00094 out << "\n";
00095 }
00096 out.close();
00097 }
00098
00099 }
00100 }
00101 }
00102
00103 #endif