00001 #ifndef Impala_Core_Feature_WeightedFeatureList_h
00002 #define Impala_Core_Feature_WeightedFeatureList_h
00003
00004 #include "Util/StringParser.h"
00005 #include "Util/IOBuffer.h"
00006
00007 namespace Impala
00008 {
00009 namespace Core
00010 {
00011 namespace Feature
00012 {
00013
00014
00015 class WeightedFeatureList
00016 {
00017 public:
00018
00019 WeightedFeatureList()
00020 {
00021 }
00022
00023 WeightedFeatureList(CString input)
00024 {
00025 ParseAndAdd(input);
00026 }
00027
00028 void
00029 Clear()
00030 {
00031 mFeatures.clear();
00032 mWeights.clear();
00033 }
00034
00035 int
00036 Size() const
00037 {
00038 return mFeatures.size();
00039 }
00040
00041 String
00042 GetFeature(int i) const
00043 {
00044 return mFeatures[i];
00045 }
00046
00047 Real64
00048 GetWeight(int i) const
00049 {
00050 return mWeights[i];
00051 }
00052
00053 Real64
00054 GetTotalWeight() const
00055 {
00056 Real64 res = 0;
00057 for (int i=0 ; i<mWeights.size() ; i++)
00058 res += mWeights[i];
00059 return res;
00060 }
00061
00062 void
00063 Add(CString feature, Real64 weight)
00064 {
00065 mFeatures.push_back(feature);
00066 mWeights.push_back(weight);
00067 }
00068
00069 void
00070 ParseAndAdd(String input)
00071 {
00072 if (StringStartsWith(input, "inputFeatures"))
00073 {
00074 Util::StringParser p(input);
00075 String dummy = p.GetString(' ', true);
00076 input = p.GetString('"', true);
00077 }
00078 Util::StringParser p(input);
00079 while (!p.TheEnd())
00080 {
00081 double weight = p.GetDouble(' ', true);
00082 String feature = p.GetString(' ', false);
00083 if (feature.empty())
00084 break;
00085 mWeights.push_back(weight);
00086 mFeatures.push_back(feature);
00087 }
00088 }
00089
00090 int
00091 Diff(const WeightedFeatureList* arg) const
00092 {
00093 if (Size() != arg->Size())
00094 {
00095 ILOG_ERROR("Size differs: " << Size() << " vs " << arg->Size());
00096 return 1;
00097 }
00098 int nDiff = 0;
00099 for (int i=0 ; i<Size() ; i++)
00100 {
00101 if (GetFeature(i) != arg->GetFeature(i))
00102 {
00103 ILOG_DEBUG("Feature " << i << " differs " << GetFeature(i)
00104 << " vs " << arg->GetFeature(i));
00105 nDiff++;
00106 }
00107 if (fabs(GetWeight(i) - arg->GetWeight(i)) > 0.00001)
00108 {
00109 ILOG_DEBUG("Weight " << i << " differs " << GetWeight(i)
00110 << " vs " << arg->GetWeight(i));
00111 nDiff++;
00112 }
00113 }
00114 if (nDiff > 0)
00115 ILOG_ERROR("Found " << nDiff << " differences in features/weights");
00116 return nDiff;
00117 }
00118
00119 private:
00120
00121 std::vector<String> mFeatures;
00122 std::vector<Real64> mWeights;
00123
00124 ILOG_VAR_DEC;
00125
00126 };
00127
00128 ILOG_VAR_INIT(WeightedFeatureList, Impala.Core.Feature);
00129
00130 void
00131 Read(WeightedFeatureList* featList, Util::IOBuffer* buf)
00132 {
00133 ILOG_VAR(Impala.Core.Feature.WeightedFeatureList.Read);
00134 if (! (buf && buf->Valid()))
00135 {
00136 ILOG_ERROR("Invalid IOBuffer");
00137 return;
00138 }
00139
00140 std::vector<String> lines;
00141 buf->ReadStrings(std::back_inserter(lines), true, -1);
00142 for (int i=0 ; i<lines.size() ; i++)
00143 featList->ParseAndAdd(lines[i]);
00144 }
00145
00146 bool
00147 Read(WeightedFeatureList* featList, Persistency::File file)
00148 {
00149 ILOG_VAR(Impala.Core.Feature.WeightedFeatureList.Read);
00150 Util::IOBuffer* buf = file.GetReadBuffer();
00151 if (buf)
00152 {
00153 Read(featList, buf);
00154 delete buf;
00155 return true;
00156 }
00157 return false;
00158 }
00159
00160 bool
00161 Write(WeightedFeatureList* featList, Util::IOBuffer* buf)
00162 {
00163 ILOG_VAR(Impala.Core.Feature.WeightedFeatureList.Write);
00164 if (! (buf && buf->Valid()))
00165 {
00166 ILOG_ERROR("Invalid IOBuffer");
00167 return false;
00168 }
00169
00170 for (int i=0 ; i<featList->Size() ; i++)
00171 {
00172 String s =
00173 MakeString(featList->GetWeight(i)) + " " + featList->GetFeature(i);
00174 buf->Puts(s);
00175 }
00176 return true;
00177 }
00178
00179 bool
00180 Write(WeightedFeatureList* featList, Persistency::File file)
00181 {
00182 Util::IOBuffer* buf = file.GetWriteBuffer();
00183 if (buf)
00184 {
00185 Write(featList, buf);
00186 delete buf;
00187 return true;
00188 }
00189 return false;
00190 }
00191
00192 }
00193 }
00194 }
00195
00196 #endif