00001 #ifndef Impala_Core_Feature_FeatureTable_h
00002 #define Impala_Core_Feature_FeatureTable_h
00003
00004 #include "Util/StringParser.h"
00005 #include "Core/Array/Arrays.h"
00006 #include "Core/Vector/VectorSet.h"
00007 #include "Core/Vector/Norm1Dist.h"
00008 #include "Core/Column/Find.h"
00009 #include "Core/Column/Copy.h"
00010 #include "Core/Table/Read.h"
00011 #include "Core/Feature/FeatureDefinition.h"
00012 #include "Core/Table/AnnotationTable.h"
00013 #include "Core/Table/QuidTable.h"
00014 #include "Core/Table/Read.h"
00015 #include "Core/Table/Append.h"
00016
00017 namespace Impala
00018 {
00019 namespace Core
00020 {
00021 namespace Feature
00022 {
00023
00024
00025 typedef Table::TableTem<Column::ColumnTem<Quid>,
00026 Vector::VectorSet<Array::Array2dScalarReal64> >
00027 FeatureTableBaseType;
00028
00029
00032 class FeatureTable : public FeatureTableBaseType
00033 {
00034 public:
00035
00036 typedef Column::ColumnTem<Quid> ColumnQuid;
00037 typedef Vector::VectorSet<Array::Array2dScalarReal64> ColumnVectorSet;
00038 typedef Vector::VectorTem<Real64> VectorReal64;
00039
00040 FeatureTable(FeatureDefinition def) :
00041 FeatureTableBaseType(ColumnQuid(0), ColumnVectorSet(true, 1, 0))
00042 {
00043 mFeatureDef = def;
00044 SetColName(1, "object");
00045 SetColName(2, "values");
00046 }
00047
00048 FeatureTable(FeatureDefinition def, int tableSize, int vecLen) :
00049 FeatureTableBaseType(ColumnQuid(tableSize),
00050 ColumnVectorSet(true, vecLen, tableSize))
00051 {
00052 mFeatureDef = def;
00053 SetColName(1, "object");
00054 SetColName(2, mFeatureDef.GetName());
00055 }
00056
00057 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00058 static FeatureTable*
00059 MakeFromFile(FeatureDefinition def, String fileName, Util::Database* db)
00060 {
00061 FeatureTable* res = new FeatureTable(def);
00062 Read(res, fileName, db);
00063 return res;
00064 }
00065
00066 static FeatureTable*
00067 MakeFromDataSet(Database::RawDataSet* is, FeatureDefinition mDef,
00068 String walkType)
00069 {
00070 Int64 totalSize = 0;
00071 int dimensionality = 0;
00072 std::vector<String> filenames;
00073 if (is->GetQuidClass() == QUID_CLASS_IMAGE)
00074 {
00075 for (int dirId = 0; dirId < is->NrDirs(); dirId++)
00076 {
00077 String fName = is->GetFilePathFeatureData(mDef, dirId, false,
00078 -1, false, false);
00079 filenames.push_back(fName);
00080 }
00081 }
00082 else if (is->GetQuidClass() == QUID_CLASS_FRAME)
00083 {
00084 for (int i = 0; i < is->NrFiles(); i++)
00085 {
00086 String fName = is->GetFilePathFeatureData(walkType, mDef, i,
00087 false, -1,
00088 false, false);
00089 filenames.push_back(fName);
00090 }
00091 }
00092 else
00093 {
00094 ILOG_ERROR("MakeFromDataSet: Unknown quid class");
00095 return 0;
00096 }
00097 for (int dirId = 0; dirId < filenames.size(); dirId++)
00098 {
00099 Feature::FeatureTable tmp(mDef);
00100 Core::Table::Read(&tmp, filenames[dirId], is->GetDatabase());
00101 totalSize += tmp.Size();
00102 dimensionality = tmp.GetFeatureVectorLength();
00103 }
00104 FeatureTable* table = new FeatureTable(mDef, totalSize, dimensionality);
00105 for(int dirId = 0; dirId < filenames.size(); dirId++)
00106 {
00107 Feature::FeatureTable tmp(mDef);
00108 Core::Table::Read(&tmp, filenames[dirId], is->GetDatabase());
00109 Core::Table::Append(table, &tmp);
00110 }
00111 return table;
00112 }
00113 #endif // REPOSITORY_USED
00114
00115 FeatureDefinition
00116 GetFeatureDefinition()
00117 {
00118 return mFeatureDef;
00119 }
00120
00121 void
00122 SetFeatureDefinition(FeatureDefinition def)
00123 {
00124 mFeatureDef = def;
00125 }
00126
00127 int
00128 GetFeatureVectorLength() const
00129 {
00130 return GetColumn2()->GetVectorLength(0);
00131 }
00132
00133 int
00134 FindQuid(Quid quid)
00135 {
00136 return Column::Find(GetColumn1(), quid);
00137 }
00138
00139 VectorReal64
00140 FindFeature(Quid quid)
00141 {
00142 int rank = FindQuid(quid);
00143 return Get2(rank);
00144 }
00145
00146 Core::Table::QuidTable*
00147 GetQuidTable()
00148 {
00149 Core::Table::QuidTable* t = new Core::Table::QuidTable(Size());
00150 Copy(t->GetColumn1(), GetColumn1(), Size(), 0, 0);
00151 t->SetSize(Size());
00152 return t;
00153 }
00154
00155 int
00156 Diff(FeatureTable* arg) const
00157 {
00158 if (Size() != arg->Size())
00159 {
00160 ILOG_ERROR("Diff: Size differs: " << Size() << " vs " <<
00161 arg->Size());
00162 return 1;
00163 }
00164 if (GetFeatureVectorLength() != arg->GetFeatureVectorLength())
00165 {
00166 ILOG_ERROR("Diff: vec length differs: " << GetFeatureVectorLength()
00167 << " vs " << arg->GetFeatureVectorLength());
00168 return 1;
00169 }
00170 int nDiff = 0;
00171 for (int i=0 ; i<Size() ; i++)
00172 {
00173 if (Get1(i) != arg->Get1(i))
00174 {
00175 ILOG_DEBUG("Quid " << i << " differs " << Get1(i) <<
00176 " vs " << arg->Get1(i));
00177 nDiff++;
00178 }
00179 else
00180 {
00181 Real64 dist = Vector::Norm1Dist(Get2(i), arg->Get2(i));
00182 if (IsNan(dist) || (dist > 0.00001))
00183 {
00184 ILOG_DEBUG("vector " << i << " differs " << Get2(i) <<
00185 " vs " << arg->Get2(i));
00186 nDiff++;
00187 }
00188 }
00189 }
00190 if (nDiff > 0)
00191 ILOG_ERROR("Found " << nDiff << " differences");
00192 return nDiff;
00193 }
00194
00195 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00196 void
00197 WriteSvmFile(String fileName, Util::Database* db)
00198 {
00199 WriteSvmFile(fileName, db, 0);
00200 }
00201
00202 void
00203 WriteSvmFile(String fileName, Util::Database* db,
00204 Core::Table::AnnotationTable* truth)
00205 {
00206 Util::IOBuffer* buf = db->GetIOBuffer(fileName, false, false, "tmp");
00207 if (buf)
00208 {
00209 ExportAscii(buf, truth, true, true);
00210 delete buf;
00211 }
00212 }
00213
00214 void
00215 ReadPlainAsciiFile(String fileName, Util::Database* db)
00216 {
00217 Util::IOBuffer* buf = db->GetIOBuffer(fileName, true, false, "");
00218 if (buf)
00219 {
00220 ImportAscii(buf);
00221 delete buf;
00222 }
00223 }
00224
00225 void
00226 WritePlainAsciiFile(String fileName, Util::Database* db)
00227 {
00228 Util::IOBuffer* buf = db->GetIOBuffer(fileName, false, false, "tmp");
00229 if (buf)
00230 {
00231 ExportAscii(buf, 0, false, false);
00232 delete buf;
00233 }
00234 }
00235 #endif // REPOSITORY_USED
00236
00237
00238 void
00239 ImportAscii(Util::IOBuffer* buf)
00240 {
00241 if (! (buf && buf->Valid()))
00242 return;
00243
00244 Quid quid = 0;
00245 int nrElem = -1;
00246 while (buf->Available())
00247 {
00248 String line = buf->ReadLine();
00249 Util::StringParser p(line);
00250 if (nrElem == -1)
00251 {
00252 Real64* data = new Real64[line.size() / 2];
00253 int i = 0;
00254 while (!p.TheEnd())
00255 {
00256 Real64 a = p.GetDouble(' ', false, true);
00257 data[i++] = a;
00258 }
00259 nrElem = i;
00260 int nrLines = buf->Size() / line.size() + 1;
00261 Array::Array2dScalarReal64* storage =
00262 new Array::Array2dScalarReal64(nrElem, nrLines, 0, 0);
00263 GetColumn2()->SetStorage(storage);
00264 VectorReal64 vec(nrElem, data, false);
00265 Add(quid, vec);
00266 delete data;
00267 }
00268 else
00269 {
00270 VectorReal64 vec(nrElem);
00271 int i = 0;
00272 while (!p.TheEnd())
00273 {
00274 Real64 a = p.GetDouble(' ', false, true);
00275 vec[i++] = a;
00276 if (i > nrElem)
00277 {
00278 ILOG_ERROR("ImportAscii: too many elements in vector");
00279 break;
00280 }
00281 }
00282 Add(quid, vec);
00283 }
00284 }
00285 }
00286
00287 void
00288 ExportAscii(Util::IOBuffer* buf, Core::Table::AnnotationTable* truth,
00289 bool printQuid, bool printIndex)
00290 {
00291 if (! (buf && buf->Valid()))
00292 return;
00293
00294 for (int i=0 ; i<Size() ; i++)
00295 {
00296 String s("");
00297 if (printQuid)
00298 {
00299 Quid quid = Get1(i);
00300 if (truth == 0)
00301 {
00302 s = QuidObj(quid).ToString() + " ";
00303 }
00304 else
00305 {
00306 s = (truth->IsPositive(quid)) ? "+1 " : "-1 ";
00307 }
00308 }
00309 ColumnVectorSet::ColElemType v = Get2(i);
00310 for (int j=0 ; j<v.Size() ; j++)
00311 {
00312 if (printIndex)
00313 s += MakeString(j+1) + ":";
00314 s += MakeString(v[j]) + " ";
00315 }
00316 buf->Puts(s);
00317 }
00318 }
00319
00320 virtual void
00321 Dump(int from = 0, int to = -1)
00322 {
00323 if (to == -1 || to > Size())
00324 to = Size();
00325 if (to < from)
00326 to = from;
00327 std::cout << "Dumping table from " << from << " to " << to
00328 << " (table size=" << Size() << ", capacity=" << Capacity()
00329 << ", featureLen=" << GetFeatureVectorLength() << ")"
00330 << std::endl;
00331 for (int i=0 ; i<2 ; i++)
00332 std::cout << i+1 << "=" << GetColName(i+1) << ", ";
00333 std::cout << std::endl;
00334 for (int i=from ; i<to ; i++)
00335 {
00336 std::cout << QuidObj(Get1(i)) << ", " << Get2(i) << std::endl;
00337 }
00338 std::cout << std::endl;
00339 }
00340
00341 virtual void
00342 Dump(Database::RawDataSet* dataSet, int from = 0, int to = -1)
00343 {
00344 if (dataSet == 0)
00345 return Dump(from, to);
00346
00347 if (to == -1 || to > Size())
00348 to = Size();
00349 if (to < from)
00350 to = from;
00351 std::cout << "Dumping table from " << from << " to " << to
00352 << " (table size=" << Size() << ", capacity=" << Capacity()
00353 << ")" << std::endl;
00354 for (int i=0 ; i<2 ; i++)
00355 std::cout << i+1 << "=" << GetColName(i+1) << ", ";
00356 std::cout << std::endl;
00357 for (int i=from ; i<to ; i++)
00358 {
00359 Quid quid = Get1(i);
00360 std::cout << dataSet->QuidToString(quid, true) << ", " << Get2(i)
00361 << std::endl;
00362 }
00363 std::cout << std::endl;
00364 }
00365
00366 private:
00367
00368 FeatureDefinition mFeatureDef;
00369
00370 ILOG_VAR_DEC;
00371 };
00372
00373 ILOG_VAR_INIT(FeatureTable, Impala.Core.Feature);
00374
00375 }
00376 }
00377 }
00378
00379 #endif