00001 #ifndef Impala_Core_Feature_FeatureTable_h
00002 #define Impala_Core_Feature_FeatureTable_h
00003
#include <vector>

#include "Util/StringParser.h"
#include "Core/Array/Arrays.h"
#include "Core/Vector/VectorSet.h"
#include "Core/Vector/Norm1Dist.h"
#include "Core/Column/Find.h"
#include "Core/Column/Copy.h"
#include "Core/Table/Read.h"
#include "Core/Feature/FeatureDefinition.h"
#include "Core/Table/AnnotationTable.h"
#include "Core/Table/QuidTable.h"
#include "Core/Table/Read.h"
#include "Core/Table/Append.h"
00016
00017 namespace Impala
00018 {
00019 namespace Core
00020 {
00021 namespace Feature
00022 {
00023
00024
00025 typedef Table::TableTem<Column::ColumnTem<Quid>,
00026 Vector::VectorSet<Array::Array2dScalarReal64> >
00027 FeatureTableBaseType;
00028
00029
00032 class FeatureTable : public FeatureTableBaseType
00033 {
00034 public:
00035
00036 typedef Column::ColumnTem<Quid> ColumnQuid;
00037 typedef Vector::VectorSet<Array::Array2dScalarReal64> ColumnVectorSet;
00038 typedef Vector::VectorTem<Real64> VectorReal64;
00039
00040 FeatureTable(FeatureDefinition def) :
00041 FeatureTableBaseType(ColumnQuid(0), ColumnVectorSet(true, 1, 0))
00042 {
00043 mFeatureDef = def;
00044 SetColName(1, "object");
00045 SetColName(2, "values");
00046 }
00047
00048 FeatureTable(FeatureDefinition def, int tableSize, int vecLen) :
00049 FeatureTableBaseType(ColumnQuid(tableSize),
00050 ColumnVectorSet(true, vecLen, tableSize))
00051 {
00052 mFeatureDef = def;
00053 SetColName(1, "object");
00054 SetColName(2, mFeatureDef.GetName());
00055 }
00056
00057 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00058 static FeatureTable*
00059 MakeFromFile(FeatureDefinition def, String fileName, Util::Database* db)
00060 {
00061 FeatureTable* res = new FeatureTable(def);
00062 Read(res, fileName, db);
00063 return res;
00064 }
00065
00066 static FeatureTable*
00067 MakeFromDataSet(Database::RawDataSet* is, FeatureDefinition mDef,
00068 String walkType)
00069 {
00070 Int64 totalSize = 0;
00071 int dimensionality = 0;
00072 std::vector<String> filenames;
00073 if (is->GetQuidClass() == QUID_CLASS_IMAGE)
00074 {
00075 for (int dirId = 0; dirId < is->NrDirs(); dirId++)
00076 {
00077 String fName = is->GetFilePathFeatureData(mDef, dirId, false,
00078 -1, false, false);
00079 filenames.push_back(fName);
00080 }
00081 }
00082 else if (is->GetQuidClass() == QUID_CLASS_FRAME)
00083 {
00084 for (int i = 0; i < is->NrFiles(); i++)
00085 {
00086 String fName = is->GetFilePathFeatureData(walkType, mDef, i,
00087 false, -1,
00088 false, false);
00089 filenames.push_back(fName);
00090 }
00091 }
00092 else
00093 {
00094 ILOG_ERROR("MakeFromDataSet: Unknown quid class");
00095 return 0;
00096 }
00097 for (int dirId = 0; dirId < filenames.size(); dirId++)
00098 {
00099 Feature::FeatureTable tmp(mDef);
00100 Core::Table::Read(&tmp, filenames[dirId], is->GetDatabase());
00101 totalSize += tmp.Size();
00102 dimensionality = tmp.GetFeatureVectorLength();
00103 }
00104 FeatureTable* table = new FeatureTable(mDef, totalSize, dimensionality);
00105 for(int dirId = 0; dirId < filenames.size(); dirId++)
00106 {
00107 Feature::FeatureTable tmp(mDef);
00108 Core::Table::Read(&tmp, filenames[dirId], is->GetDatabase());
00109 Core::Table::Append(table, &tmp);
00110 }
00111 return table;
00112 }
00113 #endif // REPOSITORY_USED
00114
00115 FeatureDefinition
00116 GetFeatureDefinition()
00117 {
00118 return mFeatureDef;
00119 }
00120
00121 void
00122 SetFeatureDefinition(FeatureDefinition def)
00123 {
00124 mFeatureDef = def;
00125 }
00126
00127 int
00128 GetFeatureVectorLength() const
00129 {
00130 return GetColumn2()->GetVectorLength(0);
00131 }
00132
00133 int
00134 FindQuid(Quid quid)
00135 {
00136 return Column::Find(GetColumn1(), quid);
00137 }
00138
00139 VectorReal64
00140 FindFeature(Quid quid)
00141 {
00142 int rank = FindQuid(quid);
00143 if (rank >= Size())
00144 {
00145 ILOG_ERROR("FindFeature: no quid << " << QuidObj(quid));
00146 return VectorReal64();
00147 }
00148 return Get2(rank);
00149 }
00150
00151 Core::Table::QuidTable*
00152 GetQuidTable()
00153 {
00154 Core::Table::QuidTable* t = new Core::Table::QuidTable(Size());
00155 Copy(t->GetColumn1(), GetColumn1(), Size(), 0, 0);
00156 t->SetSize(Size());
00157 return t;
00158 }
00159
00160 int
00161 Diff(FeatureTable* arg) const
00162 {
00163 if (Size() != arg->Size())
00164 {
00165 ILOG_ERROR("Diff: Size differs: " << Size() << " vs " <<
00166 arg->Size());
00167 return 1;
00168 }
00169 if (GetFeatureVectorLength() != arg->GetFeatureVectorLength())
00170 {
00171 ILOG_ERROR("Diff: vec length differs: " << GetFeatureVectorLength()
00172 << " vs " << arg->GetFeatureVectorLength());
00173 return 1;
00174 }
00175 int nDiff = 0;
00176 for (int i=0 ; i<Size() ; i++)
00177 {
00178 if (Get1(i) != arg->Get1(i))
00179 {
00180 ILOG_DEBUG("Quid " << i << " differs " << Get1(i) <<
00181 " vs " << arg->Get1(i));
00182 nDiff++;
00183 }
00184 else
00185 {
00186 Real64 dist = Vector::Norm1Dist(Get2(i), arg->Get2(i));
00187 if (IsNan(dist) || (dist > 0.00001))
00188 {
00189
00190
00191 ILOG_DEBUG("vector " << i << " differs by " << dist << ": "
00192 << Get2(i).PrintE(5) << " vs "
00193 << arg->Get2(i).PrintE(5));
00194 nDiff++;
00195 }
00196 }
00197 }
00198 if (nDiff > 0)
00199 ILOG_ERROR("Found " << nDiff << " differences");
00200 return nDiff;
00201 }
00202
00203 int
00204 DiffMatrix(FeatureTable* arg) const
00205 {
00206 if (Size() != arg->Size())
00207 {
00208 ILOG_ERROR("Diff: Size differs: " << Size() << " vs " <<
00209 arg->Size());
00210 return 1;
00211 }
00212 if (GetFeatureVectorLength() != arg->GetFeatureVectorLength())
00213 {
00214 ILOG_ERROR("Diff: vec length differs: " << GetFeatureVectorLength()
00215 << " vs " << arg->GetFeatureVectorLength());
00216 return 1;
00217 }
00218 int nDiff = 0;
00219 for (int i=0 ; i<Size() ; i++)
00220 {
00221 if (Get1(i) != arg->Get1(i))
00222 {
00223 ILOG_DEBUG("Quid " << i << " differs " << Get1(i) <<
00224 " vs " << arg->Get1(i));
00225 nDiff++;
00226 }
00227 else
00228 {
00229 VectorReal64 v1 = Get2(i);
00230 VectorReal64 v2 = arg->Get2(i);
00231 for (int j=0 ; j<v1.Size() ; j++)
00232 {
00233 if (fabs(v1[j] - v2[j]) > 0.00001)
00234 {
00235 ILOG_DEBUG("row=" << i << ", col=" << j << " differs: "
00236 << v1[j] << " vs " << v2[j]);
00237 nDiff++;
00238 }
00239 }
00240 }
00241 }
00242 if (nDiff > 0)
00243 ILOG_ERROR("Found " << nDiff << " differences");
00244 return nDiff;
00245 }
00246
00247 #ifndef REPOSITORY_USED // Here comes the deprecated stuff
00248 void
00249 WriteSvmFile(String fileName, Util::Database* db)
00250 {
00251 WriteSvmFile(fileName, db, 0);
00252 }
00253
00254 void
00255 WriteSvmFile(String fileName, Util::Database* db,
00256 Core::Table::AnnotationTable* truth)
00257 {
00258 Util::IOBuffer* buf = db->GetIOBuffer(fileName, false, false, "tmp");
00259 if (buf)
00260 {
00261 ExportAscii(buf, truth, true, true);
00262 delete buf;
00263 }
00264 }
00265
00266 void
00267 ReadPlainAsciiFile(String fileName, Util::Database* db)
00268 {
00269 Util::IOBuffer* buf = db->GetIOBuffer(fileName, true, false, "");
00270 if (buf)
00271 {
00272 ImportAscii(buf);
00273 delete buf;
00274 }
00275 }
00276
00277 void
00278 WritePlainAsciiFile(String fileName, Util::Database* db)
00279 {
00280 Util::IOBuffer* buf = db->GetIOBuffer(fileName, false, false, "tmp");
00281 if (buf)
00282 {
00283 ExportAscii(buf, 0, false, false);
00284 delete buf;
00285 }
00286 }
00287 #endif // REPOSITORY_USED
00288
00289
00290 void
00291 ImportAscii(Util::IOBuffer* buf)
00292 {
00293 if (! (buf && buf->Valid()))
00294 return;
00295
00296 Quid quid = 0;
00297 int nrElem = -1;
00298 while (buf->Available())
00299 {
00300 String line = buf->ReadLine();
00301 Util::StringParser p(line);
00302 if (nrElem == -1)
00303 {
00304 Real64* data = new Real64[line.size() / 2];
00305 int i = 0;
00306 while (!p.TheEnd())
00307 {
00308 Real64 a = p.GetDouble(' ', false, true);
00309 data[i++] = a;
00310 }
00311 nrElem = i;
00312 int nrLines = buf->Size() / line.size() + 1;
00313 Array::Array2dScalarReal64* storage =
00314 new Array::Array2dScalarReal64(nrElem, nrLines, 0, 0);
00315 GetColumn2()->SetStorage(storage);
00316 VectorReal64 vec(nrElem, data, false);
00317 Add(quid, vec);
00318 delete data;
00319 }
00320 else
00321 {
00322 VectorReal64 vec(nrElem);
00323 int i = 0;
00324 while (!p.TheEnd())
00325 {
00326 Real64 a = p.GetDouble(' ', false, true);
00327 vec[i++] = a;
00328 if (i > nrElem)
00329 {
00330 ILOG_ERROR("ImportAscii: too many elements in vector");
00331 break;
00332 }
00333 }
00334 Add(quid, vec);
00335 }
00336 }
00337 }
00338
00339 void
00340 ExportAscii(Util::IOBuffer* buf, Core::Table::AnnotationTable* truth,
00341 bool printQuid, bool printIndex)
00342 {
00343 if (! (buf && buf->Valid()))
00344 return;
00345
00346 for (int i=0 ; i<Size() ; i++)
00347 {
00348 String s("");
00349 if (printQuid)
00350 {
00351 Quid quid = Get1(i);
00352 if (truth == 0)
00353 {
00354 s = QuidObj(quid).ToString() + " ";
00355 }
00356 else
00357 {
00358 s = (truth->IsPositive(quid)) ? "+1 " : "-1 ";
00359 }
00360 }
00361 ColumnVectorSet::ColElemType v = Get2(i);
00362 for (int j=0 ; j<v.Size() ; j++)
00363 {
00364 if (printIndex)
00365 s += MakeString(j+1) + ":";
00366 s += MakeString(v[j]) + " ";
00367 }
00368 buf->Puts(s);
00369 }
00370 }
00371
00372 virtual void
00373 Dump(int from = 0, int to = -1)
00374 {
00375 if (to == -1 || to > Size())
00376 to = Size();
00377 if (to < from)
00378 to = from;
00379 std::cout << "Dumping table from " << from << " to " << to
00380 << " (table size=" << Size() << ", capacity=" << Capacity()
00381 << ", featureLen=" << GetFeatureVectorLength() << ")"
00382 << std::endl;
00383 for (int i=0 ; i<2 ; i++)
00384 std::cout << i+1 << "=" << GetColName(i+1) << ", ";
00385 std::cout << std::endl;
00386 for (int i=from ; i<to ; i++)
00387 {
00388 std::cout << QuidObj(Get1(i)) << ", " << Get2(i) << std::endl;
00389 }
00390 std::cout << std::endl;
00391 }
00392
00393 virtual void
00394 Dump(Database::RawDataSet* dataSet, int from = 0, int to = -1)
00395 {
00396 if (dataSet == 0)
00397 return Dump(from, to);
00398
00399 if (to == -1 || to > Size())
00400 to = Size();
00401 if (to < from)
00402 to = from;
00403 std::cout << "Dumping table from " << from << " to " << to
00404 << " (table size=" << Size() << ", capacity=" << Capacity()
00405 << ")" << std::endl;
00406 for (int i=0 ; i<2 ; i++)
00407 std::cout << i+1 << "=" << GetColName(i+1) << ", ";
00408 std::cout << std::endl;
00409 for (int i=from ; i<to ; i++)
00410 {
00411 Quid quid = Get1(i);
00412 std::cout << dataSet->QuidToString(quid, true) << ", " << Get2(i)
00413 << std::endl;
00414 }
00415 std::cout << std::endl;
00416 }
00417
00418 private:
00419
00420 FeatureDefinition mFeatureDef;
00421
00422 ILOG_VAR_DEC;
00423 };
00424
00425 ILOG_VAR_INIT(FeatureTable, Impala.Core.Feature);
00426
00427 }
00428 }
00429 }
00430
00431 #endif