00001 #include "Util/Channel.h"
00002 #include "Basis/CmdOptions.h"
00003 #include "Basis/ILog.h"
00004 #include "Core/Matrix/KmeansClustering.h"
00005 #include "Core/Array/ReadRaw.h"
00006 #include "Core/Table/Write.h"
00007 #include "Core/Feature/FeatureTable.h"
00008 #include "Util/Database.h"
00009 #include "Core/Database/MakeRawDataSet.h"
00010
00011 #include "Basis/Timer.h"
00012
00013 #include <iostream>
00014 #include <fstream>
00015 #include <sstream>
00016
00017
00018 using namespace std;
00019
00020 namespace Impala
00021 {
00022 namespace Application
00023 {
00024
00025 using namespace Impala::Core::Array;
00026 using namespace Impala::Core::Matrix;
00027
00028
00029 int
00030 mainConstructCodebook(int argc, char* argv[])
00031 {
00032 CmdOptions& options = CmdOptions::GetInstance();
00033 options.Initialise(false, false, true);
00034 options.AddOption(0, "rawFile", "filename", "", 0, true);
00035
00036 ILOG_VAR(Sandbox.koen.mainConstructCodebook);
00037
00038 if (! options.ParseArgs(argc, argv, "dataset featurename codebookoutputname k", 4))
00039 return 1;
00040
00041 Core::Database::RawDataSet* dataset = Core::Database::MakeRawDataSet(options.GetArg(0));
00042 if(!dataset)
00043 {
00044 ILOG_INFO_HEADNODE("failed to open dataset " << options.GetArg(0));
00045 return 0;
00046 }
00047
00048 String feature = options.GetArg(1);
00049 String outputFilename = options.GetArg(2);
00050 int k = atoi(options.GetArg(3));
00051
00052 String outputCheck = dataset->GetFilePath("", outputFilename, false, true);
00053 if(!outputCheck.empty())
00054 {
00055 ILOG_WARN("Output codebook file already exists: " << outputFilename << "; skipping...");
00056 return 0;
00057 }
00058
00059 ILOG_INFO("Clustering from dataset: " << options.GetArg(0));
00060 ILOG_INFO("Feature: " << feature);
00061 ILOG_INFO("Output filename: " << outputFilename);
00062 ILOG_INFO("Clusters requested: " << k);
00063
00064 Mat* codebook = 0;
00065 if(options.GetString("rawFile").empty())
00066 {
00067 Core::Feature::FeatureTable* clusterInput =
00068 Core::Feature::FeatureTable::MakeFromDataSet(dataset,
00069 Core::Feature::FeatureDefinition(feature),
00070 "Keyframes");
00071 ILOG_INFO("Clustering on: " << clusterInput->Size() << " points");
00072 codebook = KmeansClustering(clusterInput->GetColumn2()->GetStorage(), k);
00073 delete clusterInput;
00074 }
00075 else
00076 {
00077 String rawFilename = options.GetString("rawFile");
00078 rawFilename = dataset->GetFilePath("", rawFilename, false, false);
00079 Mat* rawData = 0;
00080 ReadRaw(rawData, rawFilename, dataset->GetDatabase());
00081 codebook = KmeansClustering(rawData, k);
00082 delete rawData;
00083 }
00084
00085 Core::Feature::FeatureTable* codebookTable = new Core::Feature::FeatureTable(
00086 String("kmeans-clustering_k_") + MakeString(k), k,
00087 MatNrCol(codebook));
00088 for(int i = 0; i < MatNrRow(codebook); i++)
00089 {
00090 Quid descr = MakeQuidCodebook(dataset->GetQuidClass(), dataset->GetSetId(), i);
00091 Core::Vector::VectorTem<Real64> v(MatNrCol(codebook));
00092 for(int j = 0; j < MatNrCol(codebook); j++)
00093 {
00094 v[j] = *MatE(codebook, i, j);
00095 }
00096 codebookTable->Add(descr, v);
00097 }
00098 delete codebook;
00099
00100 dataset->MakeDir(FileNamePath(outputFilename));
00101 String output2 = dataset->GetFilePath("", outputFilename, true, false);
00102 Core::Table::Write(codebookTable, output2, dataset->GetDatabase(), true);
00103 delete codebookTable;
00104
00105 return 0;
00106 }
00107
00108
00109 }
00110 }
00111
00112 int
00113 main(int argc, char* argv[])
00114 {
00115 return Impala::Application::mainConstructCodebook(argc, argv);
00116 }