00001 #include "Util/Channel.h"
00002 #include "Basis/CmdOptions.h"
00003 #include "Basis/ILog.h"
00004 #include "Core/Matrix/KmeansClustering.h"
00005 #include "Core/Array/ReadRaw.h"
00006 #include "Core/Table/Write.h"
00007 #include "Core/Feature/FeatureTable.h"
00008 #include "Util/Database.h"
00009 #include "Core/Database/MakeRawDataSet.h"
00010
00011 #include "Basis/Timer.h"
00012
00013
00014 #ifdef LOG4CPP_USED
00015 #ifdef LOG4CPPHACK
00016 #include "Link/Log4cppLib.cpp"
00017 #endif
00018 #endif
00019
00020 #include <iostream>
00021 #include <fstream>
00022 #include <sstream>
00023
00024
00025 using namespace std;
00026
00027 namespace Impala
00028 {
00029 namespace Application
00030 {
00031
00032 using namespace Impala::Core::Array;
00033 using namespace Impala::Core::Matrix;
00034
00035
00036 int
00037 mainConstructCodebook(int argc, char* argv[])
00038 {
00039 CmdOptions& options = CmdOptions::GetInstance();
00040 options.Initialise(false, false, true);
00041 options.AddOption(0, "rawFile", "filename", "", 0, true);
00042
00043 ILOG_VAR(Sandbox.koen.mainConstructCodebook);
00044
00045 if (! options.ParseArgs(argc, argv, "dataset featurename codebookoutputname k", 4))
00046 return 1;
00047
00048 Core::Database::RawDataSet* dataset = Core::Database::MakeRawDataSet(options.GetArg(0));
00049 if(!dataset)
00050 {
00051 ILOG_INFO_ONCE("failed to open dataset " << options.GetArg(0));
00052 return 0;
00053 }
00054
00055 String feature = options.GetArg(1);
00056 String outputFilename = options.GetArg(2);
00057 int k = atoi(options.GetArg(3));
00058
00059 String outputCheck = dataset->GetFilePath("", outputFilename, false, true);
00060 if(!outputCheck.empty())
00061 {
00062 ILOG_WARN("Output codebook file already exists: " << outputFilename << "; skipping...");
00063 return 0;
00064 }
00065
00066 ILOG_INFO("Clustering from dataset: " << options.GetArg(0));
00067 ILOG_INFO("Feature: " << feature);
00068 ILOG_INFO("Output filename: " << outputFilename);
00069 ILOG_INFO("Clusters requested: " << k);
00070
00071 Mat* codebook = 0;
00072 if(options.GetString("rawFile").empty())
00073 {
00074 Core::Feature::FeatureTable* clusterInput =
00075 Core::Feature::FeatureTable::MakeFromDataSet(dataset,
00076 Core::Feature::FeatureDefinition(feature),
00077 "Keyframes");
00078 ILOG_INFO("Clustering on: " << clusterInput->Size() << " points");
00079 codebook = KmeansClustering(clusterInput->GetColumn2()->GetStorage(), k);
00080 delete clusterInput;
00081 }
00082 else
00083 {
00084 String rawFilename = options.GetString("rawFile");
00085 Mat* rawData = 0;
00086 ReadRaw(rawData, rawFilename, dataset->GetDatabase());
00087 codebook = KmeansClustering(rawData, k);
00088 delete rawData;
00089 }
00090
00091 Core::Feature::FeatureTable* codebookTable = new Core::Feature::FeatureTable(
00092 String("kmeans-clustering_k_") + MakeString(k), k,
00093 MatNrCol(codebook));
00094 for(int i = 0; i < MatNrRow(codebook); i++)
00095 {
00096 Quid descr = MakeQuidCodebook(dataset->GetQuidClass(), dataset->GetSetId(), i);
00097 Core::Vector::VectorTem<Real64> v(MatNrCol(codebook));
00098 for(int j = 0; j < MatNrCol(codebook); j++)
00099 {
00100 v[j] = *MatE(codebook, i, j);
00101 }
00102 codebookTable->Add(descr, v);
00103 }
00104 delete codebook;
00105
00106 dataset->MakeDir(FileNamePath(outputFilename));
00107 String output2 = dataset->GetFilePath("", outputFilename, true, false);
00108 Core::Table::Write(codebookTable, output2, dataset->GetDatabase(), true);
00109 delete codebookTable;
00110
00111 return 0;
00112 }
00113
00114
00115 }
00116 }
00117
00118 int
00119 main(int argc, char* argv[])
00120 {
00121 return Impala::Application::mainConstructCodebook(argc, argv);
00122 }