Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

KmeansClustering.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Matrix_KmeansClustering_h
00002 #define Impala_Core_Matrix_KmeansClustering_h
00003 
00004 #include "Core/Matrix/MatFunc.h"
00005 #include "Core/Matrix/Mat.h"
00006 #include "Core/Matrix/MatDrawRandomRows.h"
00007 #include "Core/Matrix/MatKeepSpecificRows.h"
00008 #include "Core/Matrix/VectorQuantize.h"
00009 
00010 namespace Impala
00011 {
00012 namespace Core
00013 {
00014 namespace Matrix
00015 {
00016 
00017 
00018 Mat*
00019 KmeansClustering(Mat* clusterInput, int k)
00020 {
00021     ILOG_VAR(Impala.Core.Matrix.KmeansClustering);
00022     Timer totalTimer;
00023 
00024     // initialize codebook by random drawing
00025     Mat* codebook = MatDrawRandomRows(clusterInput, k);
00026 
00027     double threshold = 1e-5;
00028     double diff = threshold + 1.0;
00029     std::vector<Real64> averageDistances;
00030     int n = MatNrRow(clusterInput);
00031     int d = MatNrCol(clusterInput);
00032     int* numberAssigned = new int[k];
00033     while(diff > threshold)
00034     {
00035         Timer iterTimer;
00036         // assignment will contain indices and distortion/distance
00037         Mat* assignment = VectorQuantize(clusterInput, codebook);
00038 
00039         Real64 averageDist = 0.0;
00040         for(int j = 0; j < n; j++)
00041         {
00042             averageDist += *MatE(assignment, j, 1);
00043         }
00044         averageDist = averageDist / MatNrRow(clusterInput);
00045         averageDistances.push_back(averageDist);
00046 
00047         // reset the codebook to all zeros, to prepare for update
00048         SetVal(codebook, 0.0);
00049 
00050         for(int i = 0; i < k; i++) numberAssigned[i] = 0;
00051 
00052         // sum the observations of the different clusters
00053         for(int j = 0; j < n; j++)
00054         {
00055             int cluster = static_cast<int>(*MatE(assignment, j, 0));
00056             numberAssigned[cluster]++;
00057             for(int i = 0; i < d; i++)
00058             {
00059                 *MatE(codebook, cluster, i) += *MatE(clusterInput, j, i);
00060             }
00061         }
00062 
00063         std::vector<int> usedClusters;
00064         usedClusters.reserve(MatNrRow(codebook));
00065         for(int cluster = 0; cluster < MatNrRow(codebook); cluster++)
00066         {
00067             if(numberAssigned[cluster] > 0)
00068             {
00069                 for(int i = 0; i < d; i++)
00070                 {
00071                     *MatE(codebook, cluster, i) /= numberAssigned[cluster];
00072                 }
00073                 usedClusters.push_back(cluster);
00074             }
00075         }
00076 
00077         if(usedClusters.size() < MatNrRow(codebook))
00078         {
00079             // one of the clusters has no elements: remove it
00080             Mat* temp = MatKeepSpecificRows(codebook, usedClusters);
00081             delete codebook;
00082             codebook = temp;
00083         }
00084 
00085         if(averageDistances.size() > 1)
00086         {
00087             diff = averageDistances[averageDistances.size()-2] - averageDistances[averageDistances.size()-1];
00088             ILOG_INFO("K-means progress: " << averageDistances[averageDistances.size()-2] << " " << averageDistances[averageDistances.size()-1] << " " << diff << " " << MatNrRow(codebook) << " iterTime=" << iterTimer.SplitTimeStr() << " totalTimer=" << totalTimer.SplitTimeStr());
00089         }
00090         delete assignment;
00091     }
00092     delete numberAssigned;
00093     return codebook;
00094 }
00095 
00096 
00097 } // namespace Matrix
00098 } // namespace Core
00099 } // namespace Impala
00100 
00101 #endif

Generated on Fri Mar 19 09:31:15 2010 for ImpalaSrc by  doxygen 1.5.1