Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

Mat* Impala::Core::Matrix::KmeansClustering(Mat* clusterInput, int k)

Definition at line 19 of file KmeansClustering.h.

References ILOG_INFO, ILOG_VAR, MatDrawRandomRows(), MatE(), MatKeepSpecificRows(), MatNrCol(), MatNrRow(), Impala::Core::Array::SetVal(), Impala::Timer::SplitTimeStr(), and VectorQuantize().

Referenced by Impala::Application::mainConstructCodebook().

00020 {
00021     // Clusters the rows of clusterInput with k-means and returns the codebook
00022     // (one cluster centre per row). The codebook starts as k rows drawn at
00023     // random from clusterInput. Clusters that receive no observations in an
00024     // iteration are dropped, so the result may hold fewer than k rows.
00025     // Iteration stops once the average distance decreases by no more than
00026     // the threshold between two successive passes.
00027     // NOTE(review): a replaced codebook is released with delete below, so the
00028     // returned matrix presumably has to be deleted by the caller -- confirm.
00029     ILOG_VAR(Impala.Core.Matrix.KmeansClustering);
00030     Timer totalTimer;
00031 
00032     // initialize codebook by random drawing
00033     Mat* codebook = MatDrawRandomRows(clusterInput, k);
00034 
00035     const double threshold = 1e-5;
00036     // start above the threshold so that the loop body runs at least once
00037     double diff = threshold + 1.0;
00038     std::vector<Real64> averageDistances;
00039     const int n = MatNrRow(clusterInput);
00040     const int d = MatNrCol(clusterInput);
00041     // Per-cluster observation counts. A std::vector replaces the former
00042     // new[] buffer, which was released with scalar delete (undefined
00043     // behaviour) and leaked whenever something inside the loop threw.
00044     std::vector<int> numberAssigned(k, 0);
00045     while(diff > threshold)
00046     {
00047         Timer iterTimer;
00048         // assignment will contain indices and distortion/distance:
00049         // column 0 is read as the cluster index, column 1 as the distance
00050         Mat* assignment = VectorQuantize(clusterInput, codebook);
00051 
00052         Real64 averageDist = 0.0;
00053         for(int j = 0; j < n; j++)
00054         {
00055             averageDist += *MatE(assignment, j, 1);
00056         }
00057         averageDist = averageDist / n;
00058         averageDistances.push_back(averageDist);
00059 
00060         // reset the codebook to all zeros, to prepare for update
00061         SetVal(codebook, 0.0);
00062 
00063         // counts keep their k slots even after the codebook has shrunk;
00064         // the surplus entries simply stay zero
00065         numberAssigned.assign(k, 0);
00066 
00067         // sum the observations of the different clusters
00068         for(int j = 0; j < n; j++)
00069         {
00070             const int cluster = static_cast<int>(*MatE(assignment, j, 0));
00071             numberAssigned[cluster]++;
00072             for(int i = 0; i < d; i++)
00073             {
00074                 *MatE(codebook, cluster, i) += *MatE(clusterInput, j, i);
00075             }
00076         }
00077 
00078         // turn the sums into means and remember which clusters are in use
00079         const int nrClusters = MatNrRow(codebook);
00080         std::vector<int> usedClusters;
00081         usedClusters.reserve(nrClusters);
00082         for(int cluster = 0; cluster < nrClusters; cluster++)
00083         {
00084             if(numberAssigned[cluster] > 0)
00085             {
00086                 for(int i = 0; i < d; i++)
00087                 {
00088                     *MatE(codebook, cluster, i) /= numberAssigned[cluster];
00089                 }
00090                 usedClusters.push_back(cluster);
00091             }
00092         }
00093 
00094         // compare as int: avoids the signed/unsigned mismatch of the
00095         // original size() < MatNrRow() test
00096         if(static_cast<int>(usedClusters.size()) < nrClusters)
00097         {
00098             // one of the clusters has no elements: remove it
00099             Mat* temp = MatKeepSpecificRows(codebook, usedClusters);
00100             delete codebook;
00101             codebook = temp;
00102         }
00103 
00104         // progress can only be measured from the second iteration onwards
00105         if(averageDistances.size() > 1)
00106         {
00107             const Real64 previous = averageDistances[averageDistances.size()-2];
00108             const Real64 current = averageDistances[averageDistances.size()-1];
00109             diff = previous - current;
00110             ILOG_INFO("K-means progress: " << previous << " " << current << " " << diff << " " << MatNrRow(codebook) << " iterTime=" << iterTimer.SplitTimeStr() << " totalTimer=" << totalTimer.SplitTimeStr());
00111         }
00112         delete assignment;
00113     }
00114     return codebook;
00115 }

Here is the call graph for this function:


Generated on Fri Mar 19 11:15:53 2010 for ImpalaSrc by  doxygen 1.5.1