Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

template<class ArrayT>
void Impala::Core::Matrix::MatKMeans ( ArrayT *  m,
ArrayT *  c,
VecScalarReal64 *  imap,
int  numloop,
bool  cIsInitialized,
int &  clustersFound,
VecScalarInt32 *  vectorsInC 
)

K-means clustering.

Matrix m is clustered, where each row is a vector. Size of m is nrVectors x nrDimensions (rows are contiguous in memory) Clusters are stored in c, size of c is nrClusters x nrDimensions Mapping of vector id's to cluster id's is stored in imap, imap contains nrVectors elements.

numloop: max number of loops. if (cIsInitialized) c contains the starting points for kmeans else c is assigned some data vectors. clustersFound indicates the number of clusters actually found vectorsInC contains the number of vectors mapped onto each cluster, and therefor has nrClusters elements

Definition at line 61 of file MatKMeans.h.

References MatE(), MatNrCol(), MatNrRow(), my_vec_distance(), and VecE().

Referenced by Impala::Core::Array::ColorSegmentation().

00064 {
00065     typedef typename ArrayT::StorType StorT;
00066     typedef typename ArrayT::ArithType ArithT;
00067 
00068     int nrVectors = MatNrRow(m);
00069     int nrDimensions = MatNrCol(m);
00070     int nrClusters = MatNrRow(c); 
00071 
00072     int i,j;
00073     StorT* cPtr;
00074     StorT* mPtr;
00075     double* imapPtr = VecE(imap, 0);
00076 
00077     if (!cIsInitialized)
00078     {
00079         // the cluster matrix is not initialized, initialize it with some vectors.
00080         int steps = nrVectors / nrClusters;
00081         cPtr = MatE(c, 0, 0);
00082         for (i=0 ; i<nrClusters ; i++)
00083         {
00084             mPtr = MatE(m, i*steps, 0);
00085             for (j=0 ; j<nrDimensions ; j++)
00086             {
00087                 *cPtr++ = *mPtr++;
00088             }
00089         }
00090     } 
00091 
00092     bool contin = true;
00093     ArithT err = 0, errOld = 0;
00094     int emptyclust = 0;
00095     StorT* nextC = new StorT[nrClusters*nrDimensions];
00096     int* nrInNextC = new int[nrClusters];
00097     StorT* nextPtr;
00098     while (contin)
00099     {
00100         emptyclust = 0;
00101         err = 0;
00102         contin = false;
00103         nextPtr = nextC;
00104         for (i=0 ; i<nrClusters ; i++)
00105         {
00106             nrInNextC[i] = 0;
00107             for (j=0 ; j<nrDimensions ; j++)
00108                 *nextPtr++ = 0;
00109         }
00110 
00111         for (i=0 ; i<nrVectors ; i++)
00112         {
00113             int myClus = 0;
00114             ArithT d = 0;
00115             mPtr = MatE(m, i, 0);
00116             ArithT dm = my_vec_distance(mPtr, MatE(c, 0, 0), nrDimensions);
00117             for (j=1 ; j<nrClusters ; j++)
00118             {
00119                 //d = vec_distance(mPtr, MatE(c, j, 0), nrDimensions, dm);
00120                 d = my_vec_distance(mPtr, MatE(c, j, 0), nrDimensions);
00121                 if (d<dm)
00122                 {
00123                     dm = d;
00124                     myClus = j;
00125                 }
00126             }
00127             imapPtr[i] = myClus;
00128             err += dm;
00129 
00130             // store this vector's information in nextC for next iteration
00131             nrInNextC[myClus] += 1;
00132             nextPtr = nextC + myClus*nrDimensions;
00133             for (j=0 ; j<nrDimensions ; j++)
00134                 *nextPtr++ += *mPtr++;
00135         }
00136         
00137         numloop--;
00138         // recalculate the position of the cluster vectors
00139         cPtr = MatE(c, 0, 0);
00140         nextPtr = nextC;
00141         for (i=0 ; i<nrClusters ; i++)
00142         {
00143             if (nrInNextC[i] > 0)
00144             {
00145                 for (j=0 ; j<nrDimensions ; j++)
00146                     *cPtr++ = *nextPtr++ / nrInNextC[i];
00147             }
00148             else
00149             {
00150                 for (j=0 ; j<nrDimensions ; j++)
00151                     *cPtr++ = *nextPtr++;
00152                 emptyclust++ ;
00153                 printf("empty cluster: %d\n",i);
00154             }
00155         }
00156 
00157         //printf("%d, ",numloop);
00158         /*
00159         cPtr = c;
00160         printf("\nloop %d\n", numloop);
00161         for (i=0 ; i<nrClusters ; i++)
00162         {
00163             printf("c %d : ", i);
00164             for (j=0 ; j<nrDimensions ; j++)
00165                 printf("%f ", *cPtr++);
00166             printf("\n");
00167         }
00168         */
00169         // quit if the numLoops is reached or if the error stays constant
00170         if (err != errOld && numloop > 0)
00171             contin = true;
00172 
00173         errOld = err;
00174     } // end while(contin)
00175     //printf("\n");
00176 
00177     for (i=0 ; i<nrClusters ; i++)
00178         //vectorsInC[i] = nrInNextC[i];
00179         *VecE(vectorsInC, i) = nrInNextC[i];
00180     clustersFound = nrClusters - emptyclust;
00181 
00182     delete nextC;
00183     delete nrInNextC;
00184 }

Here is the call graph for this function:


Generated on Thu Jan 13 09:20:12 2011 for ImpalaSrc by  doxygen 1.5.1