template<class ArrayT>
K-means clustering. Matrix m is clustered, where each row is a vector. Size of m is nrVectors x nrDimensions (rows are contiguous in memory) Clusters are stored in c, size of c is nrClusters x nrDimensions Mapping of vector id's to cluster id's is stored in imap, imap contains nrVectors elements. numloop: max number of loops. if (cIsInitialized) c contains the starting points for kmeans else c is assigned some data vectors. clustersFound indicates the number of clusters actually found vectorsInC contains the number of vectors mapped onto each cluster, and therefor has nrClusters elements Definition at line 61 of file MatKMeans.h. References MatE(), MatNrCol(), MatNrRow(), my_vec_distance(), and VecE(). Referenced by Impala::Core::Array::ColorSegmentation(). 00064 { 00065 typedef typename ArrayT::StorType StorT; 00066 typedef typename ArrayT::ArithType ArithT; 00067 00068 int nrVectors = MatNrRow(m); 00069 int nrDimensions = MatNrCol(m); 00070 int nrClusters = MatNrRow(c); 00071 00072 int i,j; 00073 StorT* cPtr; 00074 StorT* mPtr; 00075 double* imapPtr = VecE(imap, 0); 00076 00077 if (!cIsInitialized) 00078 { 00079 // the cluster matrix is not initialized, initialize it with some vectors. 00080 int steps = nrVectors / nrClusters; 00081 cPtr = MatE(c, 0, 0); 00082 for (i=0 ; i<nrClusters ; i++) 00083 { 00084 mPtr = MatE(m, i*steps, 0); 00085 for (j=0 ; j<nrDimensions ; j++) 00086 { 00087 *cPtr++ = *mPtr++; 00088 } 00089 } 00090 } 00091 00092 bool contin = true; 00093 ArithT err = 0, errOld = 0; 00094 int emptyclust = 0; 00095 StorT* nextC = new StorT[nrClusters*nrDimensions]; 00096 int* nrInNextC = new int[nrClusters]; 00097 StorT* nextPtr; 00098 while (contin) 00099 { 00100 emptyclust = 0; 00101 err = 0; 00102 contin = false; 00103 nextPtr = nextC; 00104 for (i=0 ; i<nrClusters ; i++) 00105 { 00106 nrInNextC[i] = 0; 00107 for (j=0 ; j<nrDimensions ; j++) 00108 *nextPtr++ = 0; 00109 } 00110 00111 for (i=0 ; i<nrVectors ; i++) 00112 { 00113 int myClus = 0; 00114 ArithT d = 0; 00115 mPtr = MatE(m, i, 0); 00116 ArithT dm = my_vec_distance(mPtr, MatE(c, 0, 0), nrDimensions); 00117 for (j=1 ; j<nrClusters ; j++) 00118 { 00119 //d = vec_distance(mPtr, MatE(c, j, 0), nrDimensions, dm); 00120 d = my_vec_distance(mPtr, MatE(c, j, 0), nrDimensions); 00121 if (d<dm) 00122 { 00123 dm = d; 00124 myClus = j; 00125 } 00126 } 00127 imapPtr[i] = myClus; 00128 err += dm; 00129 00130 // store this vector's information in nextC for next iteration 00131 nrInNextC[myClus] += 1; 00132 nextPtr = nextC + myClus*nrDimensions; 00133 for (j=0 ; j<nrDimensions ; j++) 00134 *nextPtr++ += *mPtr++; 00135 } 00136 00137 numloop--; 00138 // recalculate the position of the cluster vectors 00139 cPtr = MatE(c, 0, 0); 00140 nextPtr = nextC; 00141 for (i=0 ; i<nrClusters ; i++) 00142 { 00143 if (nrInNextC[i] > 0) 00144 { 00145 for (j=0 ; j<nrDimensions ; j++) 00146 *cPtr++ = *nextPtr++ / nrInNextC[i]; 00147 } 00148 else 00149 { 00150 for (j=0 ; j<nrDimensions ; j++) 00151 *cPtr++ = *nextPtr++; 00152 emptyclust++ ; 00153 printf("empty cluster: %d\n",i); 00154 } 00155 } 00156 00157 //printf("%d, ",numloop); 00158 /* 00159 cPtr = c; 00160 printf("\nloop %d\n", numloop); 00161 for (i=0 ; i<nrClusters ; i++) 00162 { 00163 printf("c %d : ", i); 00164 for (j=0 ; j<nrDimensions ; j++) 00165 printf("%f ", *cPtr++); 00166 printf("\n"); 00167 } 00168 */ 00169 // quit if the numLoops is reached or if the error stays constant 00170 if (err != errOld && numloop > 0) 00171 contin = true; 00172 00173 errOld = err; 00174 } // end while(contin) 00175 //printf("\n"); 00176 00177 for (i=0 ; i<nrClusters ; i++) 00178 //vectorsInC[i] = nrInNextC[i]; 00179 *VecE(vectorsInC, i) = nrInNextC[i]; 00180 clustersFound = nrClusters - emptyclust; 00181 00182 delete nextC; 00183 delete nrInNextC; 00184 }
Here is the call graph for this function:
|