Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

MatKMeans.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Matrix_MatKMeans_h
00002 #define Impala_Core_Matrix_MatKMeans_h
00003 
00004 #include "Core/Matrix/MatFunc.h"
00005 #include "Core/Matrix/Vecs.h"
00006 
00007 namespace Impala
00008 {
00009 namespace Core
00010 {
00011 namespace Matrix
00012 {
00013 
00014 
00015 // calculate vec distance
00016 inline double
00017 my_vec_distance(double* v1, double* v2, int nElem) 
00018 {
00019     double s=0,tmp ;
00020     for (int i=0;i<nElem;i++) 
00021         s += (tmp = v1[i] - v2[i],tmp*tmp);
00022     return sqrt(s);
00023 }
00024 
00025 // calculate vec distance, given a current minimum
00026 inline double
00027 my_vec_distance(double* v1, double* v2, int nElem, double currentMin) 
00028 {
00029     if (currentMin>0)
00030     {
00031         double currentMinSquared = currentMin*currentMin;
00032         double s=0,tmp ;
00033         int i=0 ;
00034         while (s<currentMinSquared && i<nElem)
00035         {
00036                 s += (tmp = v1[i] - v2[i],tmp*tmp);
00037                 i++;
00038         }
00039         return i==nElem?sqrt(s):currentMin;
00040     }
00041     return currentMin;  
00042 }
00043 
00059 template<class ArrayT>
00060 void
00061 MatKMeans(ArrayT* m, ArrayT* c, VecScalarReal64* imap,
00062           int numloop, bool cIsInitialized, int& clustersFound,
00063           VecScalarInt32* vectorsInC)
00064 {
00065     typedef typename ArrayT::StorType StorT;
00066     typedef typename ArrayT::ArithType ArithT;
00067 
00068     int nrVectors = MatNrRow(m);
00069     int nrDimensions = MatNrCol(m);
00070     int nrClusters = MatNrRow(c); 
00071 
00072     int i,j;
00073     StorT* cPtr;
00074     StorT* mPtr;
00075     double* imapPtr = VecE(imap, 0);
00076 
00077     if (!cIsInitialized)
00078     {
00079         // the cluster matrix is not initialized, initialize it with some vectors.
00080         int steps = nrVectors / nrClusters;
00081         cPtr = MatE(c, 0, 0);
00082         for (i=0 ; i<nrClusters ; i++)
00083         {
00084             mPtr = MatE(m, i*steps, 0);
00085             for (j=0 ; j<nrDimensions ; j++)
00086             {
00087                 *cPtr++ = *mPtr++;
00088             }
00089         }
00090     } 
00091 
00092     bool contin = true;
00093     ArithT err = 0, errOld = 0;
00094     int emptyclust = 0;
00095     StorT* nextC = new StorT[nrClusters*nrDimensions];
00096     int* nrInNextC = new int[nrClusters];
00097     StorT* nextPtr;
00098     while (contin)
00099     {
00100         emptyclust = 0;
00101         err = 0;
00102         contin = false;
00103         nextPtr = nextC;
00104         for (i=0 ; i<nrClusters ; i++)
00105         {
00106             nrInNextC[i] = 0;
00107             for (j=0 ; j<nrDimensions ; j++)
00108                 *nextPtr++ = 0;
00109         }
00110 
00111         for (i=0 ; i<nrVectors ; i++)
00112         {
00113             int myClus = 0;
00114             ArithT d = 0;
00115             mPtr = MatE(m, i, 0);
00116             ArithT dm = my_vec_distance(mPtr, MatE(c, 0, 0), nrDimensions);
00117             for (j=1 ; j<nrClusters ; j++)
00118             {
00119                 //d = vec_distance(mPtr, MatE(c, j, 0), nrDimensions, dm);
00120                 d = my_vec_distance(mPtr, MatE(c, j, 0), nrDimensions);
00121                 if (d<dm)
00122                 {
00123                     dm = d;
00124                     myClus = j;
00125                 }
00126             }
00127             imapPtr[i] = myClus;
00128             err += dm;
00129 
00130             // store this vector's information in nextC for next iteration
00131             nrInNextC[myClus] += 1;
00132             nextPtr = nextC + myClus*nrDimensions;
00133             for (j=0 ; j<nrDimensions ; j++)
00134                 *nextPtr++ += *mPtr++;
00135         }
00136         
00137         numloop--;
00138         // recalculate the position of the cluster vectors
00139         cPtr = MatE(c, 0, 0);
00140         nextPtr = nextC;
00141         for (i=0 ; i<nrClusters ; i++)
00142         {
00143             if (nrInNextC[i] > 0)
00144             {
00145                 for (j=0 ; j<nrDimensions ; j++)
00146                     *cPtr++ = *nextPtr++ / nrInNextC[i];
00147             }
00148             else
00149             {
00150                 for (j=0 ; j<nrDimensions ; j++)
00151                     *cPtr++ = *nextPtr++;
00152                 emptyclust++ ;
00153                 printf("empty cluster: %d\n",i);
00154             }
00155         }
00156 
00157         //printf("%d, ",numloop);
00158         /*
00159         cPtr = c;
00160         printf("\nloop %d\n", numloop);
00161         for (i=0 ; i<nrClusters ; i++)
00162         {
00163             printf("c %d : ", i);
00164             for (j=0 ; j<nrDimensions ; j++)
00165                 printf("%f ", *cPtr++);
00166             printf("\n");
00167         }
00168         */
00169         // quit if the numLoops is reached or if the error stays constant
00170         if (err != errOld && numloop > 0)
00171             contin = true;
00172 
00173         errOld = err;
00174     } // end while(contin)
00175     //printf("\n");
00176 
00177     for (i=0 ; i<nrClusters ; i++)
00178         //vectorsInC[i] = nrInNextC[i];
00179         *VecE(vectorsInC, i) = nrInNextC[i];
00180     clustersFound = nrClusters - emptyclust;
00181 
00182     delete nextC;
00183     delete nrInNextC;
00184 }
00185 
00186 } // namespace Matrix
00187 } // namespace Core
00188 } // namespace Impala
00189 
00190 #endif

Generated on Fri Mar 19 09:31:15 2010 for ImpalaSrc by  doxygen 1.5.1