Impala Documentation

00001 #ifndef Impala_Core_Vector_Chi2DistanceSSE_h
00002 #define Impala_Core_Vector_Chi2DistanceSSE_h
00003 
00004 #include "Core/Vector/VectorTem.h"
00005 // for includes, defines
00006 #include "Core/Matrix/MatNorm2DistSSE.h"
00007 
00008 namespace Impala
00009 {
00010 namespace Core
00011 {
00012 namespace Vector
00013 {
00014 
00015 
00016 double
00017 Chi2DistanceSSE(const VectorTem<double>& v1, const VectorTem<double>& v2)
00018 {
00019     // please do not add ILOG_VAR to this function
00020     int length=v1.Size();
00021     double* v1Copy = 0;
00022     double* v2Copy = 0;
00023     const double* v1Data = v1.GetData();
00024     const double* v2Data = v2.GetData();
00025     if(!POINTER_ALIGNED(v1Data))
00026     {
00027         // make a copy
00028         v1Copy = new double[length];
00029         for(int i = 0; i < length; i++)
00030             v1Copy[i] = v1Data[i];
00031         v1Data = v1Copy;
00032     }
00033     if(!POINTER_ALIGNED(v2Data))
00034     {
00035         // make a copy
00036         v2Copy = new double[length];
00037         for(int i = 0; i < length; i++)
00038             v2Copy[i] = v2Data[i];
00039         v2Data = v2Copy;
00040     }
00041     __m128d* baseA = (__m128d*)v1Data;
00042     __m128d* baseB = (__m128d*)v2Data;
00043     const int SSELength = IntAlignDown(length, 2) / 2;
00044     const __m128d zero = _mm_setzero_pd();
00045     __m128d accu = _mm_setzero_pd();
00046     for (unsigned int k = 0; k < SSELength; k++)
00047     {
00048         __m128d As = baseA[k];
00049         __m128d Bs = baseB[k];
00050         __m128d tmp = _mm_sub_pd(As, Bs);
00051         // sum += (As - Bs) * (As - Bs) / (As + Bs);
00052         tmp =_mm_div_pd(_mm_mul_pd(tmp, tmp), _mm_add_pd(As, Bs));
00053         // fix the NaN for 0/0
00054         tmp = _mm_max_pd(tmp, zero);
00055         accu = _mm_add_pd(accu, tmp);
00056     }
00057     // add the components of sum together
00058     __m128d shuffle = _mm_shuffle_pd(accu, accu, _MM_SHUFFLE2(0, 1));
00059     __m128d result  = _mm_add_pd(accu, shuffle) ;
00060     double accumulator = _mm_cvtsd_f64(result);
00061 
00062     for(int i=SSELength*2 ; i<length ; i++)
00063     {
00064         double sum = v1[i] + v2[i];
00065         if(sum)
00066             accumulator += (v1[i] - v2[i]) * (v1[i] - v2[i]) / sum;
00067     }
00068     if(v1Copy) delete[] v1Copy;
00069     if(v2Copy) delete[] v2Copy;
00070 
00071     // make it between 0..1, instead of 0..2 (this statement is only true if the input vectors sum to one)
00072     return accumulator / 2.0;
00073 }
00074 
00075 
00076 } // namespace Vector
00077 } // namespace Core
00078 } // namespace Impala
00079 
00080 
00081 #endif
Chi2DistanceSSE.h