00001 #ifndef Impala_Core_Vector_Chi2DistanceSSE_h
00002 #define Impala_Core_Vector_Chi2DistanceSSE_h
00003
00004 #include "Core/Vector/VectorTem.h"
00005
00006 #include "Core/Matrix/MatNorm2DistSSE.h"
00007
00008 namespace Impala
00009 {
00010 namespace Core
00011 {
00012 namespace Vector
00013 {
00014
00015
00016 double
00017 Chi2DistanceSSE(const VectorTem<double>& v1, const VectorTem<double>& v2)
00018 {
00019
00020 int length=v1.Size();
00021 double* v1Copy = 0;
00022 double* v2Copy = 0;
00023 const double* v1Data = v1.GetData();
00024 const double* v2Data = v2.GetData();
00025 if(!POINTER_ALIGNED(v1Data))
00026 {
00027
00028 v1Copy = new double[length];
00029 for(int i = 0; i < length; i++)
00030 v1Copy[i] = v1Data[i];
00031 v1Data = v1Copy;
00032 }
00033 if(!POINTER_ALIGNED(v2Data))
00034 {
00035
00036 v2Copy = new double[length];
00037 for(int i = 0; i < length; i++)
00038 v2Copy[i] = v2Data[i];
00039 v2Data = v2Copy;
00040 }
00041 __m128d* baseA = (__m128d*)v1Data;
00042 __m128d* baseB = (__m128d*)v2Data;
00043 const int SSELength = IntAlignDown(length, 2) / 2;
00044 const __m128d zero = _mm_setzero_pd();
00045 __m128d accu = _mm_setzero_pd();
00046 for (unsigned int k = 0; k < SSELength; k++)
00047 {
00048 __m128d As = baseA[k];
00049 __m128d Bs = baseB[k];
00050 __m128d tmp = _mm_sub_pd(As, Bs);
00051
00052 tmp =_mm_div_pd(_mm_mul_pd(tmp, tmp), _mm_add_pd(As, Bs));
00053
00054 tmp = _mm_max_pd(tmp, zero);
00055 accu = _mm_add_pd(accu, tmp);
00056 }
00057
00058 __m128d shuffle = _mm_shuffle_pd(accu, accu, _MM_SHUFFLE2(0, 1));
00059 __m128d result = _mm_add_pd(accu, shuffle) ;
00060 double accumulator = _mm_cvtsd_f64(result);
00061
00062 for(int i=SSELength*2 ; i<length ; i++)
00063 {
00064 double sum = v1[i] + v2[i];
00065 if(sum)
00066 accumulator += (v1[i] - v2[i]) * (v1[i] - v2[i]) / sum;
00067 }
00068 if(v1Copy) delete[] v1Copy;
00069 if(v2Copy) delete[] v2Copy;
00070
00071
00072 return accumulator / 2.0;
00073 }
00074
00075
00076 }
00077 }
00078 }
00079
00080
00081 #endif