Definition at line 17 of file Chi2DistanceSSE.h. References Impala::Core::Vector::VectorTem< ElemT >::GetData(), Impala::IntAlignDown(), POINTER_ALIGNED, and Impala::Core::Vector::VectorTem< ElemT >::Size(). Referenced by Impala::Application::Precompute::ComputeMatrix(). 00018 { 00019 // please do not add ILOG_VAR to this function 00020 int length=v1.Size(); 00021 double* v1Copy = 0; 00022 double* v2Copy = 0; 00023 const double* v1Data = v1.GetData(); 00024 const double* v2Data = v2.GetData(); 00025 if(!POINTER_ALIGNED(v1Data)) 00026 { 00027 // make a copy 00028 v1Copy = new double[length]; 00029 for(int i = 0; i < length; i++) 00030 v1Copy[i] = v1Data[i]; 00031 v1Data = v1Copy; 00032 } 00033 if(!POINTER_ALIGNED(v2Data)) 00034 { 00035 // make a copy 00036 v2Copy = new double[length]; 00037 for(int i = 0; i < length; i++) 00038 v2Copy[i] = v2Data[i]; 00039 v2Data = v2Copy; 00040 } 00041 __m128d* baseA = (__m128d*)v1Data; 00042 __m128d* baseB = (__m128d*)v2Data; 00043 const int SSELength = IntAlignDown(length, 2) / 2; 00044 const __m128d zero = _mm_setzero_pd(); 00045 __m128d accu = _mm_setzero_pd(); 00046 for (unsigned int k = 0; k < SSELength; k++) 00047 { 00048 __m128d As = baseA[k]; 00049 __m128d Bs = baseB[k]; 00050 __m128d tmp = _mm_sub_pd(As, Bs); 00051 // sum += (As - Bs) * (As - Bs) / (As + Bs); 00052 tmp =_mm_div_pd(_mm_mul_pd(tmp, tmp), _mm_add_pd(As, Bs)); 00053 // fix the NaN for 0/0 00054 tmp = _mm_max_pd(tmp, zero); 00055 accu = _mm_add_pd(accu, tmp); 00056 } 00057 // add the components of sum together 00058 __m128d shuffle = _mm_shuffle_pd(accu, accu, _MM_SHUFFLE2(0, 1)); 00059 __m128d result = _mm_add_pd(accu, shuffle) ; 00060 double accumulator = _mm_cvtsd_f64(result); 00061 00062 for(int i=SSELength*2 ; i<length ; i++) 00063 { 00064 double sum = v1[i] + v2[i]; 00065 if(sum) 00066 accumulator += (v1[i] - v2[i]) * (v1[i] - v2[i]) / sum; 00067 } 00068 if(v1Copy) delete[] v1Copy; 00069 if(v2Copy) delete[] v2Copy; 00070 00071 // make it between 0..1, instead of 0..2 (this statement is only true if the input vectors sum to one) 00072 return accumulator / 2.0; 00073 }
Here is the call graph for this function:
|