00001 #ifndef Impala_Core_Vector_Ssaxpy_h
00002 #define Impala_Core_Vector_Ssaxpy_h
00003
00004 #include "Core/Vector/VectorTem.h"
00005 #ifdef SSE_USED
00006 #include <xmmintrin.h>
00007 #include <stdint.h>
#ifndef POINTER_ALIGNED
// True when the address `x` is a multiple of 16 bytes, i.e. suitable for
// aligned SSE loads/stores. The argument is parenthesised before the cast so
// that pointer expressions such as `p + 4` are evaluated as pointer
// arithmetic first, rather than adding to the already-converted integer.
#define POINTER_ALIGNED(x) (!(((intptr_t)(x)) & 0xF))
#endif
00011 #endif // SSE_USED
00012
00013 namespace Impala
00014 {
00015 namespace Core
00016 {
00017 namespace Vector
00018 {
00019
00020
00021 #ifdef SSE_USED
00022
/// @brief Single-precision "a*x plus y": D[i] = alpha * C[i] + D[i].
///
/// Exactly `elements` floats are read from C and updated in D; nothing past
/// that count is touched, so the buffers do not need to be padded to a
/// multiple of four.
///
/// @param elements  Number of floats to process; values <= 0 are a no-op.
/// @param alpha     Scale factor applied to every element of C.
/// @param C         Input vector (read only by this function).
/// @param D         Input/output vector, updated in place.
///
/// When both pointers are 16-byte aligned, whole groups of four floats are
/// processed with aligned SSE loads/stores; any remaining elements, or the
/// entire range when a pointer is unaligned, go through a scalar loop.
///
/// Declared inline because the definition lives in a header.
inline void
SaxpySSE(int elements, float alpha, float* C, float* D)
{
    if (elements <= 0 || !C || !D)
        return;

    // Aligned SSE loads/stores require both addresses to be multiples of 16.
    const bool bothAligned =
        (((intptr_t) C | (intptr_t) D) & 0xF) == 0;

    int done = 0;
    if (bothAligned)
    {
        const int blocks = elements / 4;   // only complete 4-float groups
        const __m128 Alpha = _mm_set1_ps(alpha);
        #pragma omp parallel for
        for (int i = 0; i < blocks; i++)
        {
            const float* src = C + 4 * i;
            float* dst = D + 4 * i;
            _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(Alpha, _mm_load_ps(src)),
                                         _mm_load_ps(dst)));
        }
        done = blocks * 4;
    }

    // Scalar tail (or whole range for unaligned input).
    for (int i = done; i < elements; i++)
    {
        D[i] = alpha * C[i] + D[i];
    }
}
00038
00039 #endif // SSE_USED
00040
00041 }
00042 }
00043 }
00044
00045 #endif