Home || Visual Search || Applications || Architecture || Important Messages || OGL || Src

Saxpy.h

Go to the documentation of this file.
00001 #ifndef Impala_Core_Vector_Ssaxpy_h
00002 #define Impala_Core_Vector_Ssaxpy_h
00003 
00004 #include "Core/Vector/VectorTem.h"
00005 #ifdef SSE_USED
00006 #include <xmmintrin.h>
00007 #include <stdint.h>
00008 #ifndef POINTER_ALIGNED
00009 #define POINTER_ALIGNED(x) (!((( intptr_t)x) & 0xF))
00010 #endif
00011 #endif // SSE_USED
00012 
00013 namespace Impala
00014 {
00015 namespace Core
00016 {
00017 namespace Vector
00018 {
00019 
00020 
00021 #ifdef SSE_USED
00022 
/**
 * SSE implementation of single-precision "alpha * x plus y":
 * D[i] = alpha * C[i] + D[i] for every i in [0, elements).
 *
 * Whole groups of four floats are processed with SSE; the remaining
 * (elements % 4) floats are handled by a scalar loop, so no memory beyond
 * the first `elements` entries of C or D is read or written.
 *
 * When both pointers are 16-byte aligned the aligned load/store intrinsics
 * are used; otherwise the unaligned variants are used, so the result is
 * computed for any input alignment.
 *
 * Declared inline because the definition lives in a header and would
 * otherwise be emitted once per including translation unit.
 *
 * @param elements  number of floats to process; values <= 0 do nothing
 * @param alpha     scalar multiplier applied to each element of C
 * @param C         input array holding at least `elements` floats (only read)
 * @param D         input/output array holding at least `elements` floats
 */
inline void
SaxpySSE(int elements, float alpha, float* C, float* D)
{
    if (elements <= 0)
        return;

    // Number of complete 4-float groups; the tail is handled separately below.
    const int vecCount = elements / 4;
    const __m128 Alpha = _mm_set1_ps(alpha);

    // Both addresses are 16-byte aligned iff the low four bits of each are 0.
    const bool aligned =
        ((reinterpret_cast<intptr_t>(C) | reinterpret_cast<intptr_t>(D)) & 0xF) == 0;

    if (aligned)
    {
        // Each iteration writes its own group of four floats in D.
        #pragma omp parallel for
        for (int i = 0; i < vecCount; i++)
        {
            float* dst = D + 4 * i;
            const __m128 scaled = _mm_mul_ps(Alpha, _mm_load_ps(C + 4 * i));
            _mm_store_ps(dst, _mm_add_ps(scaled, _mm_load_ps(dst)));
        }
    }
    else
    {
        // Same computation with unaligned loads/stores.
        #pragma omp parallel for
        for (int i = 0; i < vecCount; i++)
        {
            float* dst = D + 4 * i;
            const __m128 scaled = _mm_mul_ps(Alpha, _mm_loadu_ps(C + 4 * i));
            _mm_storeu_ps(dst, _mm_add_ps(scaled, _mm_loadu_ps(dst)));
        }
    }

    // Scalar tail: at most three trailing elements.
    for (int i = 4 * vecCount; i < elements; i++)
    {
        D[i] = alpha * C[i] + D[i];
    }
}
00038 
00039 #endif // SSE_USED
00040 
00041 } // namespace Vector
00042 } // namespace Core
00043 } // namespace Impala
00044 
00045 #endif

Generated on Thu Jan 13 09:04:44 2011 for ImpalaSrc by  doxygen 1.5.1