Skip to content

Commit 174c475

Browse files
committed
hadd tweaks
1 parent 1f8dd9c commit 174c475

3 files changed

Lines changed: 11 additions & 10 deletions

File tree

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1067,14 +1067,10 @@ namespace xsimd
10671067
template <class A>
10681068
XSIMD_INLINE double reduce_add(batch<double, A> const& rhs, requires_arch<avx>) noexcept
10691069
{
1070-
// rhs = (x0, x1, x2, x3)
1071-
// tmp = (x2, x3, x0, x1)
1072-
__m256d tmp = _mm256_permute2f128_pd(rhs, rhs, 1);
1073-
// tmp = (x2+x0, x3+x1, -, -)
1074-
tmp = _mm256_add_pd(rhs, tmp);
1075-
// tmp = (x2+x0+x3+x1, -, -, -)
1076-
tmp = _mm256_hadd_pd(tmp, tmp);
1077-
return _mm_cvtsd_f64(_mm256_extractf128_pd(tmp, 0));
1070+
__m128d low, high;
1071+
detail::split_avx(rhs, low, high);
1072+
batch<double, sse4_2> blow(low), bhigh(high);
1073+
return reduce_add(blow+bhigh, sse4_2 {});
10781074
}
10791075
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
10801076
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<avx>) noexcept

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1410,10 +1410,14 @@ namespace xsimd
14101410
template <class A>
14111411
XSIMD_INLINE double reduce_add(batch<double, A> const& rhs, requires_arch<avx512f>) noexcept
14121412
{
1413+
#if defined(__INTEL_COMPILER)
1414+
return _mm512_reduce_add_pd(rhs);
1415+
#else
14131416
__m256d tmp1 = _mm512_extractf64x4_pd(rhs, 1);
14141417
__m256d tmp2 = _mm512_extractf64x4_pd(rhs, 0);
14151418
__m256d res1 = _mm256_add_pd(tmp1, tmp2);
14161419
return reduce_add(batch<double, avx2>(res1), avx2 {});
1420+
#endif
14171421
}
14181422
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
14191423
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<avx512f>) noexcept

include/xsimd/arch/xsimd_sse3.hpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,9 @@ namespace xsimd
5353
template <class A>
5454
XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<sse3>) noexcept
5555
{
56-
__m128d tmp0 = _mm_hadd_pd(self, self);
57-
return _mm_cvtsd_f64(tmp0);
56+
double low = _mm_cvtsd_f64(self); // get lower element
57+
double high = _mm_cvtsd_f64(_mm_shuffle_pd(self, self, 0x1)); // get upper element
58+
return low + high;
5859
}
5960

6061
}

0 commit comments

Comments
 (0)