diff --git a/include/ircd/math/mean.h b/include/ircd/math/mean.h index c24da44d6..292d33694 100644 --- a/include/ircd/math/mean.h +++ b/include/ircd/math/mean.h @@ -13,35 +13,39 @@ namespace ircd::math { - template - typename std::enable_if(), T>::type - mean(const vector_view &); + template + typename std::enable_if(), R>::type + mean(const vector_view); - template - typename std::enable_if(), simd::lane_type>::type - mean(const vector_view &); + template + typename std::enable_if(), simd::lane_type>::type + mean(const vector_view); } -template -inline typename std::enable_if(), ircd::simd::lane_type>::type -ircd::math::mean(const vector_view &a) +template +inline typename std::enable_if(), ircd::simd::lane_type>::type +ircd::math::mean(const vector_view a) { - using value_type = simd::lane_type; - - const auto &sum + R acc {0}; + simd::for_each(a.data(), u64x2{0, a.size()}, [&acc] + (const auto block, const auto mask) { - simd::accumulate(a.data(), u64x2{0, a.size()}, T{0}, [] - (auto &ret, const auto block, const auto mask) - { - ret += block; - }) - }; + const R dp + ( + simd::lane_cast(block) + ); - value_type num {0}; - for(size_t i{0}; i < simd::lanes(); ++i) - num += sum[i]; + acc += dp; + }); - const auto &den + auto num(acc[0]); + for(uint i(1); i < simd::lanes(); ++i) + num += acc[i]; + + const auto den { a.size() * simd::lanes() }; @@ -50,12 +54,12 @@ ircd::math::mean(const vector_view &a) return num; } -template -inline typename std::enable_if(), T>::type -ircd::math::mean(const vector_view &a) +template +inline typename std::enable_if(), R>::type +ircd::math::mean(const vector_view a) { - T ret{0}; - + R ret{0}; size_t i{0}; while(i < a.size()) ret += a[i++];