From 5403fbd8b2a4f42e9a9deed923d3017d449b3ab9 Mon Sep 17 00:00:00 2001
From: Brian Popow
Date: Wed, 24 Nov 2021 12:34:34 +0100
Subject: [PATCH] Add better version of ReduceSum for Vector 256

---
 src/ImageSharp/Common/Helpers/Numerics.cs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index 9dc13079d..fa0af823d 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -828,11 +828,16 @@ namespace SixLabors.ImageSharp
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static int ReduceSum(Vector256<int> accumulator)
         {
-            Vector128<int> vec0 = Avx2.ExtractVector128(accumulator, 0);
-            Vector128<int> vec1 = Avx2.ExtractVector128(accumulator, 1);
-            Vector128<int> sum128 = Sse2.Add(vec0, vec1);
+            // Add upper lane to lower lane.
+            Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper());
 
-            return ReduceSum(sum128);
+            // Add odd to even.
+            vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_11_01_01));
+
+            // Add high to low.
+            vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));
+
+            return Sse2.ConvertToInt32(vsum);
         }
 
         ///