From 0a9c407ed46e22789d962eae88efced5f8e67386 Mon Sep 17 00:00:00 2001
From: James Jackson-South
Date: Sat, 31 May 2025 00:39:32 +1000
Subject: [PATCH] Add explicit AdvSimd to MultiplyAddAdjacent

---
 .../Common/Helpers/Vector128Utilities.cs      | 53 ++++++++++++++-----
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
index 3076788d1..a96f5fa73 100644
--- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
@@ -436,6 +436,20 @@ internal static class Vector128_
         return Vector128.Narrow(prodLo, prodHi);
     }
 
+    /// <summary>
+    /// Multiply packed signed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
+    /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and
+    /// pack the results.
+    /// </summary>
+    /// <param name="left">
+    /// The first vector containing packed signed 16-bit integers to multiply and add.
+    /// </param>
+    /// <param name="right">
+    /// The second vector containing packed signed 16-bit integers to multiply and add.
+    /// </param>
+    /// <returns>
+    /// A vector containing the results of multiplying and adding adjacent pairs of packed signed 16-bit integers
+    /// </returns>
     public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right)
     {
         if (Sse2.IsSupported)
@@ -443,22 +457,35 @@ internal static class Vector128_
             return Sse2.MultiplyAddAdjacent(left, right);
         }
 
-        // Widen each half of the short vectors into two int vectors
-        (Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
-        (Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
+        if (AdvSimd.IsSupported)
+        {
+            Vector128<int> prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower());
+            Vector128<int> prodHi = AdvSimd.MultiplyWideningLower(left.GetUpper(), right.GetUpper());
 
-        // Elementwise multiply: each int lane now holds the full 32-bit product
-        Vector128<int> prodLo = leftLower * rightLower;
-        Vector128<int> prodHi = leftUpper * rightUpper;
+            Vector128<long> v0 = AdvSimd.AddPairwiseWidening(prodLo);
+            Vector128<long> v1 = AdvSimd.AddPairwiseWidening(prodHi);
 
-        // Extract the low and high parts of the products shuffling them to form a result we can add together.
-        // Use out-of-bounds to zero out the unused lanes.
-        Vector128<int> v0 = Vector128.Shuffle(prodLo, Vector128.Create(0, 2, 8, 8));
-        Vector128<int> v1 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 0, 2));
-        Vector128<int> v2 = Vector128.Shuffle(prodLo, Vector128.Create(1, 3, 8, 8));
-        Vector128<int> v3 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 1, 3));
+            return Vector128.Narrow(v0, v1);
+        }
 
-        return v0 + v1 + v2 + v3;
+        {
+            // Widen each half of the short vectors into two int vectors
+            (Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
+            (Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
+
+            // Elementwise multiply: each int lane now holds the full 32-bit product
+            Vector128<int> prodLo = leftLower * rightLower;
+            Vector128<int> prodHi = leftUpper * rightUpper;
+
+            // Extract the low and high parts of the products shuffling them to form a result we can add together.
+            // Use out-of-bounds to zero out the unused lanes.
+            Vector128<int> v0 = Vector128.Shuffle(prodLo, Vector128.Create(0, 2, 8, 8));
+            Vector128<int> v1 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 0, 2));
+            Vector128<int> v2 = Vector128.Shuffle(prodLo, Vector128.Create(1, 3, 8, 8));
+            Vector128<int> v3 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 1, 3));
+
+            return v0 + v1 + v2 + v3;
+        }
     }
 
     /// <summary>