From 0a9c407ed46e22789d962eae88efced5f8e67386 Mon Sep 17 00:00:00 2001
From: James Jackson-South <james_south@hotmail.com>
Date: Sat, 31 May 2025 00:39:32 +1000
Subject: [PATCH] Add explicit AdvSimd to MultiplyAddAdjacent

---
 .../Common/Helpers/Vector128Utilities.cs      | 53 ++++++++++++++-----
 1 file changed, 40 insertions(+), 13 deletions(-)
diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
index 3076788d1..a96f5fa73 100644
--- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
@@ -436,6 +436,20 @@ internal static class Vector128_
         return Vector128.Narrow(prodLo, prodHi);
     }
 
+    /// <summary>
+    /// Multiply packed signed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
+    /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and
+    /// pack the results.
+    /// </summary>
+    /// <param name="left">
+    /// The first vector containing packed signed 16-bit integers to multiply and add.
+    /// </param>
+    /// <param name="right">
+    /// The second vector containing packed signed 16-bit integers to multiply and add.
+    /// </param>
+    /// <returns>
+    /// A vector containing the results of multiplying and adding adjacent pairs of packed signed 16-bit integers
+    /// </returns>
     public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right)
     {
         if (Sse2.IsSupported)
@@ -443,22 +457,35 @@ internal static class Vector128_
             return Sse2.MultiplyAddAdjacent(left, right);
         }
 
-        // Widen each half of the short vectors into two int vectors
-        (Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
-        (Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
+        if (AdvSimd.IsSupported)
+        {
+            Vector128<int> prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower());
+            Vector128<int> prodHi = AdvSimd.MultiplyWideningLower(left.GetUpper(), right.GetUpper());
 
-        // Elementwise multiply: each int lane now holds the full 32-bit product
-        Vector128<int> prodLo = leftLower * rightLower;
-        Vector128<int> prodHi = leftUpper * rightUpper;
+            Vector128<long> v0 = AdvSimd.AddPairwiseWidening(prodLo);
+            Vector128<long> v1 = AdvSimd.AddPairwiseWidening(prodHi);
 
-        // Extract the low and high parts of the products shuffling them to form a result we can add together.
-        // Use out-of-bounds to zero out the unused lanes.
-        Vector128<int> v0 = Vector128.Shuffle(prodLo, Vector128.Create(0, 2, 8, 8));
-        Vector128<int> v1 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 0, 2));
-        Vector128<int> v2 = Vector128.Shuffle(prodLo, Vector128.Create(1, 3, 8, 8));
-        Vector128<int> v3 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 1, 3));
+            return Vector128.Narrow(v0, v1);
+        }
 
-        return v0 + v1 + v2 + v3;
+        {
+            // Widen each half of the short vectors into two int vectors
+            (Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
+            (Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
+
+            // Elementwise multiply: each int lane now holds the full 32-bit product
+            Vector128<int> prodLo = leftLower * rightLower;
+            Vector128<int> prodHi = leftUpper * rightUpper;
+
+            // Extract the low and high parts of the products shuffling them to form a result we can add together.
+            // Use out-of-bounds to zero out the unused lanes.
+            Vector128<int> v0 = Vector128.Shuffle(prodLo, Vector128.Create(0, 2, 8, 8));
+            Vector128<int> v1 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 0, 2));
+            Vector128<int> v2 = Vector128.Shuffle(prodLo, Vector128.Create(1, 3, 8, 8));
+            Vector128<int> v3 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 1, 3));
+
+            return v0 + v1 + v2 + v3;
+        }
     }
 
     /// <summary>