diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
index 3076788d1..a96f5fa73 100644
--- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
@@ -436,6 +436,20 @@ internal static class Vector128_
return Vector128.Narrow(prodLo, prodHi);
}
+ /// <summary>
+ /// Multiply packed signed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
+ /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and
+ /// pack the results.
+ /// </summary>
+ /// <param name="left">
+ /// The first vector containing packed signed 16-bit integers to multiply and add.
+ /// </param>
+ /// <param name="right">
+ /// The second vector containing packed signed 16-bit integers to multiply and add.
+ /// </param>
+ /// <returns>
+ /// A vector containing the results of multiplying and adding adjacent pairs of packed signed 16-bit integers.
+ /// </returns>
public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right)
{
if (Sse2.IsSupported)
@@ -443,22 +457,35 @@ internal static class Vector128_
return Sse2.MultiplyAddAdjacent(left, right);
}
- // Widen each half of the short vectors into two int vectors
- (Vector128 leftLower, Vector128 leftUpper) = Vector128.Widen(left);
- (Vector128 rightLower, Vector128 rightUpper) = Vector128.Widen(right);
+ if (AdvSimd.IsSupported) // Arm path, taken only when the Sse2 intrinsic above is unavailable
+ {
+ Vector128 prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower()); // widened products of the lower-half lanes
+ Vector128 prodHi = AdvSimd.MultiplyWideningLower(left.GetUpper(), right.GetUpper()); // widened products of the upper-half lanes
- // Elementwise multiply: each int lane now holds the full 32-bit product
- Vector128 prodLo = leftLower * rightLower;
- Vector128 prodHi = leftUpper * rightUpper;
+ Vector128 v0 = AdvSimd.AddPairwiseWidening(prodLo); // widening sums of adjacent product pairs (lower half)
+ Vector128 v1 = AdvSimd.AddPairwiseWidening(prodHi); // widening sums of adjacent product pairs (upper half)
- // Extract the low and high parts of the products shuffling them to form a result we can add together.
- // Use out-of-bounds to zero out the unused lanes.
- Vector128 v0 = Vector128.Shuffle(prodLo, Vector128.Create(0, 2, 8, 8));
- Vector128 v1 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 0, 2));
- Vector128 v2 = Vector128.Shuffle(prodLo, Vector128.Create(1, 3, 8, 8));
- Vector128 v3 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 1, 3));
+ return Vector128.Narrow(v0, v1); // narrow both sum vectors back into a single result vector
+ }
- return v0 + v1 + v2 + v3;
+ { // Portable fallback, reached when neither Sse2 nor AdvSimd is supported
+ // Widen each input into a lower-half and an upper-half vector of wider lanes (short -> int).
+ (Vector128 leftLower, Vector128 leftUpper) = Vector128.Widen(left);
+ (Vector128 rightLower, Vector128 rightUpper) = Vector128.Widen(right);
+
+ // Element-wise multiply: each widened lane now holds the full 32-bit product of one input pair.
+ Vector128 prodLo = leftLower * rightLower;
+ Vector128 prodHi = leftUpper * rightUpper;
+
+ // Gather products 0 and 2 (v0, v1) and products 1 and 3 (v2, v3) of each half into matching lanes so that
+ // a plain add pairs adjacent products. The out-of-bounds index 8 zeroes the lanes owned by the other half.
+ Vector128 v0 = Vector128.Shuffle(prodLo, Vector128.Create(0, 2, 8, 8));
+ Vector128 v1 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 0, 2));
+ Vector128 v2 = Vector128.Shuffle(prodLo, Vector128.Create(1, 3, 8, 8));
+ Vector128 v3 = Vector128.Shuffle(prodHi, Vector128.Create(8, 8, 1, 3));
+
+ return v0 + v1 + v2 + v3; // lower-half sums land in lanes 0-1, upper-half sums in lanes 2-3
+ }
}
///