diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index efe68977bb..513eb7ab19 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -690,7 +690,7 @@ internal static class Numerics
     /// </summary>
     /// <param name="vectors">The span of vectors</param>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
-    public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
+    public static void CubePowOnXYZ(Span<Vector4> vectors)
     {
         ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
         ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length);
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index 076590605d..154f0b5e22 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -602,48 +602,25 @@ internal static partial class SimdUtils
         }
 
         /// <summary>
-        /// Performs a multiplication and an addition of the <see cref="Vector256{Single}"/>.
-        /// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
+        /// Performs a multiplication and a negated addition of the <see cref="Vector256{Single}"/>.
         /// </summary>
-        /// <remarks>ret = (vm0 * vm1) + va</remarks>
-        /// <param name="va">The vector to add to the intermediate result.</param>
+        /// <remarks>ret = va - (vm0 * vm1)</remarks>
+        /// <param name="va">The vector to add to the negated intermediate result.</param>
         /// <param name="vm0">The first vector to multiply.</param>
         /// <param name="vm1">The second vector to multiply.</param>
         /// <returns>The <see cref="Vector256{Single}"/>.</returns>
-        [MethodImpl(InliningOptions.AlwaysInline)]
-        public static Vector256<float> MultiplyAdd(
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static Vector256<float> MultiplyAddNegated(
             Vector256<float> va,
             Vector256<float> vm0,
             Vector256<float> vm1)
         {
             if (Fma.IsSupported)
             {
-                return Fma.MultiplyAdd(vm1, vm0, va);
-            }
-
-            return va + (vm0 * vm1);
-        }
-
-        /// <summary>
-        /// Performs a multiplication and a negated addition of the <see cref="Vector256{Single}"/>.
-        /// </summary>
-        /// <remarks>ret = c - (a * b)</remarks>
-        /// <param name="a">The first vector to multiply.</param>
-        /// <param name="b">The second vector to multiply.</param>
-        /// <param name="c">The vector to add negated to the intermediate result.</param>
-        /// <returns>The <see cref="Vector256{Single}"/>.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static Vector256<float> MultiplyAddNegated(
-            Vector256<float> a,
-            Vector256<float> b,
-            Vector256<float> c)
-        {
-            if (Fma.IsSupported)
-            {
-                return Fma.MultiplyAddNegated(a, b, c);
+                return Fma.MultiplyAddNegated(vm0, vm1, va);
             }
 
-            return Avx.Subtract(c, Avx.Multiply(a, b));
+            return Avx.Subtract(va, Avx.Multiply(vm0, vm1));
         }
 
         /// <summary>
diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
index 14ac13dd8d..90e3169b37 100644
--- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
@@ -115,6 +115,28 @@ internal static class Vector256_
         return va + (vm0 * vm1);
     }
 
+    /// <summary>
+    /// Performs a multiplication and a negated addition of the <see cref="Vector256{Single}"/>.
+    /// </summary>
+    /// <remarks>ret = va - (vm0 * vm1)</remarks>
+    /// <param name="va">The vector to add to the negated intermediate result.</param>
+    /// <param name="vm0">The first vector to multiply.</param>
+    /// <param name="vm1">The second vector to multiply.</param>
+    /// <returns>The <see cref="Vector256{Single}"/>.</returns>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    public static Vector256<float> MultiplyAddNegated(
+        Vector256<float> va,
+        Vector256<float> vm0,
+        Vector256<float> vm1)
+    {
+        if (Fma.IsSupported)
+        {
+            return Fma.MultiplyAddNegated(vm0, vm1, va);
+        }
+
+        return va - (vm0 * vm1);
+    }
+
     /// <summary>
     /// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>.
     /// </summary>
diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs
index ca358be31c..45c4aade7b 100644
--- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs
+++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs
@@ -5,6 +5,7 @@ using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using SixLabors.ImageSharp.Common.Helpers;
 
 namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders;
 
@@ -62,7 +63,7 @@ internal static partial class PorterDuffFunctions
     /// <returns>The <see cref="Vector256{Single}"/>.</returns>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector256<float> Multiply(Vector256<float> backdrop, Vector256<float> source)
-        => Avx.Multiply(backdrop, source);
+        => backdrop * source;
 
     /// <summary>
     /// Returns the result of the "Add" compositing equation.
@@ -82,7 +83,7 @@ internal static partial class PorterDuffFunctions
     /// <returns>The <see cref="Vector256{Single}"/>.</returns>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector256<float> Add(Vector256<float> backdrop, Vector256<float> source)
-        => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source));
+        => Vector256.Min(Vector256.Create(1F), backdrop + source);
 
     /// <summary>
     /// Returns the result of the "Subtract" compositing equation.
@@ -102,7 +103,7 @@ internal static partial class PorterDuffFunctions
     /// <returns>The <see cref="Vector256{Single}"/>.</returns>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector256<float> Subtract(Vector256<float> backdrop, Vector256<float> source)
-        => Avx.Max(Vector256<float>.Zero, Avx.Subtract(backdrop, source));
+        => Vector256.Max(Vector256<float>.Zero, backdrop - source);
 
     /// <summary>
     /// Returns the result of the "Screen" compositing equation.
@@ -124,7 +125,7 @@ internal static partial class PorterDuffFunctions
     public static Vector256<float> Screen(Vector256<float> backdrop, Vector256<float> source)
     {
         Vector256<float> vOne = Vector256.Create(1F);
-        return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne);
+        return Vector256_.MultiplyAddNegated(vOne, vOne - backdrop, vOne - source);
     }
 
     /// <summary>
@@ -145,7 +146,7 @@ internal static partial class PorterDuffFunctions
     /// <returns>The <see cref="Vector256{Single}"/>.</returns>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector256<float> Darken(Vector256<float> backdrop, Vector256<float> source)
-        => Avx.Min(backdrop, source);
+        => Vector256.Min(backdrop, source);
 
     /// <summary>
     /// Returns the result of the "Lighten" compositing equation.
@@ -164,7 +165,7 @@ internal static partial class PorterDuffFunctions
     /// <returns>The <see cref="Vector256{Single}"/>.</returns>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector256<float> Lighten(Vector256<float> backdrop, Vector256<float> source)
-        => Avx.Max(backdrop, source);
+        => Vector256.Max(backdrop, source);
 
     /// <summary>
     /// Returns the result of the "Overlay" compositing equation.
@@ -192,7 +193,7 @@ internal static partial class PorterDuffFunctions
     public static Vector256<float> Overlay(Vector256<float> backdrop, Vector256<float> source)
     {
         Vector256<float> color = OverlayValueFunction(backdrop, source);
-        return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256<float>.Zero, BlendAlphaControl));
+        return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256<float>.Zero, BlendAlphaControl));
     }
 
     /// <summary>
@@ -221,7 +222,7 @@ internal static partial class PorterDuffFunctions
     public static Vector256<float> HardLight(Vector256<float> backdrop, Vector256<float> source)
     {
         Vector256<float> color = OverlayValueFunction(source, backdrop);
-        return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256<float>.Zero, BlendAlphaControl));
+        return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256<float>.Zero, BlendAlphaControl));
     }
 
     /// <summary>
@@ -244,10 +245,10 @@ internal static partial class PorterDuffFunctions
     public static Vector256<float> OverlayValueFunction(Vector256<float> backdrop, Vector256<float> source)
     {
         Vector256<float> vOne = Vector256.Create(1F);
-        Vector256<float> left = Avx.Multiply(Avx.Add(backdrop, backdrop), source);
+        Vector256<float> left = (backdrop + backdrop) * source;
 
-        Vector256<float> vOneMinusSource = Avx.Subtract(vOne, source);
-        Vector256<float> right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne);
+        Vector256<float> vOneMinusSource = vOne - source;
+        Vector256<float> right = Vector256_.MultiplyAddNegated(vOne, vOneMinusSource + vOneMinusSource, vOne - backdrop);
         Vector256<float> cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F));
         return Avx.BlendVariable(left, right, cmp);
     }
@@ -295,17 +296,17 @@ internal static partial class PorterDuffFunctions
         Vector256<float> sW = Avx.Permute(source, ShuffleAlphaControl);
         Vector256<float> dW = Avx.Permute(destination, ShuffleAlphaControl);
 
-        Vector256<float> blendW = Avx.Multiply(sW, dW);
-        Vector256<float> dstW = Avx.Subtract(dW, blendW);
-        Vector256<float> srcW = Avx.Subtract(sW, blendW);
+        Vector256<float> blendW = sW * dW;
+        Vector256<float> dstW = dW - blendW;
+        Vector256<float> srcW = sW - blendW;
 
         // calculate final alpha
-        Vector256<float> alpha = Avx.Add(dstW, sW);
+        Vector256<float> alpha = dstW + sW;
 
         // calculate final color
-        Vector256<float> color = Avx.Multiply(destination, dstW);
-        color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW);
-        color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW);
+        Vector256<float> color = destination * dstW;
+        color = Vector256_.MultiplyAdd(color, source, srcW);
+        color = Vector256_.MultiplyAdd(color, blend, blendW);
 
         // unpremultiply
         return Numerics.UnPremultiply(color, alpha);
@@ -354,11 +355,11 @@ internal static partial class PorterDuffFunctions
 
         // calculate weights
         Vector256<float> sW = Avx.Permute(source, ShuffleAlphaControl);
-        Vector256<float> blendW = Avx.Multiply(sW, alpha);
-        Vector256<float> dstW = Avx.Subtract(alpha, blendW);
+        Vector256<float> blendW = sW * alpha;
+        Vector256<float> dstW = alpha - blendW;
 
         // calculate final color
-        Vector256<float> color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW);
+        Vector256<float> color = Vector256_.MultiplyAdd(blend * blendW, destination, dstW);
 
         // unpremultiply
         return Numerics.UnPremultiply(color, alpha);
@@ -392,10 +393,10 @@ internal static partial class PorterDuffFunctions
     public static Vector256<float> In(Vector256<float> destination, Vector256<float> source)
     {
         // calculate alpha
-        Vector256<float> alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl);
+        Vector256<float> alpha = Avx.Permute(source * destination, ShuffleAlphaControl);
 
         // premultiply
-        Vector256<float> color = Avx.Multiply(source, alpha);
+        Vector256<float> color = source * alpha;
 
         // unpremultiply
         return Numerics.UnPremultiply(color, alpha);
@@ -429,10 +430,10 @@ internal static partial class PorterDuffFunctions
     public static Vector256<float> Out(Vector256<float> destination, Vector256<float> source)
     {
         // calculate alpha
-        Vector256<float> alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl);
+        Vector256<float> alpha = Avx.Permute(source * (Vector256.Create(1F) - destination), ShuffleAlphaControl);
 
         // premultiply
-        Vector256<float> color = Avx.Multiply(source, alpha);
+        Vector256<float> color = source * alpha;
 
         // unpremultiply
         return Numerics.UnPremultiply(color, alpha);
@@ -475,12 +476,12 @@ internal static partial class PorterDuffFunctions
         Vector256<float> dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl);
 
         Vector256<float> vOne = Vector256.Create(1F);
-        Vector256<float> srcW = Avx.Subtract(vOne, dW);
-        Vector256<float> dstW = Avx.Subtract(vOne, sW);
+        Vector256<float> srcW = vOne - dW;
+        Vector256<float> dstW = vOne - sW;
 
         // calculate alpha
-        Vector256<float> alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW);
-        Vector256<float> color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW);
+        Vector256<float> alpha = Vector256_.MultiplyAdd(dW * dstW, sW, srcW);
+        Vector256<float> color = Vector256_.MultiplyAdd(dW * destination * dstW, sW * source, srcW);
 
         // unpremultiply
         return Numerics.UnPremultiply(color, alpha);