diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 128218aac2..7d2bab259e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -533,6 +533,7 @@ internal static partial class SimdUtils /// /// Performs a multiplication and an addition of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. @@ -555,6 +556,7 @@ internal static partial class SimdUtils /// /// Performs a multiplication and a subtraction of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) - vs /// The vector to subtract from the intermediate result. @@ -575,6 +577,28 @@ internal static partial class SimdUtils return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); } + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = c - (a * b) + /// The first vector to multiply. + /// The second vector to multiply. + /// The vector to add to the negated intermediate result. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + in Vector256 a, + in Vector256 b, + in Vector256 c) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAddNegated(a, b, c); + } + + return Avx.Subtract(c, Avx.Multiply(a, b)); + } + /// /// as many elements as possible, slicing them down (keeping the remainder). 
/// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index d1bd5bad31..2d47f1a628 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -124,7 +124,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Screen(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); + return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); } /// @@ -244,10 +244,10 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - Vector256 vTwo = Vector256.Create(2F); Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); - Vector256 right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), Avx.Subtract(vOne, backdrop))); + Vector256 vOneMinusSource = Avx.Subtract(vOne, source); + Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -430,9 +430,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Out(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); // 
premultiply Vector256 color = Avx.Multiply(source, alpha);