Browse Source

Use FMA where possible.

pull/2359/head
James Jackson-South 3 years ago
parent
commit
a666372f68
  1. 24
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 10
      src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs

24
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -533,6 +533,7 @@ internal static partial class SimdUtils
/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector256{Single}"/>.
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
/// </summary>
/// <remarks>ret = (vm0 * vm1) + va</remarks>
/// <param name="va">The vector to add to the intermediate result.</param>
@ -555,6 +556,7 @@ internal static partial class SimdUtils
/// <summary>
/// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>.
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
/// </summary>
/// <remarks>ret = (vm0 * vm1) - vs</remarks>
/// <param name="vs">The vector to subtract from the intermediate result.</param>
@ -575,6 +577,28 @@ internal static partial class SimdUtils
return Avx.Subtract(Avx.Multiply(vm0, vm1), vs);
}
/// <summary>
/// Multiplies two <see cref="Vector256{Single}"/> values, negates the product,
/// and adds a third vector to it.
/// </summary>
/// <remarks>ret = c - (a * b)</remarks>
/// <param name="a">The left-hand multiplicand.</param>
/// <param name="b">The right-hand multiplicand.</param>
/// <param name="c">The vector that the negated product is added to.</param>
/// <returns>The <see cref="Vector256{T}"/> holding <c>c - (a * b)</c> for each lane.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplyAddNegated(
    in Vector256<float> a,
    in Vector256<float> b,
    in Vector256<float> c)
{
    // Without FMA support, fall back to a separate multiply followed by a subtract.
    if (!Fma.IsSupported)
    {
        Vector256<float> product = Avx.Multiply(a, b);
        return Avx.Subtract(c, product);
    }

    // The intrinsic takes its operands in the same (a, b, c) order as this helper.
    return Fma.MultiplyAddNegated(a, b, c);
}
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>

10
src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs

@ -124,7 +124,7 @@ internal static partial class PorterDuffFunctions
public static Vector256<float> Screen(Vector256<float> backdrop, Vector256<float> source)
{
// Combines backdrop and source per lane as 1 - ((1 - backdrop) * (1 - source)).
// vOne holds 1.0 in every lane.
Vector256<float> vOne = Vector256.Create(1F);
// Formula written with separate AVX subtract and multiply calls.
return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source)));
// Same formula routed through the MultiplyAddNegated helper (ret = c - (a * b), with c = vOne).
// NOTE(review): as rendered, this statement follows the return above and would be unreachable;
// presumably the two returns are the before/after sides of the change with the diff markers lost — confirm against the commit.
return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne);
}
/// <summary>
@ -244,10 +244,10 @@ internal static partial class PorterDuffFunctions
public static Vector256<float> OverlayValueFunction(Vector256<float> backdrop, Vector256<float> source)
{
// Per lane: picks between (2 * backdrop * source) and (1 - 2 * (1 - source) * (1 - backdrop)),
// depending on whether backdrop is greater than 0.5.
// NOTE(review): `right` is declared twice in this span; presumably both sides of the diff are
// shown without +/- markers (explicit-multiply form first, MultiplyAddNegated form second) — confirm against the commit.
Vector256<float> vOne = Vector256.Create(1F);
// Only the explicit-multiply form of `right` below reads vTwo.
Vector256<float> vTwo = Vector256.Create(2F);
// left = 2 * backdrop * source; the doubling is done as backdrop + backdrop.
Vector256<float> left = Avx.Multiply(Avx.Add(backdrop, backdrop), source);
// right = 1 - ((2 * (1 - source)) * (1 - backdrop)), using separate multiply and subtract calls.
Vector256<float> right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), Avx.Subtract(vOne, backdrop)));
// Same expression via MultiplyAddNegated (ret = c - (a * b), with c = vOne);
// 2 * (1 - source) is formed by adding (1 - source) to itself.
Vector256<float> vOneMinusSource = Avx.Subtract(vOne, source);
Vector256<float> right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne);
// Per-lane comparison mask for backdrop > 0.5.
Vector256<float> cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F));
// Takes `right` in lanes where the mask is set and `left` in the others.
return Avx.BlendVariable(left, right, cmp);
}
@ -430,9 +430,7 @@ internal static partial class PorterDuffFunctions
public static Vector256<float> Out(Vector256<float> destination, Vector256<float> source)
{
// calculate alpha
Vector256<float> sW = Avx.Shuffle(source, source, ShuffleAlphaControl);
Vector256<float> dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl);
Vector256<float> alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW);
Vector256<float> alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl);
// premultiply
Vector256<float> color = Avx.Multiply(source, alpha);

Loading…
Cancel
Save