diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs index fa2f72c74..d4640f133 100644 --- a/src/ImageSharp/Common/Constants.cs +++ b/src/ImageSharp/Common/Constants.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. namespace SixLabors.ImageSharp; diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 4bc0040c6..7d2bab259 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -532,7 +532,8 @@ internal static partial class SimdUtils } /// - /// Performs a multiplication and an addition of the . + /// Performs a multiplication and an addition of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. @@ -549,22 +550,21 @@ internal static partial class SimdUtils { return Fma.MultiplyAdd(vm1, vm0, va); } - else - { - return Avx.Add(Avx.Multiply(vm0, vm1), va); - } + + return Avx.Add(Avx.Multiply(vm0, vm1), va); } /// - /// Performs a multiplication and a substraction of the . + /// Performs a multiplication and a subtraction of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) - vs - /// The vector to substract from the intermediate result. + /// The vector to subtract from the intermediate result. /// The first vector to multiply. /// The second vector to multiply. /// The . 
[MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplySubstract( + public static Vector256 MultiplySubtract( in Vector256 vs, in Vector256 vm0, in Vector256 vm1) @@ -573,10 +573,30 @@ internal static partial class SimdUtils { return Fma.MultiplySubtract(vm1, vm0, vs); } - else + + return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); + } + + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = c - (a * b) + /// The first vector to multiply. + /// The second vector to multiply. + /// The vector to add negated to the intermediate result. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + in Vector256 a, + in Vector256 b, + in Vector256 c) + { + if (Fma.IsSupported) { - return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); + return Fma.MultiplyAddNegated(a, b, c); } + + return Avx.Subtract(c, Avx.Multiply(a, b)); } /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs index cae89fc3c..7e102f696 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs @@ -99,7 +99,7 @@ internal static partial class FloatingPointDCT var mm256_F_1_4142 = Vector256.Create(1.414213562f); Vector256 tmp13 = Avx.Add(tmp1, tmp3); - Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); + Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubtract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); tmp0 = Avx.Add(tmp10, tmp13); tmp3 = Avx.Subtract(tmp10, tmp13); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index cf1910121..2db61a06f 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ 
b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -3,6 +3,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -43,18 +47,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -81,18 +152,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. 
Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -119,18 +257,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -157,18 +362,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -195,18 +467,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -233,18 +572,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -271,18 +677,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -309,18 +782,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -347,18 +887,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -385,18 +992,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase 
= PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -423,18 +1097,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -461,18 +1202,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -499,18 +1307,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -537,18 +1412,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -575,18 +1517,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -613,18 +1622,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -651,18 +1727,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -689,18 +1832,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -727,18 +1937,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -765,21 +2042,88 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); - } - } - } + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + } /// /// A pixel blender that implements the "AddSrcOver" composition equation. 
@@ -803,18 +2147,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. 
Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -841,18 +2252,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -879,18 +2357,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -917,18 +2462,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -955,18 +2567,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -993,18 +2672,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1031,18 +2777,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1069,18 +2882,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1107,18 +2987,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1145,18 +3092,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1183,18 +3197,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1221,18 +3302,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1259,18 +3407,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1297,18 +3512,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1335,18 +3617,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1373,18 +3722,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1411,18 +3827,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1449,18 +3932,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1487,18 +4037,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1525,18 +4142,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1563,18 +4247,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + 
while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1601,18 +4352,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1639,18 +4457,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1677,18 +4562,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1715,18 +4667,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1753,18 +4772,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1791,18 +4877,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1829,18 +4982,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1867,18 +5087,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1905,18 +5192,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1943,18 +5297,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1981,18 +5402,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2019,18 +5507,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2057,18 +5612,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2095,18 +5717,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2133,18 +5822,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2171,18 +5927,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2209,18 +6032,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2247,18 +6137,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2285,18 +6242,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2323,18 +6347,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2361,18 +6452,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2399,18 +6557,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2437,18 +6662,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) 
+ { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2475,18 +6767,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2513,18 +6872,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2551,18 +6977,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2589,18 +7082,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2627,18 +7187,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2665,18 +7292,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2703,18 +7397,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2741,18 +7502,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2779,18 +7607,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) 
+ { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2817,18 +7712,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2855,18 +7817,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2893,18 +7922,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2931,18 +8027,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2969,18 +8132,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3007,18 +8237,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3045,18 +8342,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3083,18 +8447,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3121,18 +8552,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3159,18 +8657,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3197,18 +8762,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3235,18 +8867,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3273,18 +8972,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3311,18 +9077,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3349,18 +9182,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3387,18 +9287,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3425,18 +9392,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3463,18 +9497,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3501,18 +9602,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3539,18 +9707,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3577,18 +9812,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3615,18 +9917,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3653,18 +10022,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3691,18 +10127,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3729,18 +10232,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3767,18 +10337,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3805,18 +10442,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3843,18 +10547,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3881,18 +10652,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3919,18 +10757,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3957,18 +10862,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3995,18 +10967,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4033,18 +11072,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4071,18 +11177,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4109,18 +11282,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) - { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 7bd51439c..22b9ebf98 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -13,6 +13,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -86,18 +90,85 @@ var blenders = new []{ protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + // Divide by 2 
as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref 
MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index ff41e70b2..bd522da19 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -5,6 +5,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -21,11 
+23,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "NormalSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "NormalSrcAtop" compositing equation. /// @@ -36,7 +49,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Normal(backdrop, source)); + } + + /// + /// Returns the result of the "NormalSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Normal(backdrop, source)); } @@ -51,7 +79,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Normal(backdrop, source)); + } + + /// + /// Returns the result of the "NormalSrcOver" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Normal(backdrop, source)); } @@ -66,11 +109,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "NormalSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrcOut" compositing equation. /// @@ -81,11 +135,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "NormalSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalDest" compositing equation. 
/// @@ -99,6 +164,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "NormalDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "NormalDestAtop" compositing equation. /// @@ -109,7 +187,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Normal(source, backdrop)); + } + + /// + /// Returns the result of the "NormalDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Normal(source, backdrop)); } @@ -124,7 +217,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Normal(source, backdrop)); + } + + /// + /// Returns the result of the "NormalDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Normal(source, backdrop)); } @@ -139,11 +247,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "NormalDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalDestOut" compositing equation. /// @@ -154,11 +273,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "NormalDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalXor" compositing equation. 
/// @@ -169,11 +299,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "NormalXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalClear" compositing equation. /// @@ -184,11 +325,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "NormalClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrc" compositing equation. @@ -416,11 +568,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "MultiplySrc compositing equation. + /// + /// The backdrop vector. 
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "MultiplySrcAtop" compositing equation. /// @@ -431,7 +594,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Multiply(backdrop, source)); + } + + /// + /// Returns the result of the "MultiplySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Multiply(backdrop, source)); } @@ -446,7 +624,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Multiply(backdrop, source)); + } + + /// + /// Returns the result of the "MultiplySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Multiply(backdrop, source)); } @@ -461,11 +654,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrcOut" compositing equation. /// @@ -476,11 +680,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyDest" compositing equation. 
/// @@ -494,6 +709,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "MultiplyDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "MultiplyDestAtop" compositing equation. /// @@ -504,7 +732,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Multiply(source, backdrop)); + } + + /// + /// Returns the result of the "MultiplyDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Multiply(source, backdrop)); } @@ -519,7 +762,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Multiply(source, backdrop)); + } + + /// + /// Returns the result of the "MultiplyDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Multiply(source, backdrop)); } @@ -534,11 +792,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyDestOut" compositing equation. /// @@ -549,11 +818,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyXor" compositing equation. 
/// @@ -564,11 +844,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "MultiplyXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyClear" compositing equation. /// @@ -579,11 +870,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "MultiplyClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrc" compositing equation. @@ -811,11 +1113,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "AddSrc" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "AddSrcAtop" compositing equation. /// @@ -826,65 +1139,130 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcOver" compositing equation. + /// Returns the result of the "AddSrcAtop" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Over(backdrop, source, Add(backdrop, source)); + return Atop(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcIn" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); - return In(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcOut" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Out(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddDest" compositing equation. + /// Returns the result of the "AddSrcIn" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddDest(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return In(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return Out(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddDest(Vector4 backdrop, Vector4 source, float opacity) + { + return backdrop; + } + + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDest(Vector256 backdrop, Vector256 source, Vector256 opacity) { return backdrop; } @@ -899,7 +1277,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Add(source, backdrop)); + } + + /// + /// Returns the result of the "AddDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Add(source, backdrop)); } @@ -914,7 +1307,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Add(source, backdrop)); + } + + /// + /// Returns the result of the "AddDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Add(source, backdrop)); } @@ -929,11 +1337,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "AddDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddDestOut" compositing equation. /// @@ -944,11 +1363,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "AddDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddXor" compositing equation. 
/// @@ -959,11 +1389,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "AddXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddClear" compositing equation. /// @@ -974,11 +1415,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "AddClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddSrc" compositing equation. @@ -1206,11 +1658,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "SubtractSrc" compositing equation. + /// + /// The backdrop vector. 
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "SubtractSrcAtop" compositing equation. /// @@ -1221,7 +1684,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Subtract(backdrop, source)); + } + + /// + /// Returns the result of the "SubtractSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Subtract(backdrop, source)); } @@ -1236,7 +1714,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Subtract(backdrop, source)); + } + + /// + /// Returns the result of the "SubtractSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Subtract(backdrop, source)); } @@ -1251,11 +1744,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrcOut" compositing equation. /// @@ -1266,11 +1770,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractDest" compositing equation. 
/// @@ -1284,6 +1799,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "SubtractDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "SubtractDestAtop" compositing equation. /// @@ -1294,7 +1822,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Subtract(source, backdrop)); + } + + /// + /// Returns the result of the "SubtractDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Subtract(source, backdrop)); } @@ -1309,7 +1852,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Subtract(source, backdrop)); + } + + /// + /// Returns the result of the "SubtractDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Subtract(source, backdrop)); } @@ -1324,11 +1882,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "SubtractDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractDestOut" compositing equation. /// @@ -1339,11 +1908,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "SubtractDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractXor" compositing equation. 
/// @@ -1354,11 +1934,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "SubtractXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractClear" compositing equation. /// @@ -1369,11 +1960,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "SubtractClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrc" compositing equation. @@ -1601,11 +2203,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "ScreenSrc" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "ScreenSrcAtop" compositing equation. /// @@ -1616,7 +2229,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Screen(backdrop, source)); + } + + /// + /// Returns the result of the "ScreenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Screen(backdrop, source)); } @@ -1631,7 +2259,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Screen(backdrop, source)); + } + + /// + /// Returns the result of the "ScreenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Screen(backdrop, source)); } @@ -1646,11 +2289,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrcOut" compositing equation. /// @@ -1661,11 +2315,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenDest" compositing equation. 
/// @@ -1679,19 +2344,62 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "ScreenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + + /// + /// Returns the result of the "ScreenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + /// /// Returns the result of the "ScreenDestAtop" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); - return Atop(source, backdrop, Screen(source, backdrop)); + return Over(source, backdrop, Screen(source, backdrop)); } /// @@ -1700,11 +2408,11 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 ScreenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Screen(source, backdrop)); } @@ -1719,11 +2427,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "ScreenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenDestOut" compositing equation. 
/// @@ -1734,11 +2453,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "ScreenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenXor" compositing equation. /// @@ -1749,11 +2479,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "ScreenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenClear" compositing equation. /// @@ -1764,11 +2505,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "ScreenClear" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrc" compositing equation. @@ -1996,11 +2748,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "DarkenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "DarkenSrcAtop" compositing equation. /// @@ -2011,7 +2774,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Darken(backdrop, source)); + } + + /// + /// Returns the result of the "DarkenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Darken(backdrop, source)); } @@ -2026,7 +2804,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Darken(backdrop, source)); + } + + /// + /// Returns the result of the "DarkenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Darken(backdrop, source)); } @@ -2041,11 +2834,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrcOut" compositing equation. 
/// @@ -2056,11 +2860,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenDest" compositing equation. /// @@ -2074,6 +2889,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "DarkenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "DarkenDestAtop" compositing equation. /// @@ -2084,7 +2912,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Darken(source, backdrop)); + } + + /// + /// Returns the result of the "DarkenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Darken(source, backdrop)); } @@ -2099,7 +2942,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Darken(source, backdrop)); + } + + /// + /// Returns the result of the "DarkenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Darken(source, backdrop)); } @@ -2114,11 +2972,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "DarkenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenDestOut" compositing equation. 
/// @@ -2129,11 +2998,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "DarkenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenXor" compositing equation. /// @@ -2144,11 +3024,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "DarkenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenClear" compositing equation. /// @@ -2159,11 +3050,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "DarkenClear" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrc" compositing equation. @@ -2391,11 +3293,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "LightenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "LightenSrcAtop" compositing equation. /// @@ -2406,7 +3319,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Lighten(backdrop, source)); + } + + /// + /// Returns the result of the "LightenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Lighten(backdrop, source)); } @@ -2421,7 +3349,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Lighten(backdrop, source)); + } + + /// + /// Returns the result of the "LightenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Lighten(backdrop, source)); } @@ -2436,11 +3379,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "LightenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrcOut" compositing equation. 
/// @@ -2451,11 +3405,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "LightenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenDest" compositing equation. /// @@ -2469,6 +3434,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "LightenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "LightenDestAtop" compositing equation. /// @@ -2479,7 +3457,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Lighten(source, backdrop)); + } + + /// + /// Returns the result of the "LightenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Lighten(source, backdrop)); } @@ -2494,24 +3487,65 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Lighten(source, backdrop)); + } + + /// + /// Returns the result of the "LightenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Lighten(source, backdrop)); } /// - /// Returns the result of the "LightenDestIn" compositing equation. + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return In(source, backdrop); + } + + /// + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + + /// + /// Returns the result of the "LightenDestOut" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); - return In(source, backdrop); + return Out(source, backdrop); } /// @@ -2520,14 +3554,10 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) - { - source.W *= opacity; - - return Out(source, backdrop); - } + public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenXor" compositing equation. @@ -2539,11 +3569,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "LightenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenClear" compositing equation. /// @@ -2554,11 +3595,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "LightenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrc" compositing equation. @@ -2786,11 +3838,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "OverlaySrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "OverlaySrcAtop" compositing equation. 
/// @@ -2801,7 +3864,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Overlay(backdrop, source)); + } + + /// + /// Returns the result of the "OverlaySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Overlay(backdrop, source)); } @@ -2816,7 +3894,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Overlay(backdrop, source)); + } + + /// + /// Returns the result of the "OverlaySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Overlay(backdrop, source)); } @@ -2831,11 +3924,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrcOut" compositing equation. /// @@ -2846,11 +3950,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayDest" compositing equation. 
/// @@ -2864,6 +3979,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "OverlayDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "OverlayDestAtop" compositing equation. /// @@ -2874,7 +4002,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Overlay(source, backdrop)); + } + + /// + /// Returns the result of the "OverlayDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Overlay(source, backdrop)); } @@ -2889,7 +4032,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Overlay(source, backdrop)); + } + + /// + /// Returns the result of the "OverlayDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Overlay(source, backdrop)); } @@ -2904,11 +4062,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "OverlayDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayDestOut" compositing equation. /// @@ -2919,11 +4088,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "OverlayDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayXor" compositing equation. 
/// @@ -2934,11 +4114,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "OverlayXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayClear" compositing equation. /// @@ -2949,11 +4140,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "OverlayClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrc" compositing equation. @@ -3181,11 +4383,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "HardLightSrc compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "HardLightSrcAtop" compositing equation. /// @@ -3196,7 +4409,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, HardLight(backdrop, source)); + } + + /// + /// Returns the result of the "HardLightSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, HardLight(backdrop, source)); } @@ -3211,7 +4439,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, HardLight(backdrop, source)); + } + + /// + /// Returns the result of the "HardLightSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, HardLight(backdrop, source)); } @@ -3226,11 +4469,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrcOut" compositing equation. /// @@ -3241,11 +4495,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightDest" compositing equation. 
/// @@ -3259,6 +4524,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "HardLightDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "HardLightDestAtop" compositing equation. /// @@ -3269,7 +4547,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, HardLight(source, backdrop)); + } + + /// + /// Returns the result of the "HardLightDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, HardLight(source, backdrop)); } @@ -3284,7 +4577,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, HardLight(source, backdrop)); + } + + /// + /// Returns the result of the "HardLightDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, HardLight(source, backdrop)); } @@ -3299,11 +4607,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "HardLightDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightDestOut" compositing equation. /// @@ -3314,11 +4633,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "HardLightDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightXor" compositing equation. 
/// @@ -3329,11 +4659,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "HardLightXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightClear" compositing equation. /// @@ -3344,11 +4685,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "HardLightClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrc" compositing equation. 
diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 40d8b8997..69dac875c 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -15,6 +15,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -31,11 +33,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Src(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "<#=blender#>Src compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. /// @@ -46,7 +59,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, <#=blender#>(backdrop, source)); + } + + /// + /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, <#=blender#>(backdrop, source)); } @@ -61,7 +89,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, <#=blender#>(backdrop, source)); + } + + /// + /// Returns the result of the "<#=blender#>SrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, <#=blender#>(backdrop, source)); } @@ -76,11 +119,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. 
/// @@ -91,11 +145,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Dest" compositing equation. /// @@ -109,6 +174,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "<#=blender#>Dest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Dest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "<#=blender#>DestAtop" compositing equation. /// @@ -119,7 +197,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, <#=blender#>(source, backdrop)); + } + + /// + /// Returns the result of the "<#=blender#>DestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, <#=blender#>(source, backdrop)); } @@ -134,7 +227,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, <#=blender#>(source, backdrop)); + } + + /// + /// Returns the result of the "<#=blender#>DestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, <#=blender#>(source, backdrop)); } @@ -149,11 +257,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. 
/// @@ -164,11 +283,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>Xor" compositing equation. /// @@ -179,11 +309,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Xor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Xor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Clear" compositing equation. 
/// @@ -194,11 +335,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Clear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Clear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + <#} #> <# void GenerateGenericPixelBlender(string blender, string composer) { #> diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 9bc7e35f3..baf7d80c0 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -3,6 +3,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -19,6 +21,9 @@ namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; /// internal static partial class PorterDuffFunctions { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + /// /// Returns the result of the "Normal" compositing equation. /// @@ -27,9 +32,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Normal(Vector4 backdrop, Vector4 source) - { - return source; - } + => source; + + /// + /// Returns the result of the "Normal" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Normal(Vector256 backdrop, Vector256 source) + => source; /// /// Returns the result of the "Multiply" compositing equation. @@ -39,9 +52,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Multiply(Vector4 backdrop, Vector4 source) - { - return backdrop * source; - } + => backdrop * source; + + /// + /// Returns the result of the "Multiply" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Multiply(Vector256 backdrop, Vector256 source) + => Avx.Multiply(backdrop, source); /// /// Returns the result of the "Add" compositing equation. @@ -51,9 +72,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Add(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(Vector4.One, backdrop + source); - } + => Vector4.Min(Vector4.One, backdrop + source); + + /// + /// Returns the result of the "Add" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Add(Vector256 backdrop, Vector256 source) + => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -63,9 +92,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Subtract(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(Vector4.Zero, backdrop - source); - } + => Vector4.Max(Vector4.Zero, backdrop - source); + + /// + /// Returns the result of the "Subtract" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Subtract(Vector256 backdrop, Vector256 source) + => Avx.Max(Vector256.Zero, Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -75,8 +112,19 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Screen(Vector4 backdrop, Vector4 source) + => Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + + /// + /// Returns the result of the "Screen" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Screen(Vector256 backdrop, Vector256 source) { - return Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + Vector256 vOne = Vector256.Create(1F); + return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); } /// @@ -87,9 +135,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Darken(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(backdrop, source); - } + => Vector4.Min(backdrop, source); + + /// + /// Returns the result of the "Darken" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Darken(Vector256 backdrop, Vector256 source) + => Avx.Min(backdrop, source); /// /// Returns the result of the "Lighten" compositing equation. @@ -98,10 +154,17 @@ internal static partial class PorterDuffFunctions /// The source vector. /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Lighten(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(backdrop, source); - } + public static Vector4 Lighten(Vector4 backdrop, Vector4 source) => Vector4.Max(backdrop, source); + + /// + /// Returns the result of the "Lighten" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Lighten(Vector256 backdrop, Vector256 source) + => Avx.Max(backdrop, source); /// /// Returns the result of the "Overlay" compositing equation. @@ -119,6 +182,19 @@ internal static partial class PorterDuffFunctions return Vector4.Min(Vector4.One, new Vector4(cr, cg, cb, 0)); } + /// + /// Returns the result of the "Overlay" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Overlay(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(backdrop, source); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + /// /// Returns the result of the "HardLight" compositing equation. /// @@ -136,15 +212,44 @@ internal static partial class PorterDuffFunctions } /// - /// Helper function for Overlay andHardLight modes + /// Returns the result of the "HardLight" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLight(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(source, backdrop); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + + /// + /// Helper function for Overlay and HardLight modes /// /// Backdrop color element /// Source color element /// Overlay value [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float OverlayValueFunction(float backdrop, float source) + => backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); + + /// + /// Helper function for Overlay and HardLight modes + /// + /// Backdrop color element + /// Source color element + /// Overlay value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { - return backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); + Vector256 vOne = Vector256.Create(1F); + Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); + + Vector256 vOneMinusSource = Avx.Subtract(vOne, source); + Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); + Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); + return Avx.BlendVariable(left, right, cmp); } /// @@ -158,21 +263,53 @@ internal static partial class PorterDuffFunctions public static Vector4 Over(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; - float srcW = source.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; + Vector4 srcW = sW - blendW; // calculate final alpha - float alpha = dstW + source.W; + Vector4 alpha = dstW + sW; // 
calculate final color Vector4 color = (destination * dstW) + (source * srcW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + return WithW(color, alpha); + } + + /// + /// Returns the result of the "Over" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) + { + // calculate weights + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); + + Vector256 blendW = Avx.Multiply(sW, dW); + Vector256 dstW = Avx.Subtract(dW, blendW); + Vector256 srcW = Avx.Subtract(sW, blendW); + + // calculate final alpha + Vector256 alpha = Avx.Add(dstW, sW); + + // calculate final color + Vector256 color = Avx.Multiply(destination, dstW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW); - return color; + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -186,20 +323,47 @@ internal static partial class PorterDuffFunctions public static Vector4 Atop(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; // calculate final alpha - float alpha = destination.W; + Vector4 alpha = dW; // calculate final color Vector4 color = (destination * dstW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, 
new(Constants.Epsilon)); + return WithW(color, alpha); + } - return color; + /// + /// Returns the result of the "Atop" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) + { + // calculate final alpha + Vector256 alpha = Avx.Permute(destination, ShuffleAlphaControl); + + // calculate weights + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 blendW = Avx.Multiply(sW, alpha); + Vector256 dstW = Avx.Subtract(alpha, blendW); + + // calculate final color + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -211,13 +375,33 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 In(Vector4 destination, Vector4 source) { - float alpha = destination.W * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = dW * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + return WithW(color, alpha); + } - return color; + /// + /// Returns the result of the "In" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 In(Vector256 destination, Vector256 source) + { + // calculate alpha + Vector256 alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -229,13 +413,33 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Out(Vector4 destination, Vector4 source) { - float alpha = (1 - destination.W) * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = (Vector4.One - dW) * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + return WithW(color, alpha); + } - return color; + /// + /// Returns the result of the "Out" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Out(Vector256 destination, Vector256 source) + { + // calculate alpha + Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -247,22 +451,80 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Xor(Vector4 destination, Vector4 source) { - float srcW = 1 - destination.W; - float dstW = 1 - source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 srcW = Vector4.One - dW; + Vector4 dstW = Vector4.One - sW; + + Vector4 alpha = (sW * srcW) + (dW * dstW); + Vector4 color = (sW * source * srcW) + (dW * destination * dstW); + + // unpremultiply + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + return WithW(color, alpha); + } + + /// + /// Returns the result of the "XOr" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Xor(Vector256 destination, Vector256 source) + { + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + + Vector256 vOne = Vector256.Create(1F); + Vector256 srcW = Avx.Subtract(vOne, dW); + Vector256 dstW = Avx.Subtract(vOne, sW); - float alpha = (source.W * srcW) + (destination.W * dstW); - Vector4 color = (source.W * source * srcW) + (destination.W * destination * dstW); + // calculate alpha + Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 Clear(Vector4 backdrop, Vector4 source) => Vector4.Zero; - return color; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 WithW(Vector4 value, Vector4 w) + { + if (Sse41.IsSupported) + { + return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4(); + } + + if (Sse.IsSupported) + { + // Create tmp as + // Then return (which is ) + Vector128 tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11); + return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4(); + } + + value.W = w.W; + return value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector4 Clear(Vector4 backdrop, Vector4 source) + private static Vector4 
PermuteW(Vector4 value) { - return Vector4.Zero; + if (Sse.IsSupported) + { + return Sse.Shuffle(value.AsVector128(), value.AsVector128(), 0b11111111).AsVector4(); + } + + return new(value.W); } } diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs index 68956c880..3e6667dbc 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs @@ -12,9 +12,9 @@ namespace SixLabors.ImageSharp.Benchmarks; public class PorterDuffBulkVsPixel { - private Configuration Configuration => Configuration.Default; + private static Configuration Configuration => Configuration.Default; - private void BulkVectorConvert( + private static void BulkVectorConvert( Span destination, Span background, Span source, @@ -31,18 +31,18 @@ public class PorterDuffBulkVsPixel Span backgroundSpan = buffer.Slice(destination.Length, destination.Length); Span sourceSpan = buffer.Slice(destination.Length * 2, destination.Length); - PixelOperations.Instance.ToVector4(this.Configuration, background, backgroundSpan); - PixelOperations.Instance.ToVector4(this.Configuration, source, sourceSpan); + PixelOperations.Instance.ToVector4(Configuration, background, backgroundSpan); + PixelOperations.Instance.ToVector4(Configuration, source, sourceSpan); for (int i = 0; i < destination.Length; i++) { destinationSpan[i] = PorterDuffFunctions.NormalSrcOver(backgroundSpan[i], sourceSpan[i], amount[i]); } - PixelOperations.Instance.FromVector4Destructive(this.Configuration, destinationSpan, destination); + PixelOperations.Instance.FromVector4Destructive(Configuration, destinationSpan, destination); } - private void BulkPixelConvert( + private static void BulkPixelConvert( Span destination, Span background, Span source, @@ -60,9 +60,9 @@ public class PorterDuffBulkVsPixel } [Benchmark(Description = "ImageSharp BulkVectorConvert")] - public 
Size BulkVectorConvert() + public Size BulkVectorConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); @@ -70,23 +70,23 @@ public class PorterDuffBulkVsPixel for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkVectorConvert(span, span, span, amounts.GetSpan()); + BulkVectorConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); } [Benchmark(Description = "ImageSharp BulkPixelConvert")] - public Size BulkPixelConvert() + public Size BulkPixelConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); Buffer2D pixels = image.GetRootFramePixelBuffer(); for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkPixelConvert(span, span, span, amounts.GetSpan()); + BulkPixelConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs new file mode 100644 index 000000000..fcf7e9dcc --- /dev/null +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+ +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats.PixelBlenders; + +namespace SixLabors.ImageSharp.Benchmarks.PixelBlenders; + +public class PorterDuffBulkVsSingleVector +{ + private Vector4[] backdrop; + private Vector4[] source; + + [GlobalSetup] + public void Setup() + { + this.backdrop = new Vector4[8 * 20]; + this.source = new Vector4[8 * 20]; + + FillRandom(this.backdrop); + FillRandom(this.source); + } + + private static void FillRandom(Vector4[] arr) + { + Random rng = new(); + for (int i = 0; i < arr.Length; i++) + { + arr[i].X = rng.NextSingle(); + arr[i].Y = rng.NextSingle(); + arr[i].Z = rng.NextSingle(); + arr[i].W = rng.NextSingle(); + } + } + + [Benchmark(Description = "Scalar", Baseline = true)] + public Vector4 OverlayValueFunction_Scalar() + { + Vector4 result = default; + for (int i = 0; i < this.backdrop.Length; i++) + { + result = PorterDuffFunctions.NormalSrcOver(this.backdrop[i], this.source[i], .5F); + } + + return result; + } + + [Benchmark(Description = "Avx")] + public Vector256 OverlayValueFunction_Avx() + { + ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.source)); + + Vector256 result = default; + Vector256 opacity = Vector256.Create(.5F); + int count = this.backdrop.Length / 2; + for (int i = 0; i < count; i++) + { + result = PorterDuffFunctions.NormalSrcOver(Unsafe.Add(ref backdrop, i), Unsafe.Add(ref source, i), opacity); + } + + return result; + } +} diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs index c81b0a74f..1086afe76 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs +++ 
b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs @@ -1,59 +1,66 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; - using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Tests.TestUtilities; -using Xunit; +namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffCompositorTests { // TODO: Add other modes to compare. public static readonly TheoryData CompositingOperators = - new TheoryData - { - PixelAlphaCompositionMode.Src, - PixelAlphaCompositionMode.SrcAtop, - PixelAlphaCompositionMode.SrcOver, - PixelAlphaCompositionMode.SrcIn, - PixelAlphaCompositionMode.SrcOut, - PixelAlphaCompositionMode.Dest, - PixelAlphaCompositionMode.DestAtop, - PixelAlphaCompositionMode.DestOver, - PixelAlphaCompositionMode.DestIn, - PixelAlphaCompositionMode.DestOut, - PixelAlphaCompositionMode.Clear, - PixelAlphaCompositionMode.Xor - }; + new() + { + PixelAlphaCompositionMode.Src, + PixelAlphaCompositionMode.SrcAtop, + PixelAlphaCompositionMode.SrcOver, + PixelAlphaCompositionMode.SrcIn, + PixelAlphaCompositionMode.SrcOut, + PixelAlphaCompositionMode.Dest, + PixelAlphaCompositionMode.DestAtop, + PixelAlphaCompositionMode.DestOver, + PixelAlphaCompositionMode.DestIn, + PixelAlphaCompositionMode.DestOut, + PixelAlphaCompositionMode.Clear, + PixelAlphaCompositionMode.Xor + }; [Theory] [WithFile(TestImages.Png.PDDest, nameof(CompositingOperators), PixelTypes.Rgba32)] public void PorterDuffOutputIsCorrect(TestImageProvider provider, PixelAlphaCompositionMode mode) { - var srcFile = TestFile.Create(TestImages.Png.PDSrc); - using (Image src = srcFile.CreateRgba32Image()) - using (Image dest = provider.GetImage()) + static void RunTest(string providerDump, string alphaMode) { - var options = new GraphicsOptions + TestImageProvider provider + = 
BasicSerializer.Deserialize>(providerDump); + + TestFile srcFile = TestFile.Create(TestImages.Png.PDSrc); + using Image src = srcFile.CreateRgba32Image(); + using Image dest = provider.GetImage(); + GraphicsOptions options = new() { Antialias = false, - AlphaCompositionMode = mode + AlphaCompositionMode = Enum.Parse(alphaMode) }; - using (Image res = dest.Clone(x => x.DrawImage(src, options))) - { - string combinedMode = mode.ToString(); - - if (combinedMode != "Src" && combinedMode.StartsWith("Src")) - { - combinedMode = combinedMode.Substring(3); - } + using Image res = dest.Clone(x => x.DrawImage(src, options)); + string combinedMode = alphaMode; - res.DebugSave(provider, combinedMode); - res.CompareToReferenceOutput(provider, combinedMode); + if (combinedMode != "Src" && combinedMode.StartsWith("Src", StringComparison.OrdinalIgnoreCase)) + { + combinedMode = combinedMode[3..]; } + + res.DebugSave(provider, combinedMode); + res.CompareToReferenceOutput(provider, combinedMode); } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX, + provider, + mode.ToString()); } } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 45dece8ec..189d21f1e 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -2,6 +2,7 @@ // Licensed under the Six Labors Split License. 
using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats.PixelBlenders; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -9,7 +10,9 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTests { - public static TheoryData NormalBlendFunctionData = new TheoryData + private static readonly ApproximateFloatComparer FloatComparer = new(.000001F); + + public static TheoryData NormalBlendFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) } @@ -23,7 +26,19 @@ public class PorterDuffFunctionsTests Assert.Equal(expected, actual); } - public static TheoryData MultiplyFunctionData = new TheoryData + [Theory] + [MemberData(nameof(NormalBlendFunctionData))] + public void NormalBlendFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.NormalSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData MultiplyFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) }, @@ -38,22 +53,46 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData AddFunctionData = new TheoryData + 
[Theory] + [MemberData(nameof(MultiplyFunctionData))] + public void MultiplyFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.MultiplySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, - { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, - { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(.2075676f, .2075676f, .2075676f, .37f) } + { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1) }, + { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(0.24324325f, 0.24324325f, 0.24324325f, .37f) } }; [Theory] [MemberData(nameof(AddFunctionData))] public void AddFunction(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) { - Vector4 actual = PorterDuffFunctions.MultiplySrcOver((Vector4)back, source, amount); + Vector4 actual = PorterDuffFunctions.AddSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } - public static TheoryData SubtractFunctionData = new TheoryData + [Theory] + [MemberData(nameof(AddFunctionData))] + public void AddFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, 
back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.AddSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -68,7 +107,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData ScreenFunctionData = new TheoryData + [Theory] + [MemberData(nameof(SubtractFunctionData))] + public void SubtractFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.SubtractSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -83,7 +134,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData 
DarkenFunctionData = new TheoryData + [Theory] + [MemberData(nameof(ScreenFunctionData))] + public void ScreenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.ScreenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, @@ -98,7 +161,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData LightenFunctionData = new TheoryData + [Theory] + [MemberData(nameof(DarkenFunctionData))] + public void DarkenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.DarkenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 
1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -113,7 +188,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData OverlayFunctionData = new TheoryData + [Theory] + [MemberData(nameof(LightenFunctionData))] + public void LightenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.LightenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -128,7 +215,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData HardLightFunctionData = new TheoryData + [Theory] + [MemberData(nameof(OverlayFunctionData))] + public void OverlayFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.OverlaySrcOver(back256, 
source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1f) }, @@ -142,4 +241,16 @@ public class PorterDuffFunctionsTests Vector4 actual = PorterDuffFunctions.HardLightSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } + + [Theory] + [MemberData(nameof(HardLightFunctionData))] + public void HardLightFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.HardLightSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs index 6d9652d89..e35f36fee 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs @@ -1,7 +1,9 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Diagnostics.CodeAnalysis; using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Tests; @@ -14,7 +16,8 @@ internal readonly struct ApproximateFloatComparer : IEqualityComparer, IEqualityComparer, IEqualityComparer, - IEqualityComparer + IEqualityComparer, + IEqualityComparer> { private readonly float epsilon; @@ -72,4 +75,16 @@ internal readonly struct ApproximateFloatComparer : /// public int GetHashCode(ColorMatrix obj) => obj.GetHashCode(); + + public bool Equals(Vector256 x, Vector256 y) + => this.Equals(x.GetElement(0), y.GetElement(0)) + && this.Equals(x.GetElement(1), y.GetElement(1)) + && this.Equals(x.GetElement(2), y.GetElement(2)) + && this.Equals(x.GetElement(3), y.GetElement(3)) + && this.Equals(x.GetElement(4), y.GetElement(4)) + && this.Equals(x.GetElement(5), y.GetElement(5)) + && this.Equals(x.GetElement(6), y.GetElement(6)) + && this.Equals(x.GetElement(7), y.GetElement(7)); + + public int GetHashCode([DisallowNull] Vector256 obj) => obj.GetHashCode(); } diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 1bb64d99d..f68bfdbe6 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -257,6 +257,52 @@ public static class FeatureTestRunner } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + /// The second value to pass as a parameter to the test action. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T arg1, + string arg2) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + ProcessStartInfo processStartInfo = new(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(arg1), + arg2, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(arg1), arg2); + } + } + } + /// /// Runs the given test within an environment /// where the given features.