From 517ec8028058d80b8673129311715eb1d98da49e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 17 Feb 2023 21:04:57 +1000 Subject: [PATCH 01/22] Port most of the function components. --- src/ImageSharp/Common/Constants.cs | 9 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 20 +- .../Components/FloatingPointDCT.Intrinsic.cs | 2 +- .../PixelBlenders/PorterDuffFunctions.cs | 253 ++++++++++++++++-- 4 files changed, 244 insertions(+), 40 deletions(-) diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs index fa2f72c74..a3cfe3623 100644 --- a/src/ImageSharp/Common/Constants.cs +++ b/src/ImageSharp/Common/Constants.cs @@ -1,6 +1,8 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.Intrinsics; + namespace SixLabors.ImageSharp; /// @@ -13,6 +15,11 @@ internal static class Constants /// public static readonly float Epsilon = 0.001F; + /// + /// The epsilon value for comparing floating point numbers. + /// + public static readonly Vector256 Epsilon256 = Vector256.Create(0.001F); + /// /// The epsilon squared value for comparing floating point numbers. /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 4bc0040c6..128218aac 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -532,7 +532,7 @@ internal static partial class SimdUtils } /// - /// Performs a multiplication and an addition of the . + /// Performs a multiplication and an addition of the . /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. 
@@ -549,22 +549,20 @@ internal static partial class SimdUtils { return Fma.MultiplyAdd(vm1, vm0, va); } - else - { - return Avx.Add(Avx.Multiply(vm0, vm1), va); - } + + return Avx.Add(Avx.Multiply(vm0, vm1), va); } /// - /// Performs a multiplication and a substraction of the . + /// Performs a multiplication and a subtraction of the . /// /// ret = (vm0 * vm1) - vs - /// The vector to substract from the intermediate result. + /// The vector to subtract from the intermediate result. /// The first vector to multiply. /// The second vector to multiply. /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplySubstract( + public static Vector256 MultiplySubtract( in Vector256 vs, in Vector256 vm0, in Vector256 vm1) @@ -573,10 +571,8 @@ internal static partial class SimdUtils { return Fma.MultiplySubtract(vm1, vm0, vs); } - else - { - return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); - } + + return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); } /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs index cae89fc3c..7e102f696 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs @@ -99,7 +99,7 @@ internal static partial class FloatingPointDCT var mm256_F_1_4142 = Vector256.Create(1.414213562f); Vector256 tmp13 = Avx.Add(tmp1, tmp3); - Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); + Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubtract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); tmp0 = Avx.Add(tmp10, tmp13); tmp3 = Avx.Subtract(tmp10, tmp13); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 9bc7e35f3..d7d31c0c8 100644 --- 
a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -3,6 +3,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -27,9 +29,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Normal(Vector4 backdrop, Vector4 source) - { - return source; - } + => source; + + /// + /// Returns the result of the "Normal" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Normal(Vector256 backdrop, Vector256 source) + => source; /// /// Returns the result of the "Multiply" compositing equation. @@ -39,9 +49,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Multiply(Vector4 backdrop, Vector4 source) - { - return backdrop * source; - } + => backdrop * source; + + /// + /// Returns the result of the "Multiply" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Multiply(Vector256 backdrop, Vector256 source) + => Avx.Multiply(backdrop, source); /// /// Returns the result of the "Add" compositing equation. @@ -51,9 +69,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Add(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(Vector4.One, backdrop + source); - } + => Vector4.Min(Vector4.One, backdrop + source); + + /// + /// Returns the result of the "Add" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Add(Vector256 backdrop, Vector256 source) + => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -63,9 +89,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Subtract(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(Vector4.Zero, backdrop - source); - } + => Vector4.Max(Vector4.Zero, backdrop - source); + + /// + /// Returns the result of the "Subtract" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Subtract(Vector256 backdrop, Vector256 source) + => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -75,8 +109,19 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Screen(Vector4 backdrop, Vector4 source) + => Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + + /// + /// Returns the result of the "Screen" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Screen(Vector256 backdrop, Vector256 source) { - return Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + Vector256 vOne = Vector256.Create(1F); + return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); } /// @@ -87,9 +132,17 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Darken(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(backdrop, source); - } + => Vector4.Min(backdrop, source); + + /// + /// Returns the result of the "Darken" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Darken(Vector256 backdrop, Vector256 source) + => Avx.Min(backdrop, source); /// /// Returns the result of the "Lighten" compositing equation. @@ -98,10 +151,17 @@ internal static partial class PorterDuffFunctions /// The source vector. /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Lighten(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(backdrop, source); - } + public static Vector4 Lighten(Vector4 backdrop, Vector4 source) => Vector4.Max(backdrop, source); + + /// + /// Returns the result of the "Lighten" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Lighten(Vector256 backdrop, Vector256 source) + => Avx.Max(backdrop, source); /// /// Returns the result of the "Overlay" compositing equation. @@ -136,16 +196,14 @@ internal static partial class PorterDuffFunctions } /// - /// Helper function for Overlay andHardLight modes + /// Helper function for Overlay and HardLight modes /// /// Backdrop color element /// Source color element /// Overlay value [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float OverlayValueFunction(float backdrop, float source) - { - return backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); - } + => backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); /// /// Returns the result of the "Over" compositing equation. 
@@ -175,6 +233,40 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "Over" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) + { + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 blendW = Avx.Multiply(sW, dW); + + Vector256 dstW = Avx.Subtract(dW, blendW); + Vector256 srcW = Avx.Subtract(sW, blendW); + + // calculate final alpha + Vector256 alpha = Avx.Add(dstW, sW); + + // calculate final color + Vector256 color = Avx.Multiply(destination, dstW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(source, srcW, color); + color = SimdUtils.HwIntrinsics.MultiplyAdd(blend, blendW, color); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "Atop" compositing equation. /// @@ -202,6 +294,36 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "Atop" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) + { + // calculate weights + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate final alpha + Vector256 alpha = Avx.Shuffle(destination, destination, shuffleAlphaControl); + + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 blendW = Avx.Multiply(sW, alpha); + Vector256 dstW = Avx.Subtract(alpha, blendW); + + // calculate final color + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(destination, dstW, Avx.Multiply(blend, blendW)); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "In" compositing equation. /// @@ -220,6 +342,31 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "In" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 In(Vector256 destination, Vector256 source) + { + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate alpha + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 alpha = Avx.Multiply(sW, dW); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "Out" compositing equation. /// @@ -238,6 +385,31 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "Out" compositing equation. 
+ /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Out(Vector256 destination, Vector256 source) + { + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate alpha + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "XOr" compositing equation. /// @@ -260,9 +432,38 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "XOr" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector4 Clear(Vector4 backdrop, Vector4 source) + public static Vector256 Xor(Vector256 destination, Vector256 source) { - return Vector4.Zero; + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + + Vector256 vOne = Vector256.Create(1F); + Vector256 srcW = Avx.Subtract(vOne, dW); + Vector256 dstW = Avx.Subtract(vOne, sW); + + // calculate alpha + Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(sW, source), srcW, Avx.Multiply(Avx.Multiply(dW, destination), dstW)); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 Clear(Vector4 backdrop, Vector4 source) => Vector4.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; } From 746b34d46f52b13e6bab922899ba8825ab23b5b5 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 13:22:00 +1000 Subject: [PATCH 02/22] Finish porting function components --- .../PixelBlenders/PorterDuffFunctions.cs | 110 +++++++++++------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index d7d31c0c8..551f17f20 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -21,6 +21,12 @@ namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; 
/// internal static partial class PorterDuffFunctions { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + private static readonly Vector256 Vector256Half = Vector256.Create(0.5F); + private static readonly Vector256 Vector256One = Vector256.Create(1F); + private static readonly Vector256 Vector256Two = Vector256.Create(2F); + /// /// Returns the result of the "Normal" compositing equation. /// @@ -79,7 +85,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Add(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); + => Avx.Min(Vector256One, Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -99,7 +105,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); + => Avx.Min(Vector256One, Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -119,10 +125,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Screen(Vector256 backdrop, Vector256 source) - { - Vector256 vOne = Vector256.Create(1F); - return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); - } + => Avx.Subtract(Vector256One, Avx.Multiply(Avx.Subtract(Vector256One, backdrop), Avx.Subtract(Vector256One, source))); /// /// Returns the result of the "Darken" compositing equation. @@ -179,6 +182,19 @@ internal static partial class PorterDuffFunctions return Vector4.Min(Vector4.One, new Vector4(cr, cg, cb, 0)); } + /// + /// Returns the result of the "Overlay" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Overlay(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(backdrop, source); + return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + /// /// Returns the result of the "HardLight" compositing equation. /// @@ -195,6 +211,19 @@ internal static partial class PorterDuffFunctions return Vector4.Min(Vector4.One, new Vector4(cr, cg, cb, 0)); } + /// + /// Returns the result of the "HardLight" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLight(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(source, backdrop); + return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + /// /// Helper function for Overlay and HardLight modes /// @@ -205,6 +234,22 @@ internal static partial class PorterDuffFunctions private static float OverlayValueFunction(float backdrop, float source) => backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); + /// + /// Helper function for Overlay and HardLight modes + /// + /// Backdrop color element + /// Source color element + /// Overlay value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) + { + Vector256 left = Avx.Multiply(Avx.Multiply(Vector256Two, backdrop), source); + Vector256 right = Avx.Subtract(Vector256One, Avx.Multiply(Avx.Multiply(Vector256Two, Avx.Subtract(Vector256One, source)), Avx.Subtract(Vector256One, backdrop))); + + Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256Half); + return Avx.BlendVariable(left, right, cmp); + } + /// /// Returns the result of the "Over" compositing equation. 
/// @@ -243,12 +288,9 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate weights - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); @@ -264,7 +306,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -304,15 +346,11 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) { - // calculate weights - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate final alpha - Vector256 alpha = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 alpha = Avx.Shuffle(destination, destination, ShuffleAlphaControl); // calculate weights - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, alpha); Vector256 dstW = Avx.Subtract(alpha, blendW); @@ -321,7 +359,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } 
/// @@ -351,12 +389,9 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 In(Vector256 destination, Vector256 source) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 alpha = Avx.Multiply(sW, dW); // premultiply @@ -364,7 +399,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -394,20 +429,17 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Out(Vector256 destination, Vector256 source) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256One, dW), sW); // premultiply Vector256 color = Avx.Multiply(source, alpha); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -441,16 +473,12 @@ internal static partial class PorterDuffFunctions 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Xor(Vector256 destination, Vector256 source) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate weights - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 vOne = Vector256.Create(1F); - Vector256 srcW = Avx.Subtract(vOne, dW); - Vector256 dstW = Avx.Subtract(vOne, sW); + Vector256 srcW = Avx.Subtract(Vector256One, dW); + Vector256 dstW = Avx.Subtract(Vector256One, sW); // calculate alpha Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); @@ -458,7 +486,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From 4c546d7de8c917e4dca0b1bcc8b62bb8ee92b007 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:12:30 +1000 Subject: [PATCH 03/22] Update the PorterDuffFunctions.Generated.tt to include the Vector256 variants. 
--- .../PorterDuffFunctions.Generated.cs | 1406 ++++++++++++++++- .../PorterDuffFunctions.Generated.tt | 152 ++ 2 files changed, 1531 insertions(+), 27 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index ff41e70b2..2b365f177 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -5,6 +5,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -26,6 +28,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "NormalSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "NormalSrcAtop" compositing equation. /// @@ -41,6 +54,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcOver" compositing equation. 
/// @@ -56,6 +84,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcIn" compositing equation. /// @@ -71,6 +114,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "NormalSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrcOut" compositing equation. /// @@ -86,6 +140,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "NormalSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalDest" compositing equation. 
/// @@ -99,6 +164,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "NormalDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "NormalDestAtop" compositing equation. /// @@ -114,6 +192,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestOver" compositing equation. /// @@ -129,6 +222,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestIn" compositing equation. 
/// @@ -144,6 +252,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "NormalDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalDestOut" compositing equation. /// @@ -159,6 +278,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "NormalDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalXor" compositing equation. /// @@ -174,6 +304,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "NormalXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalClear" compositing equation. /// @@ -189,6 +330,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "NormalClear" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrc" compositing equation. @@ -421,6 +573,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "MultiplySrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "MultiplySrcAtop" compositing equation. /// @@ -436,6 +599,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcOver" compositing equation. /// @@ -451,6 +629,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity.
Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcIn" compositing equation. /// @@ -466,6 +659,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrcOut" compositing equation. /// @@ -481,6 +685,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyDest" compositing equation. /// @@ -494,6 +709,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "MultiplyDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "MultiplyDestAtop" compositing equation. /// @@ -509,6 +737,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestOver" compositing equation. /// @@ -524,6 +767,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestIn" compositing equation. /// @@ -539,6 +797,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyDestOut" compositing equation. /// @@ -554,6 +823,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyXor" compositing equation. /// @@ -569,6 +849,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "MultiplyXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyClear" compositing equation. /// @@ -584,6 +875,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "MultiplyClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrc" compositing equation. @@ -816,6 +1118,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "AddSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "AddSrcAtop" compositing equation. /// @@ -832,52 +1145,104 @@ internal static partial class PorterDuffFunctions } /// - /// Returns the result of the "AddSrcOver" compositing equation. + /// Returns the result of the "AddSrcAtop" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Over(backdrop, source, Add(backdrop, source)); + return Atop(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcIn" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The .
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) { source.W *= opacity; - return In(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcOut" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Out(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddDest" compositing equation. + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return In(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. 
+ /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return Out(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddDest" compositing equation. /// /// The backdrop vector. /// The source vector. @@ -889,6 +1254,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "AddDestAtop" compositing equation. /// @@ -904,6 +1282,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestOver" compositing equation. 
/// @@ -919,6 +1312,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestIn" compositing equation. /// @@ -934,6 +1342,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "AddDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddDestOut" compositing equation. /// @@ -949,6 +1368,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "AddDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddXor" compositing equation. 
/// @@ -964,6 +1394,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "AddXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddClear" compositing equation. /// @@ -979,6 +1420,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "AddClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddSrc" compositing equation. @@ -1211,6 +1663,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "SubtractSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "SubtractSrcAtop" compositing equation. /// @@ -1226,6 +1689,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcAtop" compositing equation. + /// + /// The backdrop vector.
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcOver" compositing equation. /// @@ -1241,6 +1719,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcIn" compositing equation. /// @@ -1256,6 +1749,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrcOut" compositing equation. /// @@ -1271,6 +1775,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcOut" compositing equation. + /// + /// The backdrop vector. 
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractDest" compositing equation. /// @@ -1284,6 +1799,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "SubtractDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "SubtractDestAtop" compositing equation. /// @@ -1299,6 +1827,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestOver" compositing equation. /// @@ -1314,6 +1857,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestIn" compositing equation. /// @@ -1329,6 +1887,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "SubtractDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractDestOut" compositing equation. /// @@ -1344,6 +1913,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "SubtractDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractXor" compositing equation. /// @@ -1359,6 +1939,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "SubtractXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractClear" compositing equation. /// @@ -1374,6 +1965,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "SubtractClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrc" compositing equation. @@ -1606,6 +2208,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "ScreenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "ScreenSrcAtop" compositing equation. /// @@ -1621,6 +2234,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcOver" compositing equation. /// @@ -1636,6 +2264,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcIn" compositing equation. /// @@ -1651,6 +2294,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrcOut" compositing equation. /// @@ -1666,6 +2320,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenDest" compositing equation. /// @@ -1679,6 +2344,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "ScreenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "ScreenDestAtop" compositing equation. /// @@ -1691,7 +2369,37 @@ internal static partial class PorterDuffFunctions { source.W *= opacity; - return Atop(source, backdrop, Screen(source, backdrop)); + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return Over(source, backdrop, Screen(source, backdrop)); } /// @@ -1700,11 +2408,11 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 ScreenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Screen(source, backdrop)); } @@ -1724,6 +2432,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "ScreenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenDestOut" compositing equation. /// @@ -1739,6 +2458,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "ScreenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenXor" compositing equation. /// @@ -1754,6 +2484,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "ScreenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenClear" compositing equation. /// @@ -1769,6 +2510,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "ScreenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrc" compositing equation. @@ -2001,6 +2753,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "DarkenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "DarkenSrcAtop" compositing equation. /// @@ -2016,6 +2779,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcOver" compositing equation. /// @@ -2031,6 +2809,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcIn" compositing equation. /// @@ -2046,6 +2839,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrcOut" compositing equation. /// @@ -2061,6 +2865,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenDest" compositing equation. /// @@ -2074,6 +2889,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "DarkenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "DarkenDestAtop" compositing equation. /// @@ -2089,6 +2917,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestOver" compositing equation. /// @@ -2104,6 +2947,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestIn" compositing equation. /// @@ -2119,6 +2977,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "DarkenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenDestOut" compositing equation. /// @@ -2134,6 +3003,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "DarkenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenXor" compositing equation. /// @@ -2149,6 +3029,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "DarkenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenClear" compositing equation. /// @@ -2164,6 +3055,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "DarkenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrc" compositing equation. @@ -2396,6 +3298,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "LightenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "LightenSrcAtop" compositing equation. /// @@ -2411,6 +3324,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcOver" compositing equation. /// @@ -2426,6 +3354,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcIn" compositing equation. /// @@ -2441,6 +3384,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "LightenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrcOut" compositing equation. /// @@ -2456,6 +3410,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "LightenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenDest" compositing equation. /// @@ -2469,6 +3434,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "LightenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "LightenDestAtop" compositing equation. /// @@ -2484,6 +3462,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Lighten(source, backdrop)); + } + /// /// Returns the result of the "LightenDestOver" compositing equation. /// @@ -2499,19 +3492,60 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Lighten(source, backdrop)); + } + + /// + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return In(source, backdrop); + } + /// /// Returns the result of the "LightenDestIn" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + + /// + /// Returns the result of the "LightenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) { source.W *= opacity; - return In(source, backdrop); + return Out(source, backdrop); } /// @@ -2520,14 +3554,10 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) - { - source.W *= opacity; - - return Out(source, backdrop); - } + public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenXor" compositing equation. @@ -2544,6 +3574,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "LightenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenClear" compositing equation. /// @@ -2559,6 +3600,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "LightenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrc" compositing equation. @@ -2791,6 +3843,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "OverlaySrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "OverlaySrcAtop" compositing equation. /// @@ -2806,6 +3869,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcOver" compositing equation. /// @@ -2821,6 +3899,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcIn" compositing equation. /// @@ -2836,6 +3929,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrcOut" compositing equation. /// @@ -2851,6 +3955,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayDest" compositing equation. /// @@ -2864,6 +3979,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "OverlayDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "OverlayDestAtop" compositing equation. /// @@ -2879,6 +4007,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestOver" compositing equation. /// @@ -2894,6 +4037,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestIn" compositing equation. /// @@ -2909,6 +4067,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "OverlayDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayDestOut" compositing equation. /// @@ -2924,6 +4093,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "OverlayDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayXor" compositing equation. /// @@ -2939,6 +4119,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "OverlayXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayClear" compositing equation. /// @@ -2954,6 +4145,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "OverlayClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrc" compositing equation. @@ -3186,6 +4388,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "HardLightSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "HardLightSrcAtop" compositing equation. /// @@ -3201,6 +4414,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcOver" compositing equation. /// @@ -3216,6 +4444,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcIn" compositing equation. /// @@ -3231,6 +4474,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrcOut" compositing equation. /// @@ -3246,6 +4500,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightDest" compositing equation. /// @@ -3259,6 +4524,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "HardLightDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "HardLightDestAtop" compositing equation. /// @@ -3274,6 +4552,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestOver" compositing equation. /// @@ -3289,6 +4582,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestIn" compositing equation. /// @@ -3304,6 +4612,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "HardLightDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightDestOut" compositing equation. /// @@ -3319,6 +4638,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "HardLightDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightXor" compositing equation. /// @@ -3334,6 +4664,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "HardLightXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightClear" compositing equation. /// @@ -3349,6 +4690,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "HardLightClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrc" compositing equation. diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 40d8b8997..5baa1e864 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -15,6 +15,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -36,6 +38,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "<#=blender#>Src" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. /// @@ -51,6 +64,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcOver" compositing equation. /// @@ -66,6 +94,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcIn" compositing equation. /// @@ -81,6 +124,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. /// @@ -96,6 +150,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. 
Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Dest" compositing equation. /// @@ -109,6 +174,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "<#=blender#>Dest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Dest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "<#=blender#>DestAtop" compositing equation. /// @@ -124,6 +202,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestOver" compositing equation. /// @@ -139,6 +232,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestIn" compositing equation. /// @@ -154,6 +262,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. /// @@ -169,6 +288,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>Xor" compositing equation. /// @@ -184,6 +314,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Xor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Clear" compositing equation. /// @@ -199,6 +340,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Clear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + <#} #> <# void GenerateGenericPixelBlender(string blender, string composer) { #> From ef34960e81a5b9475b394d0fe4017befe17b1b63 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:40:29 +1000 Subject: [PATCH 04/22] Fix code generation --- .../PorterDuffFunctions.Generated.cs | 18 +++++++++--------- .../PorterDuffFunctions.Generated.tt | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index 2b365f177..5740a704c 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -338,7 +338,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -883,7 +883,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -1428,7 +1428,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -1973,7 +1973,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -2518,7 +2518,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -3063,7 +3063,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -3608,7 +3608,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -4153,7 +4153,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -4698,7 +4698,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 5baa1e864..34eeb78cb 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -348,7 +348,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); <#} #> From 9f8bcc464db227bfc570c98ea6d23583f38fd4a7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:41:55 +1000 Subject: [PATCH 05/22] Respond to feedback --- src/ImageSharp/Common/Constants.cs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs index a3cfe3623..d4640f133 100644 --- a/src/ImageSharp/Common/Constants.cs +++ b/src/ImageSharp/Common/Constants.cs @@ -1,8 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System.Runtime.Intrinsics; - namespace SixLabors.ImageSharp; /// @@ -15,11 +13,6 @@ internal static class Constants /// public static readonly float Epsilon = 0.001F; - /// - /// The epsilon value for comparing floating point numbers. 
- /// - public static readonly Vector256 Epsilon256 = Vector256.Create(0.001F); - /// /// The epsilon squared value for comparing floating point numbers. /// From 5fedca864782a3e23ea86d5a37d416a94e922559 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:42:13 +1000 Subject: [PATCH 06/22] Respond to feedback --- .../PixelBlenders/PorterDuffFunctions.cs | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 551f17f20..3fe375344 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -23,9 +23,6 @@ internal static partial class PorterDuffFunctions { private const int BlendAlphaControl = 0b_10_00_10_00; private const int ShuffleAlphaControl = 0b_11_11_11_11; - private static readonly Vector256 Vector256Half = Vector256.Create(0.5F); - private static readonly Vector256 Vector256One = Vector256.Create(1F); - private static readonly Vector256 Vector256Two = Vector256.Create(2F); /// /// Returns the result of the "Normal" compositing equation. @@ -85,7 +82,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Add(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256One, Avx.Add(backdrop, source)); + => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -105,7 +102,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256One, Avx.Subtract(backdrop, source)); + => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -125,7 +122,10 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Screen(Vector256 backdrop, Vector256 source) - => Avx.Subtract(Vector256One, Avx.Multiply(Avx.Subtract(Vector256One, backdrop), Avx.Subtract(Vector256One, source))); + { + Vector256 vOne = Vector256.Create(1F); + return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); + } /// /// Returns the result of the "Darken" compositing equation. @@ -192,7 +192,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Overlay(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(backdrop, source); - return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -221,7 +221,7 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLight(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(source, backdrop); - return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -243,10 +243,12 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { - Vector256 left = Avx.Multiply(Avx.Multiply(Vector256Two, backdrop), source); - Vector256 right = Avx.Subtract(Vector256One, Avx.Multiply(Avx.Multiply(Vector256Two, 
Avx.Subtract(Vector256One, source)), Avx.Subtract(Vector256One, backdrop))); + Vector256 vOne = Vector256.Create(1F); + Vector256 vTwo = Vector256.Create(2F); + Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); + Vector256 right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), Avx.Subtract(vOne, backdrop))); - Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256Half); + Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -305,7 +307,7 @@ internal static partial class PorterDuffFunctions color = SimdUtils.HwIntrinsics.MultiplyAdd(blend, blendW, color); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -358,7 +360,7 @@ internal static partial class PorterDuffFunctions Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(destination, dstW, Avx.Multiply(blend, blendW)); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -398,7 +400,7 @@ internal static partial class PorterDuffFunctions Vector256 color = Avx.Multiply(source, alpha); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -432,13 +434,13 @@ internal static partial class PorterDuffFunctions // calculate alpha Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256One, dW), sW); + Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), 
dW), sW); // premultiply Vector256 color = Avx.Multiply(source, alpha); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -477,15 +479,16 @@ internal static partial class PorterDuffFunctions Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 srcW = Avx.Subtract(Vector256One, dW); - Vector256 dstW = Avx.Subtract(Vector256One, sW); + Vector256 vOne = Vector256.Create(1F); + Vector256 srcW = Avx.Subtract(vOne, dW); + Vector256 dstW = Avx.Subtract(vOne, sW); // calculate alpha Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(sW, source), srcW, Avx.Multiply(Avx.Multiply(dW, destination), dstW)); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } From 907400f2dc426a2f709981dfdf1b47cabca79228 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:54:42 +1000 Subject: [PATCH 07/22] Use Permute --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 3fe375344..183f4b59f 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -291,8 +291,8 @@ internal static partial class PorterDuffFunctions public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { // calculate weights - Vector256 sW = 
Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); From 9a552f17ece1a35d07794351c57904c5d8d6b505 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:59:34 +1000 Subject: [PATCH 08/22] Revert "Use Permute" This reverts commit 907400f2dc426a2f709981dfdf1b47cabca79228. --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 183f4b59f..3fe375344 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -291,8 +291,8 @@ internal static partial class PorterDuffFunctions public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { // calculate weights - Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); - Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); From bde9324f17e4367cf94114244191ae66cfa831a7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 15:58:47 +1000 Subject: [PATCH 09/22] Use Permute --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 
3fe375344..fa497c872 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -291,8 +291,8 @@ internal static partial class PorterDuffFunctions public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { // calculate weights - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); @@ -392,9 +392,7 @@ internal static partial class PorterDuffFunctions public static Vector256 In(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(sW, dW); + Vector256 alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl); // premultiply Vector256 color = Avx.Multiply(source, alpha); From 41cfa9b1fb9fa9e3c80a67433263124a2470389f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 15:59:10 +1000 Subject: [PATCH 10/22] Port DefaultPixelBlenders --- .../DefaultPixelBlenders.Generated.cs | 7470 ++++++++++++++++- .../DefaultPixelBlenders.Generated.tt | 73 +- 2 files changed, 7100 insertions(+), 443 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index cf1910121..c2d97efa0 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -3,6 +3,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; 
+using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -43,18 +47,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -81,18 +146,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -119,18 +245,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -157,18 +344,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -195,18 +443,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -233,18 +542,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -271,18 +641,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -309,18 +740,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -347,18 +839,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -385,18 +938,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -423,18 +1037,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -461,18 +1136,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -499,18 +1235,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -537,18 +1334,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -575,18 +1433,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -613,18 +1532,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -651,18 +1631,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -689,18 +1730,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -727,18 +1829,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -765,18 +1928,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -803,18 +2027,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -841,18 +2126,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); - } - } + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); - /// + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + } + + /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -879,18 +2225,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -917,18 +2324,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -955,18 +2423,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -993,18 +2522,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1031,18 +2621,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1069,18 +2720,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1107,18 +2819,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1145,18 +2918,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1183,18 +3017,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1221,18 +3116,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1259,18 +3215,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1297,18 +3314,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1335,18 +3413,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1373,18 +3512,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1411,18 +3611,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1449,18 +3710,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1487,18 +3809,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1525,18 +3908,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1563,18 +4007,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1601,18 +4106,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1639,18 +4205,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1677,18 +4304,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } } } - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) - { - for (int i = 0; i < destination.Length; i++) + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) + { + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1715,18 +4403,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1753,18 +4502,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1791,18 +4601,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1829,18 +4700,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1867,18 +4799,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1905,18 +4898,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1943,18 +4997,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1981,18 +5096,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2019,18 +5195,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2057,18 +5294,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2095,18 +5393,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2133,18 +5492,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2171,18 +5591,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + // Each Vector256 spans two pixels: the lower half takes amount[n], the upper half amount[n + 1]. + Vector256 opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2209,18 +5690,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2247,18 +5789,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2285,18 +5888,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2323,18 +5987,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2361,18 +6086,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2399,18 +6185,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2437,18 +6284,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2475,18 +6383,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2513,18 +6482,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2551,18 +6581,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2589,18 +6680,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2627,18 +6779,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2665,18 +6878,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2703,18 +6977,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2741,18 +7076,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2779,18 +7175,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2817,18 +7274,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2855,18 +7373,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2893,18 +7472,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2931,18 +7571,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2969,18 +7670,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3007,18 +7769,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3045,18 +7868,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3083,18 +7967,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3121,18 +8066,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3159,18 +8165,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3197,18 +8264,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // Each Vector256 covers two pixels, so its lower and upper halves take consecutive amount values. TODO: clamp outside of the loop using our SIMD methods. + Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3235,18 +8363,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3273,18 +8462,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3311,18 +8561,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3349,18 +8660,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3387,18 +8759,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3425,18 +8858,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3463,18 +8957,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3501,18 +9056,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3539,18 +9155,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3577,18 +9254,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3615,18 +9353,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3653,18 +9452,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3691,18 +9551,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3729,18 +9650,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3767,18 +9749,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3805,18 +9848,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3843,18 +9947,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3881,18 +10046,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3919,18 +10145,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3957,18 +10244,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3995,18 +10343,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4033,18 +10442,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4071,18 +10541,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4109,18 +10640,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) - { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 7bd51439c..6d98c6cd9 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -13,6 +13,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -86,18 +90,79 @@ var blenders = new []{ protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. 
+ Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } From b4ff1e4db39d4be8f5699da1e72d3e79864efb95 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 20:16:31 +1000 Subject: [PATCH 11/22] Fix issues --- .../PixelBlenders/PorterDuffFunctions.cs | 16 +++++----- .../PixelBlenders/PorterDuffFunctionsTests.cs | 31 +++++++++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index fa497c872..af111849a 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -293,8 +293,8 @@ internal static partial class PorterDuffFunctions // calculate weights Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); - Vector256 blendW = Avx.Multiply(sW, dW); + Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = 
Avx.Subtract(dW, blendW); Vector256 srcW = Avx.Subtract(sW, blendW); @@ -303,8 +303,8 @@ internal static partial class PorterDuffFunctions // calculate final color Vector256 color = Avx.Multiply(destination, dstW); - color = SimdUtils.HwIntrinsics.MultiplyAdd(source, srcW, color); - color = SimdUtils.HwIntrinsics.MultiplyAdd(blend, blendW, color); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); @@ -349,15 +349,15 @@ internal static partial class PorterDuffFunctions public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) { // calculate final alpha - Vector256 alpha = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 alpha = Avx.Permute(destination, ShuffleAlphaControl); // calculate weights - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, alpha); Vector256 dstW = Avx.Subtract(alpha, blendW); // calculate final color - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(destination, dstW, Avx.Multiply(blend, blendW)); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); @@ -482,8 +482,8 @@ internal static partial class PorterDuffFunctions Vector256 dstW = Avx.Subtract(vOne, sW); // calculate alpha - Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(sW, source), srcW, Avx.Multiply(Avx.Multiply(dW, destination), dstW)); + Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); + Vector256 color = 
SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 45dece8ec..02b4b0ea5 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -2,6 +2,7 @@ // Licensed under the Six Labors Split License. using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats.PixelBlenders; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -9,7 +10,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTests { - public static TheoryData NormalBlendFunctionData = new TheoryData + public static TheoryData NormalBlendFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) } @@ -23,7 +24,19 @@ public class PorterDuffFunctionsTests Assert.Equal(expected, actual); } - public static TheoryData MultiplyFunctionData = new TheoryData + [Theory] + [MemberData(nameof(NormalBlendFunctionData))] + public void NormalBlendFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = 
PorterDuffFunctions.NormalSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual); + } + + public static TheoryData MultiplyFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) }, @@ -38,7 +51,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData AddFunctionData = new TheoryData + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, @@ -53,7 +66,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData SubtractFunctionData = new TheoryData + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -68,7 +81,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData ScreenFunctionData = new TheoryData + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -83,7 +96,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData DarkenFunctionData = new TheoryData + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 
1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, @@ -98,7 +111,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData LightenFunctionData = new TheoryData + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -113,7 +126,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData OverlayFunctionData = new TheoryData + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -128,7 +141,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData HardLightFunctionData = new TheoryData + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1f) }, From c58be60c754fc40264d9f9640d6f6160268d57ce Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 20:46:18 +1000 Subject: [PATCH 12/22] Add additional PD tests --- .../PixelBlenders/PorterDuffFunctions.cs | 2 +- .../PixelBlenders/PorterDuffFunctionsTests.cs | 106 +++++++++++++++++- .../TestUtilities/ApproximateFloatComparer.cs | 17 ++- 3 files changed, 119 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index af111849a..d1bd5bad3 100644 --- 
a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -102,7 +102,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); + => Avx.Max(Vector256.Zero, Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 02b4b0ea5..189d21f1e 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -10,6 +10,8 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTests { + private static readonly ApproximateFloatComparer FloatComparer = new(.000001F); + public static TheoryData NormalBlendFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -33,7 +35,7 @@ public class PorterDuffFunctionsTests Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); Vector256 actual = PorterDuffFunctions.NormalSrcOver(back256, source256, Vector256.Create(amount)); - Assert.Equal(expected256, actual); + Assert.Equal(expected256, actual, FloatComparer); } public static TheoryData MultiplyFunctionData { get; } = new() @@ -51,21 +53,45 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(MultiplyFunctionData))] + public void MultiplyFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + 
Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.MultiplySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, - { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, - { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(.2075676f, .2075676f, .2075676f, .37f) } + { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1) }, + { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(0.24324325f, 0.24324325f, 0.24324325f, .37f) } }; [Theory] [MemberData(nameof(AddFunctionData))] public void AddFunction(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) { - Vector4 actual = PorterDuffFunctions.MultiplySrcOver((Vector4)back, source, amount); + Vector4 actual = PorterDuffFunctions.AddSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(AddFunctionData))] + public void AddFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, 
expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.AddSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, @@ -81,6 +107,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(SubtractFunctionData))] + public void SubtractFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.SubtractSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -96,6 +134,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(ScreenFunctionData))] + public void ScreenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + 
Vector256 actual = PorterDuffFunctions.ScreenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -111,6 +161,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(DarkenFunctionData))] + public void DarkenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.DarkenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -126,6 +188,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(LightenFunctionData))] + public void LightenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.LightenSrcOver(back256, source256, 
Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -141,6 +215,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(OverlayFunctionData))] + public void OverlayFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.OverlaySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -155,4 +241,16 @@ public class PorterDuffFunctionsTests Vector4 actual = PorterDuffFunctions.HardLightSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } + + [Theory] + [MemberData(nameof(HardLightFunctionData))] + public void HardLightFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.HardLightSrcOver(back256, 
source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs index 6d9652d89..e35f36fee 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs @@ -1,7 +1,9 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Diagnostics.CodeAnalysis; using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Tests; @@ -14,7 +16,8 @@ internal readonly struct ApproximateFloatComparer : IEqualityComparer, IEqualityComparer, IEqualityComparer, - IEqualityComparer + IEqualityComparer, + IEqualityComparer> { private readonly float epsilon; @@ -72,4 +75,16 @@ internal readonly struct ApproximateFloatComparer : /// public int GetHashCode(ColorMatrix obj) => obj.GetHashCode(); + + public bool Equals(Vector256 x, Vector256 y) + => this.Equals(x.GetElement(0), y.GetElement(0)) + && this.Equals(x.GetElement(1), y.GetElement(1)) + && this.Equals(x.GetElement(2), y.GetElement(2)) + && this.Equals(x.GetElement(3), y.GetElement(3)) + && this.Equals(x.GetElement(4), y.GetElement(4)) + && this.Equals(x.GetElement(5), y.GetElement(5)) + && this.Equals(x.GetElement(6), y.GetElement(6)) + && this.Equals(x.GetElement(7), y.GetElement(7)); + + public int GetHashCode([DisallowNull] Vector256 obj) => obj.GetHashCode(); } From dff381fd554227858a534c49c8c5ea6d0534403e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:08:19 +1000 Subject: [PATCH 13/22] Fix amount span assignment --- .../DefaultPixelBlenders.Generated.cs | 648 +++++++++++++++--- .../DefaultPixelBlenders.Generated.tt | 6 +- 2 files changed, 545 insertions(+), 109 deletions(-) diff --git 
a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index c2d97efa0..f28fba25c 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -98,7 +98,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -197,7 +201,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -296,7 +304,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -395,7 +407,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -494,7 +510,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -593,7 +613,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -692,7 +716,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -791,7 +819,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -890,7 +922,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -989,7 +1025,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1088,7 +1128,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1187,7 +1231,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1286,7 +1334,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1385,7 +1437,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1484,7 +1540,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1583,7 +1643,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1682,7 +1746,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1781,7 +1849,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1880,7 +1952,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1979,7 +2055,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2078,7 +2158,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2177,7 +2261,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2276,7 +2364,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2375,7 +2467,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2474,7 +2570,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2573,7 +2673,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2672,7 +2776,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2771,7 +2879,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2870,7 +2982,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2969,7 +3085,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3068,7 +3188,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3167,7 +3291,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3266,7 +3394,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3365,7 +3497,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3464,7 +3600,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3563,7 +3703,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3662,7 +3806,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3761,7 +3909,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3860,7 +4012,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3959,7 +4115,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4058,7 +4218,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4157,7 +4321,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4256,7 +4424,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4355,7 +4527,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4454,7 +4630,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4553,7 +4733,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4652,7 +4836,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4751,7 +4939,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4850,7 +5042,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4949,7 +5145,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5048,7 +5248,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5147,7 +5351,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5246,7 +5454,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5345,7 +5557,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5444,7 +5660,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5543,7 +5763,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5642,7 +5866,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5741,7 +5969,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5840,7 +6072,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5939,7 +6175,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6038,7 +6278,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6137,7 +6381,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6236,7 +6484,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6335,7 +6587,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6434,7 +6690,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6533,7 +6793,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6632,7 +6896,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6731,7 +6999,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6830,7 +7102,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6929,7 +7205,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7028,7 +7308,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7127,7 +7411,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7226,7 +7514,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7325,7 +7617,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7424,7 +7720,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7523,7 +7823,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7622,7 +7926,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7721,7 +8029,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7820,7 +8132,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7919,7 +8235,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8018,7 +8338,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8117,7 +8441,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8216,7 +8544,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8315,7 +8647,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8414,7 +8750,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8513,7 +8853,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8612,7 +8956,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8711,7 +9059,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8810,7 +9162,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8909,7 +9265,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9008,7 +9368,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9107,7 +9471,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9206,7 +9574,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9305,7 +9677,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9404,7 +9780,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9503,7 +9883,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9602,7 +9986,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9701,7 +10089,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9800,7 +10192,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9899,7 +10295,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9998,7 +10398,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10097,7 +10501,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10196,7 +10604,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10295,7 +10707,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10394,7 +10810,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10493,7 +10913,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10592,7 +11016,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10691,7 +11119,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 6d98c6cd9..07165a9b5 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -141,7 +141,11 @@ var blenders = new []{ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. 
+ Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); From 6cb6bd41f1fc3159259256a2a0372387fe0cc425 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:20:38 +1000 Subject: [PATCH 14/22] Better clamp, fix offset (again) --- .../DefaultPixelBlenders.Generated.cs | 1080 ++++++++++------- .../DefaultPixelBlenders.Generated.tt | 10 +- 2 files changed, 654 insertions(+), 436 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index f28fba25c..2db61a06f 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -95,20 +95,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -198,20 +200,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -301,20 +305,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -404,20 +410,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -507,20 +515,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -610,20 +620,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -713,20 +725,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -816,20 +830,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -919,20 +935,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1022,20 +1040,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1125,20 +1145,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1228,20 +1250,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1331,20 +1355,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1434,20 +1460,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1537,20 +1565,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1640,20 +1670,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1743,20 +1775,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1846,20 +1880,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1949,20 +1985,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2052,20 +2090,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2155,20 +2195,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2258,20 +2300,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2361,20 +2405,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2464,20 +2510,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2567,20 +2615,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2670,20 +2720,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2773,20 +2825,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2876,20 +2930,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2979,20 +3035,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3082,20 +3140,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3185,20 +3245,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3288,20 +3350,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3391,20 +3455,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3494,20 +3560,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3597,20 +3665,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3700,20 +3770,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3803,20 +3875,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3906,20 +3980,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4009,20 +4085,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4112,20 +4190,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4215,20 +4295,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4318,20 +4400,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4421,20 +4505,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4524,20 +4610,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4627,20 +4715,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4730,20 +4820,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4833,20 +4925,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4936,20 +5030,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5039,20 +5135,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5142,20 +5240,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5245,20 +5345,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5348,20 +5450,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5451,20 +5555,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5554,20 +5660,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5657,20 +5765,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5760,20 +5870,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5863,20 +5975,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5966,20 +6080,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6069,20 +6185,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6172,20 +6290,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6275,20 +6395,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6378,20 +6500,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6481,20 +6605,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6584,20 +6710,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6687,20 +6815,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6790,20 +6920,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6893,20 +7025,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6996,20 +7130,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7099,20 +7235,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7202,20 +7340,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7305,20 +7445,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7408,20 +7550,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7511,20 +7655,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7614,20 +7760,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7717,20 +7865,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7820,20 +7970,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7923,20 +8075,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8026,20 +8180,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8129,20 +8285,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8232,20 +8390,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8335,20 +8495,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8438,20 +8600,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8541,20 +8705,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8644,20 +8810,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8747,20 +8915,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8850,20 +9020,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8953,20 +9125,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9056,20 +9230,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9159,20 +9335,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9262,20 +9440,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9365,20 +9545,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9468,20 +9650,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9571,20 +9755,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9674,20 +9860,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9777,20 +9965,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9880,20 +10070,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9983,20 +10175,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10086,20 +10280,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10189,20 +10385,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10292,20 +10490,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10395,20 +10595,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10498,20 +10700,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10601,20 +10805,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10704,20 +10910,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10807,20 +11015,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10910,20 +11120,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -11013,20 +11225,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -11116,20 +11330,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 07165a9b5..22b9ebf98 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -138,20 +138,22 @@ var blenders = new []{ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. 
Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) From c06da8c4bcf37cbdc327a90d38bedbbff08fca73 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:34:06 +1000 Subject: [PATCH 15/22] Add NormalSrcOver benchmark --- .../PixelBlenders/PorterDuffBulkVsPixel.cs | 24 +++---- .../PorterDuffBulkVsSingleVector.cs | 68 +++++++++++++++++++ 2 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs index 68956c880..3e6667dbc 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs @@ -12,9 +12,9 @@ namespace SixLabors.ImageSharp.Benchmarks; public class PorterDuffBulkVsPixel { - private Configuration Configuration => Configuration.Default; + private static Configuration Configuration => Configuration.Default; - private void BulkVectorConvert( + private static void BulkVectorConvert( Span destination, Span background, Span source, @@ -31,18 +31,18 @@ public class PorterDuffBulkVsPixel Span backgroundSpan = buffer.Slice(destination.Length, destination.Length); Span 
sourceSpan = buffer.Slice(destination.Length * 2, destination.Length); - PixelOperations.Instance.ToVector4(this.Configuration, background, backgroundSpan); - PixelOperations.Instance.ToVector4(this.Configuration, source, sourceSpan); + PixelOperations.Instance.ToVector4(Configuration, background, backgroundSpan); + PixelOperations.Instance.ToVector4(Configuration, source, sourceSpan); for (int i = 0; i < destination.Length; i++) { destinationSpan[i] = PorterDuffFunctions.NormalSrcOver(backgroundSpan[i], sourceSpan[i], amount[i]); } - PixelOperations.Instance.FromVector4Destructive(this.Configuration, destinationSpan, destination); + PixelOperations.Instance.FromVector4Destructive(Configuration, destinationSpan, destination); } - private void BulkPixelConvert( + private static void BulkPixelConvert( Span destination, Span background, Span source, @@ -60,9 +60,9 @@ public class PorterDuffBulkVsPixel } [Benchmark(Description = "ImageSharp BulkVectorConvert")] - public Size BulkVectorConvert() + public static Size BulkVectorConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); @@ -70,23 +70,23 @@ public class PorterDuffBulkVsPixel for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkVectorConvert(span, span, span, amounts.GetSpan()); + BulkVectorConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); } [Benchmark(Description = "ImageSharp BulkPixelConvert")] - public Size BulkPixelConvert() + public static Size BulkPixelConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); Buffer2D pixels = image.GetRootFramePixelBuffer(); for (int y = 0; y < image.Height; y++) { Span span = 
pixels.DangerousGetRowSpan(y); - this.BulkPixelConvert(span, span, span, amounts.GetSpan()); + BulkPixelConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs new file mode 100644 index 000000000..6727f1c91 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats.PixelBlenders; + +namespace SixLabors.ImageSharp.Benchmarks.PixelBlenders; + +public class PorterDuffBulkVsSingleVector +{ + private Vector4[] backdrop; + private Vector4[] source; + + [GlobalSetup] + public void Setup() + { + this.backdrop = new Vector4[8 * 20]; + this.source = new Vector4[8 * 20]; + + FillRandom(this.backdrop); + FillRandom(this.source); + } + + private static void FillRandom(Vector4[] arr) + { + Random rng = new(); + for (int i = 0; i < arr.Length; i++) + { + arr[i].X = rng.NextSingle(); + arr[i].Y = rng.NextSingle(); + arr[i].Z = rng.NextSingle(); + arr[i].W = rng.NextSingle(); + } + } + + [Benchmark(Description = "Scalar")] + public Vector4 OverlayValueFunction_Scalar() + { + Vector4 result = default; + for (int i = 0; i < this.backdrop.Length; i++) + { + result = PorterDuffFunctions.NormalSrcOver(this.backdrop[i], this.source[i], .5F); + } + + return result; + } + + [Benchmark(Description = "Avx")] + public Vector256 OverlayValueFunction_Avx() + { + ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + + Vector256 result = 
default; + Vector256 opacity = Vector256.Create(.5F); + int count = this.backdrop.Length / 2; + for (int i = 0; i < count; i++) + { + result = PorterDuffFunctions.NormalSrcOver(Unsafe.Add(ref backdrop, i), Unsafe.Add(ref source, i), opacity); + } + + return result; + } +} From b05b25b36de60fa143b0cd4c51364abbe8151ba9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:57:34 +1000 Subject: [PATCH 16/22] Use RemoteExecutor for composition tests --- .../PorterDuffCompositorTests.cs | 73 ++++++++++--------- .../FeatureTesting/FeatureTestRunner.cs | 46 ++++++++++++ 2 files changed, 86 insertions(+), 33 deletions(-) diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs index c81b0a74f..1086afe76 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs @@ -1,59 +1,66 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; - using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Tests.TestUtilities; -using Xunit; +namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffCompositorTests { // TODO: Add other modes to compare. 
public static readonly TheoryData CompositingOperators = - new TheoryData - { - PixelAlphaCompositionMode.Src, - PixelAlphaCompositionMode.SrcAtop, - PixelAlphaCompositionMode.SrcOver, - PixelAlphaCompositionMode.SrcIn, - PixelAlphaCompositionMode.SrcOut, - PixelAlphaCompositionMode.Dest, - PixelAlphaCompositionMode.DestAtop, - PixelAlphaCompositionMode.DestOver, - PixelAlphaCompositionMode.DestIn, - PixelAlphaCompositionMode.DestOut, - PixelAlphaCompositionMode.Clear, - PixelAlphaCompositionMode.Xor - }; + new() + { + PixelAlphaCompositionMode.Src, + PixelAlphaCompositionMode.SrcAtop, + PixelAlphaCompositionMode.SrcOver, + PixelAlphaCompositionMode.SrcIn, + PixelAlphaCompositionMode.SrcOut, + PixelAlphaCompositionMode.Dest, + PixelAlphaCompositionMode.DestAtop, + PixelAlphaCompositionMode.DestOver, + PixelAlphaCompositionMode.DestIn, + PixelAlphaCompositionMode.DestOut, + PixelAlphaCompositionMode.Clear, + PixelAlphaCompositionMode.Xor + }; [Theory] [WithFile(TestImages.Png.PDDest, nameof(CompositingOperators), PixelTypes.Rgba32)] public void PorterDuffOutputIsCorrect(TestImageProvider provider, PixelAlphaCompositionMode mode) { - var srcFile = TestFile.Create(TestImages.Png.PDSrc); - using (Image src = srcFile.CreateRgba32Image()) - using (Image dest = provider.GetImage()) + static void RunTest(string providerDump, string alphaMode) { - var options = new GraphicsOptions + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); + + TestFile srcFile = TestFile.Create(TestImages.Png.PDSrc); + using Image src = srcFile.CreateRgba32Image(); + using Image dest = provider.GetImage(); + GraphicsOptions options = new() { Antialias = false, - AlphaCompositionMode = mode + AlphaCompositionMode = Enum.Parse(alphaMode) }; - using (Image res = dest.Clone(x => x.DrawImage(src, options))) - { - string combinedMode = mode.ToString(); - - if (combinedMode != "Src" && combinedMode.StartsWith("Src")) - { - combinedMode = combinedMode.Substring(3); - } + using 
Image res = dest.Clone(x => x.DrawImage(src, options)); + string combinedMode = alphaMode; - res.DebugSave(provider, combinedMode); - res.CompareToReferenceOutput(provider, combinedMode); + if (combinedMode != "Src" && combinedMode.StartsWith("Src", StringComparison.OrdinalIgnoreCase)) + { + combinedMode = combinedMode[3..]; } + + res.DebugSave(provider, combinedMode); + res.CompareToReferenceOutput(provider, combinedMode); } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX, + provider, + mode.ToString()); } } diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 1bb64d99d..f68bfdbe6 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -257,6 +257,52 @@ public static class FeatureTestRunner } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + /// The second value to pass as a parameter to the test action. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T arg1, + string arg2) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + ProcessStartInfo processStartInfo = new(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(arg1), + arg2, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(arg1), arg2); + } + } + } + /// /// Runs the given test within an environment /// where the given features. From 916084cc1c4cff357b5c939bc81b4f181bb7446b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 23:12:17 +1000 Subject: [PATCH 17/22] Fix field assignment in benchmark --- .../PixelBlenders/PorterDuffBulkVsSingleVector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs index 6727f1c91..4bd21c9a8 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -53,7 +53,7 @@ public class PorterDuffBulkVsSingleVector public Vector256 OverlayValueFunction_Avx() { ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); - ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.source)); Vector256 result = default; Vector256 opacity = 
Vector256.Create(.5F); From 8ffec30559fb1c16aa0d715b2090a1ace44a3c16 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 23:15:27 +1000 Subject: [PATCH 18/22] Make Scalar default --- .../PixelBlenders/PorterDuffBulkVsSingleVector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs index 4bd21c9a8..fcf7e9dcc 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -37,7 +37,7 @@ public class PorterDuffBulkVsSingleVector } } - [Benchmark(Description = "Scalar")] + [Benchmark(Description = "Scalar", Baseline = true)] public Vector4 OverlayValueFunction_Scalar() { Vector4 result = default; From a666372f68e44d1c610223a97641d2348b9d7782 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 09:52:41 +1000 Subject: [PATCH 19/22] Use FMA where possible. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 24 +++++++++++++++++++ .../PixelBlenders/PorterDuffFunctions.cs | 10 ++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 128218aac..7d2bab259 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -533,6 +533,7 @@ internal static partial class SimdUtils /// /// Performs a multiplication and an addition of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. @@ -555,6 +556,7 @@ internal static partial class SimdUtils /// /// Performs a multiplication and a subtraction of the . + /// TODO: Fix. 
The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) - vs /// The vector to subtract from the intermediate result. @@ -575,6 +577,28 @@ internal static partial class SimdUtils return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); } + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = c - (a * b) + /// The first vector to multiply. + /// The second vector to multiply. + /// The vector to add negated to the intermediate result. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + in Vector256 a, + in Vector256 b, + in Vector256 c) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAddNegated(a, b, c); + } + + return Avx.Subtract(c, Avx.Multiply(a, b)); + } + /// /// as many elements as possible, slicing them down (keeping the remainder). /// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index d1bd5bad3..2d47f1a62 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -124,7 +124,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Screen(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); + return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); } /// @@ -244,10 +244,10 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - Vector256 vTwo = Vector256.Create(2F); Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); - Vector256 right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), 
Avx.Subtract(vOne, backdrop))); + Vector256 vOneMinusSource = Avx.Subtract(vOne, source); + Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -430,9 +430,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Out(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); // premultiply Vector256 color = Avx.Multiply(source, alpha); From afdc53c090791c4c9fe2fd3ccc4ebfc2a5339c06 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 13:16:19 +1000 Subject: [PATCH 20/22] Tanners Top Tips!! 
--- .../PorterDuffFunctions.Generated.cs | 198 +++++++++--------- .../PorterDuffFunctions.Generated.tt | 22 +- .../PixelBlenders/PorterDuffFunctions.cs | 83 ++++++-- 3 files changed, 170 insertions(+), 133 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index 5740a704c..bd522da19 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -23,7 +23,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -49,7 +49,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Normal(backdrop, source)); } @@ -79,7 +79,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Normal(backdrop, source)); } @@ -109,7 +109,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -135,7 +135,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOut(Vector4 
backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -187,7 +187,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Normal(source, backdrop)); } @@ -217,7 +217,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Normal(source, backdrop)); } @@ -247,7 +247,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -273,7 +273,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -299,7 +299,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -325,7 +325,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); 
return Clear(backdrop, source); } @@ -568,7 +568,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -594,7 +594,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Multiply(backdrop, source)); } @@ -624,7 +624,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Multiply(backdrop, source)); } @@ -654,7 +654,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -680,7 +680,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -732,7 +732,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Multiply(source, backdrop)); } @@ -762,7 +762,7 @@ internal static partial 
class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Multiply(source, backdrop)); } @@ -792,7 +792,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -818,7 +818,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -844,7 +844,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -870,7 +870,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -1113,7 +1113,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -1139,7 +1139,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcAtop(Vector4 backdrop, Vector4 
source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Add(backdrop, source)); } @@ -1169,7 +1169,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Add(backdrop, source)); } @@ -1199,7 +1199,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -1225,7 +1225,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -1277,7 +1277,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Add(source, backdrop)); } @@ -1307,7 +1307,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Add(source, backdrop)); } @@ -1337,7 +1337,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = 
WithW(source, source * opacity); return In(source, backdrop); } @@ -1363,7 +1363,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -1389,7 +1389,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -1415,7 +1415,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -1658,7 +1658,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -1684,7 +1684,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Subtract(backdrop, source)); } @@ -1714,7 +1714,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Subtract(backdrop, source)); } @@ -1744,7 +1744,7 @@ internal static partial 
class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -1770,7 +1770,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -1822,7 +1822,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Subtract(source, backdrop)); } @@ -1852,7 +1852,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Subtract(source, backdrop)); } @@ -1882,7 +1882,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -1908,7 +1908,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -1934,7 +1934,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] 
public static Vector4 SubtractXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -1960,7 +1960,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -2203,7 +2203,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -2229,7 +2229,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Screen(backdrop, source)); } @@ -2259,7 +2259,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Screen(backdrop, source)); } @@ -2289,7 +2289,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -2315,7 +2315,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + 
source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -2367,7 +2367,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Screen(source, backdrop)); } @@ -2397,7 +2397,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Screen(source, backdrop)); } @@ -2427,7 +2427,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -2453,7 +2453,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -2479,7 +2479,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -2505,7 +2505,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -2748,7 +2748,7 @@ 
internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -2774,7 +2774,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Darken(backdrop, source)); } @@ -2804,7 +2804,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Darken(backdrop, source)); } @@ -2834,7 +2834,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -2860,7 +2860,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -2912,7 +2912,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Darken(source, backdrop)); } @@ -2942,7 +2942,7 @@ internal static partial class PorterDuffFunctions 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Darken(source, backdrop)); } @@ -2972,7 +2972,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -2998,7 +2998,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -3024,7 +3024,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -3050,7 +3050,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -3293,7 +3293,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -3319,7 +3319,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - 
source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Lighten(backdrop, source)); } @@ -3349,7 +3349,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Lighten(backdrop, source)); } @@ -3379,7 +3379,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -3405,7 +3405,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -3457,7 +3457,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Lighten(source, backdrop)); } @@ -3487,7 +3487,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Lighten(source, backdrop)); } @@ -3517,7 +3517,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source 
= WithW(source, source * opacity); return In(source, backdrop); } @@ -3543,7 +3543,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -3569,7 +3569,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -3595,7 +3595,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -3838,7 +3838,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -3864,7 +3864,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Overlay(backdrop, source)); } @@ -3894,7 +3894,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Overlay(backdrop, source)); } @@ -3924,7 +3924,7 @@ internal static 
partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -3950,7 +3950,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -4002,7 +4002,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Overlay(source, backdrop)); } @@ -4032,7 +4032,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Overlay(source, backdrop)); } @@ -4062,7 +4062,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -4088,7 +4088,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -4114,7 +4114,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] 
public static Vector4 OverlayXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -4140,7 +4140,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -4383,7 +4383,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -4409,7 +4409,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, HardLight(backdrop, source)); } @@ -4439,7 +4439,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, HardLight(backdrop, source)); } @@ -4469,7 +4469,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -4495,7 +4495,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - 
source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -4547,7 +4547,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, HardLight(source, backdrop)); } @@ -4577,7 +4577,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, HardLight(source, backdrop)); } @@ -4607,7 +4607,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -4633,7 +4633,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -4659,7 +4659,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -4685,7 +4685,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return 
Clear(backdrop, source); } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 34eeb78cb..69dac875c 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -33,7 +33,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Src(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -59,7 +59,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, <#=blender#>(backdrop, source)); } @@ -89,7 +89,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, <#=blender#>(backdrop, source)); } @@ -119,7 +119,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -145,7 +145,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -197,7 
+197,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, <#=blender#>(source, backdrop)); } @@ -227,7 +227,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, <#=blender#>(source, backdrop)); } @@ -257,7 +257,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -283,7 +283,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -309,7 +309,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Xor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -335,7 +335,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Clear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } diff --git 
a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 2d47f1a62..cd85939e0 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -263,19 +263,22 @@ internal static partial class PorterDuffFunctions public static Vector4 Over(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; - float srcW = source.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; + Vector4 srcW = sW - blendW; // calculate final alpha - float alpha = dstW + source.W; + Vector4 alpha = dstW + sW; // calculate final color Vector4 color = (destination * dstW) + (source * srcW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + color.W = alpha.W; return color; } @@ -322,18 +325,21 @@ internal static partial class PorterDuffFunctions public static Vector4 Atop(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; // calculate final alpha - float alpha = destination.W; + Vector4 alpha = dW; // calculate final color Vector4 color = (destination * dstW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + color.W = alpha.W; return color; } @@ -373,11 +379,13 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 
In(Vector4 destination, Vector4 source) { - float alpha = destination.W * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = dW * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + color.W = alpha.W; return color; } @@ -411,11 +419,13 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Out(Vector4 destination, Vector4 source) { - float alpha = (1 - destination.W) * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = (Vector4.One - dW) * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + color.W = alpha.W; return color; } @@ -449,15 +459,18 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Xor(Vector4 destination, Vector4 source) { - float srcW = 1 - destination.W; - float dstW = 1 - source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 srcW = Vector4.One - dW; + Vector4 dstW = Vector4.One - sW; - float alpha = (source.W * srcW) + (destination.W * dstW); - Vector4 color = (source.W * source * srcW) + (destination.W * destination * dstW); + Vector4 alpha = (sW * srcW) + (dW * dstW); + Vector4 color = (sW * source * srcW) + (dW * destination * dstW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + color.W = alpha.W; return color; } @@ -493,4 +506,28 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 
Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 WithW(Vector4 value, Vector4 w) + { + // TODO: Provide SSE fallback which uses "shuffle" - just pick XYZ from value and W from w + if (Sse41.IsSupported) + { + return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4(); + } + + value.W = w.W; + return value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 PermuteW(Vector4 value) + { + if (Sse.IsSupported) + { + return Sse.Shuffle(value.AsVector128(), value.AsVector128(), 0b11111111).AsVector4(); + } + + return new(value.W); + } } From 78eb2f176473b332fa318ab5ac87d878dba46a43 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 13:28:44 +1000 Subject: [PATCH 21/22] Use WithW --- .../PixelBlenders/PorterDuffFunctions.cs | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index cd85939e0..bc7958f85 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -278,9 +278,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -339,9 +337,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -385,9 +381,7 @@ internal static partial class PorterDuffFunctions Vector4 color = source * alpha; // premultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -425,9 
+419,7 @@ internal static partial class PorterDuffFunctions Vector4 color = source * alpha; // premultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -470,9 +462,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// From ac0d27d9bdda472c373bf312a149dbfc558d1ccc Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 14:10:49 +1000 Subject: [PATCH 22/22] Provide Sse fallback for WithW --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index bc7958f85..baf7d80c0 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -500,12 +500,19 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 WithW(Vector4 value, Vector4 w) { - // TODO: Provide SSE fallback which uses "shuffle" - just pick XYZ from value and W from w if (Sse41.IsSupported) { return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4(); } + if (Sse.IsSupported) + { + // Create tmp as <w[3], w[0], value[2], value[0]> + // Then return <value[0], value[1], tmp[2], tmp[0]> (which is <value[0], value[1], value[2], w[3]>) + Vector128<float> tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11); + return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4(); + } + value.W = w.W; return value; }