From f047392bf0efe8e9a04122c316b973c922973974 Mon Sep 17 00:00:00 2001 From: Stefan Nikolei Date: Thu, 9 Feb 2023 13:37:01 +0100 Subject: [PATCH 01/35] Remove nullable disable from various files --- src/ImageSharp/Common/Helpers/Guard.cs | 7 ++--- src/ImageSharp/ImageExtensions.cs | 3 +-- src/ImageSharp/ImageFrame{TPixel}.cs | 26 +++++++++---------- src/ImageSharp/IndexedImageFrame{TPixel}.cs | 3 --- .../Extensions/ProcessingExtensions.cs | 9 +++---- .../Linear/AffineTransformProcessor.cs | 2 +- .../Linear/ProjectiveTransformProcessor.cs | 2 +- .../Transforms/Resize/ResizeProcessor.cs | 2 +- 8 files changed, 24 insertions(+), 30 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Guard.cs b/src/ImageSharp/Common/Helpers/Guard.cs index 95211ccda8..a485bf9b36 100644 --- a/src/ImageSharp/Common/Helpers/Guard.cs +++ b/src/ImageSharp/Common/Helpers/Guard.cs @@ -1,9 +1,9 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Runtime.CompilerServices; using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Processing.Processors.Transforms; namespace SixLabors; @@ -17,13 +17,14 @@ internal static partial class Guard /// The type of the value. /// is not a value type. [MethodImpl(InliningOptions.ShortMethod)] - public static void MustBeValueType(TValue value, string parameterName) + public static void SamplerMustBeValueType(TValue value, [CallerArgumentExpression("value")] string? parameterName = null) + where TValue : IResampler { if (value.GetType().IsValueType) { return; } - ThrowHelper.ThrowArgumentException("Type must be a struct.", parameterName); + ThrowHelper.ThrowArgumentException("Type must be a struct.", parameterName!); } } diff --git a/src/ImageSharp/ImageExtensions.cs b/src/ImageSharp/ImageExtensions.cs index 04930a2689..cf970b3166 100644 --- a/src/ImageSharp/ImageExtensions.cs +++ b/src/ImageSharp/ImageExtensions.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Globalization; using System.Text; @@ -180,6 +179,6 @@ public static partial class ImageExtensions // Always available. stream.TryGetBuffer(out ArraySegment buffer); - return $"data:{format.DefaultMimeType};base64,{Convert.ToBase64String(buffer.Array, 0, (int)stream.Length)}"; + return $"data:{format.DefaultMimeType};base64,{Convert.ToBase64String(buffer.Array ?? Array.Empty(), 0, (int)stream.Length)}"; } } diff --git a/src/ImageSharp/ImageFrame{TPixel}.cs b/src/ImageSharp/ImageFrame{TPixel}.cs index fb4ee6ae5f..becd69a0eb 100644 --- a/src/ImageSharp/ImageFrame{TPixel}.cs +++ b/src/ImageSharp/ImageFrame{TPixel}.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -183,7 +182,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource try { - var accessor = new PixelAccessor(this.PixelBuffer); + PixelAccessor accessor = new(this.PixelBuffer); processPixels(accessor); } finally @@ -211,8 +210,8 @@ public sealed class ImageFrame : ImageFrame, IPixelSource try { - var accessor1 = new PixelAccessor(this.PixelBuffer); - var accessor2 = new PixelAccessor(frame2.PixelBuffer); + PixelAccessor accessor1 = new(this.PixelBuffer); + PixelAccessor accessor2 = new(frame2.PixelBuffer); processPixels(accessor1, accessor2); } finally @@ -247,9 +246,9 @@ public sealed class ImageFrame : ImageFrame, IPixelSource try { - var accessor1 = new PixelAccessor(this.PixelBuffer); - var accessor2 = new PixelAccessor(frame2.PixelBuffer); - var accessor3 = new PixelAccessor(frame3.PixelBuffer); + PixelAccessor accessor1 = new(this.PixelBuffer); + PixelAccessor accessor2 = new(frame2.PixelBuffer); + PixelAccessor accessor3 = new(frame3.PixelBuffer); processPixels(accessor1, accessor2, accessor3); } finally @@ -342,8 +341,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource if (disposing) { - this.PixelBuffer?.Dispose(); - this.PixelBuffer = null; + this.PixelBuffer.Dispose(); } this.isDisposed = true; @@ -379,14 +377,14 @@ public sealed class ImageFrame : ImageFrame, IPixelSource /// /// The configuration providing initialization code which allows extending the library. /// The - internal ImageFrame Clone(Configuration configuration) => new ImageFrame(configuration, this); + internal ImageFrame Clone(Configuration configuration) => new(configuration, this); /// /// Returns a copy of the image frame in the given pixel format. /// /// The pixel format. /// The - internal ImageFrame CloneAs() + internal ImageFrame? CloneAs() where TPixel2 : unmanaged, IPixel => this.CloneAs(this.GetConfiguration()); /// @@ -400,11 +398,11 @@ public sealed class ImageFrame : ImageFrame, IPixelSource { if (typeof(TPixel2) == typeof(TPixel)) { - return this.Clone(configuration) as ImageFrame; + return (this.Clone(configuration) as ImageFrame)!; } - var target = new ImageFrame(configuration, this.Width, this.Height, this.Metadata.DeepClone()); - var operation = new RowIntervalOperation(this.PixelBuffer, target.PixelBuffer, configuration); + ImageFrame target = new(configuration, this.Width, this.Height, this.Metadata.DeepClone()); + RowIntervalOperation operation = new(this.PixelBuffer, target.PixelBuffer, configuration); ParallelRowIterator.IterateRowIntervals( configuration, diff --git a/src/ImageSharp/IndexedImageFrame{TPixel}.cs b/src/ImageSharp/IndexedImageFrame{TPixel}.cs index 741b3219e0..12a78a0239 100644 --- a/src/ImageSharp/IndexedImageFrame{TPixel}.cs +++ b/src/ImageSharp/IndexedImageFrame{TPixel}.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using System.Runtime.CompilerServices; @@ -109,8 +108,6 @@ public sealed class IndexedImageFrame : IPixelSource, IDisposable this.isDisposed = true; this.pixelBuffer.Dispose(); this.paletteOwner.Dispose(); - this.pixelBuffer = null; - this.paletteOwner = null; } } } diff --git a/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs b/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs index 575ce293ca..02e688f57f 100644 --- a/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs +++ b/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -135,7 +134,7 @@ public static partial class ProcessingExtensions /// The operation is null. /// The source has been disposed. /// The processing operation failed. - public static Image Clone(this Image source, Action operation) + public static Image? Clone(this Image source, Action operation) => Clone(source, source.GetConfiguration(), operation); /// @@ -150,14 +149,14 @@ public static partial class ProcessingExtensions /// The source has been disposed. /// The processing operation failed. /// The new . - public static Image Clone(this Image source, Configuration configuration, Action operation) + public static Image? Clone(this Image source, Configuration configuration, Action operation) { Guard.NotNull(configuration, nameof(configuration)); Guard.NotNull(source, nameof(source)); Guard.NotNull(operation, nameof(operation)); source.EnsureNotDisposed(); - var visitor = new ProcessingVisitor(configuration, operation, false); + ProcessingVisitor visitor = new(configuration, operation, false); source.AcceptVisitor(visitor); return visitor.ResultImage; } @@ -282,7 +281,7 @@ public static partial class ProcessingExtensions this.mutate = mutate; } - public Image ResultImage { get; private set; } + public Image? ResultImage { get; private set; } public void Visit(Image image) where TPixel : unmanaged, IPixel diff --git a/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs index abb8beccdf..4a13e69174 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs @@ -19,7 +19,7 @@ public class AffineTransformProcessor : CloningImageProcessor public AffineTransformProcessor(Matrix3x2 matrix, IResampler sampler, Size targetDimensions) { Guard.NotNull(sampler, nameof(sampler)); - Guard.MustBeValueType(sampler, nameof(sampler)); + Guard.SamplerMustBeValueType(sampler); if (TransformUtils.IsDegenerate(matrix)) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs index 9b0b979e69..55e3227fb7 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs @@ -19,7 +19,7 @@ public sealed class ProjectiveTransformProcessor : CloningImageProcessor public ProjectiveTransformProcessor(Matrix4x4 matrix, IResampler sampler, Size targetDimensions) { Guard.NotNull(sampler, nameof(sampler)); - Guard.MustBeValueType(sampler, nameof(sampler)); + Guard.SamplerMustBeValueType(sampler); if (TransformUtils.IsDegenerate(matrix)) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs index b7651c03f3..f056e434ed 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs @@ -17,7 +17,7 @@ public class ResizeProcessor : CloningImageProcessor { Guard.NotNull(options, nameof(options)); Guard.NotNull(options.Sampler, nameof(options.Sampler)); - Guard.MustBeValueType(options.Sampler, nameof(options.Sampler)); + Guard.SamplerMustBeValueType(options.Sampler); (Size size, Rectangle rectangle) = ResizeHelper.CalculateTargetLocationAndBounds(sourceSize, options); From 0e40d718953d5ad6d50e4df7f28c07dd0a9e885f Mon Sep 17 00:00:00 2001 From: Stefan Nikolei Date: Fri, 10 Feb 2023 06:42:43 +0100 Subject: [PATCH 02/35] Change Guard removed the generic constrained of IResampler and added notnull --- src/ImageSharp/Common/Helpers/Guard.cs | 5 ++--- .../Processors/Transforms/Linear/AffineTransformProcessor.cs | 2 +- .../Transforms/Linear/ProjectiveTransformProcessor.cs | 2 +- .../Processors/Transforms/Resize/ResizeProcessor.cs | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Guard.cs b/src/ImageSharp/Common/Helpers/Guard.cs index a485bf9b36..96cd094cd8 100644 --- a/src/ImageSharp/Common/Helpers/Guard.cs +++ b/src/ImageSharp/Common/Helpers/Guard.cs @@ -3,7 +3,6 @@ using System.Runtime.CompilerServices; using SixLabors.ImageSharp; -using SixLabors.ImageSharp.Processing.Processors.Transforms; namespace SixLabors; @@ -17,8 +16,8 @@ internal static partial class Guard /// The type of the value. /// is not a value type. [MethodImpl(InliningOptions.ShortMethod)] - public static void SamplerMustBeValueType(TValue value, [CallerArgumentExpression("value")] string? parameterName = null) - where TValue : IResampler + public static void MustBeValueType(TValue value, [CallerArgumentExpression("value")] string? parameterName = null) + where TValue : notnull { if (value.GetType().IsValueType) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs index 4a13e69174..30c279e593 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs @@ -19,7 +19,7 @@ public class AffineTransformProcessor : CloningImageProcessor public AffineTransformProcessor(Matrix3x2 matrix, IResampler sampler, Size targetDimensions) { Guard.NotNull(sampler, nameof(sampler)); - Guard.SamplerMustBeValueType(sampler); + Guard.MustBeValueType(sampler); if (TransformUtils.IsDegenerate(matrix)) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs index 55e3227fb7..9e9507b737 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs @@ -19,7 +19,7 @@ public sealed class ProjectiveTransformProcessor : CloningImageProcessor public ProjectiveTransformProcessor(Matrix4x4 matrix, IResampler sampler, Size targetDimensions) { Guard.NotNull(sampler, nameof(sampler)); - Guard.SamplerMustBeValueType(sampler); + Guard.MustBeValueType(sampler); if (TransformUtils.IsDegenerate(matrix)) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs index f056e434ed..719622a28f 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs @@ -17,7 +17,7 @@ public class ResizeProcessor : CloningImageProcessor { Guard.NotNull(options, nameof(options)); Guard.NotNull(options.Sampler, nameof(options.Sampler)); - Guard.SamplerMustBeValueType(options.Sampler); + Guard.MustBeValueType(options.Sampler); (Size size, Rectangle rectangle) = ResizeHelper.CalculateTargetLocationAndBounds(sourceSize, options); From 517ec8028058d80b8673129311715eb1d98da49e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 17 Feb 2023 21:04:57 +1000 Subject: [PATCH 03/35] Port most of the function components. --- src/ImageSharp/Common/Constants.cs | 9 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 20 +- .../Components/FloatingPointDCT.Intrinsic.cs | 2 +- .../PixelBlenders/PorterDuffFunctions.cs | 253 ++++++++++++++++-- 4 files changed, 244 insertions(+), 40 deletions(-) diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs index fa2f72c74a..a3cfe3623f 100644 --- a/src/ImageSharp/Common/Constants.cs +++ b/src/ImageSharp/Common/Constants.cs @@ -1,6 +1,8 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.Intrinsics; + namespace SixLabors.ImageSharp; /// @@ -13,6 +15,11 @@ internal static class Constants /// public static readonly float Epsilon = 0.001F; + /// + /// The epsilon value for comparing floating point numbers. + /// + public static readonly Vector256 Epsilon256 = Vector256.Create(0.001F); + /// /// The epsilon squared value for comparing floating point numbers. /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 4bc0040c67..128218aac2 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -532,7 +532,7 @@ internal static partial class SimdUtils } /// - /// Performs a multiplication and an addition of the . + /// Performs a multiplication and an addition of the . /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. @@ -549,22 +549,20 @@ internal static partial class SimdUtils { return Fma.MultiplyAdd(vm1, vm0, va); } - else - { - return Avx.Add(Avx.Multiply(vm0, vm1), va); - } + + return Avx.Add(Avx.Multiply(vm0, vm1), va); } /// - /// Performs a multiplication and a substraction of the . + /// Performs a multiplication and a subtraction of the . /// /// ret = (vm0 * vm1) - vs - /// The vector to substract from the intermediate result. + /// The vector to subtract from the intermediate result. /// The first vector to multiply. /// The second vector to multiply. /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplySubstract( + public static Vector256 MultiplySubtract( in Vector256 vs, in Vector256 vm0, in Vector256 vm1) @@ -573,10 +571,8 @@ internal static partial class SimdUtils { return Fma.MultiplySubtract(vm1, vm0, vs); } - else - { - return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); - } + + return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); } /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs index cae89fc3cf..7e102f696d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs @@ -99,7 +99,7 @@ internal static partial class FloatingPointDCT var mm256_F_1_4142 = Vector256.Create(1.414213562f); Vector256 tmp13 = Avx.Add(tmp1, tmp3); - Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); + Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubtract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); tmp0 = Avx.Add(tmp10, tmp13); tmp3 = Avx.Subtract(tmp10, tmp13); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 9bc7e35f30..d7d31c0c8b 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -3,6 +3,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -27,9 +29,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Normal(Vector4 backdrop, Vector4 source) - { - return source; - } + => source; + + /// + /// Returns the result of the "Normal" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Normal(Vector256 backdrop, Vector256 source) + => source; /// /// Returns the result of the "Multiply" compositing equation. @@ -39,9 +49,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Multiply(Vector4 backdrop, Vector4 source) - { - return backdrop * source; - } + => backdrop * source; + + /// + /// Returns the result of the "Multiply" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Multiply(Vector256 backdrop, Vector256 source) + => Avx.Multiply(backdrop, source); /// /// Returns the result of the "Add" compositing equation. @@ -51,9 +69,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Add(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(Vector4.One, backdrop + source); - } + => Vector4.Min(Vector4.One, backdrop + source); + + /// + /// Returns the result of the "Add" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Add(Vector256 backdrop, Vector256 source) + => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -63,9 +89,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Subtract(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(Vector4.Zero, backdrop - source); - } + => Vector4.Max(Vector4.Zero, backdrop - source); + + /// + /// Returns the result of the "Subtract" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Subtract(Vector256 backdrop, Vector256 source) + => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -75,8 +109,19 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Screen(Vector4 backdrop, Vector4 source) + => Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + + /// + /// Returns the result of the "Screen" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Screen(Vector256 backdrop, Vector256 source) { - return Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + Vector256 vOne = Vector256.Create(1F); + return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); } /// @@ -87,9 +132,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Darken(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(backdrop, source); - } + => Vector4.Min(backdrop, source); + + /// + /// Returns the result of the "Darken" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Darken(Vector256 backdrop, Vector256 source) + => Avx.Min(backdrop, source); /// /// Returns the result of the "Lighten" compositing equation. @@ -98,10 +151,17 @@ internal static partial class PorterDuffFunctions /// The source vector. /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Lighten(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(backdrop, source); - } + public static Vector4 Lighten(Vector4 backdrop, Vector4 source) => Vector4.Max(backdrop, source); + + /// + /// Returns the result of the "Lighten" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Lighten(Vector256 backdrop, Vector256 source) + => Avx.Max(backdrop, source); /// /// Returns the result of the "Overlay" compositing equation. @@ -136,16 +196,14 @@ internal static partial class PorterDuffFunctions } /// - /// Helper function for Overlay andHardLight modes + /// Helper function for Overlay and HardLight modes /// /// Backdrop color element /// Source color element /// Overlay value [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float OverlayValueFunction(float backdrop, float source) - { - return backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); - } + => backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); /// /// Returns the result of the "Over" compositing equation. @@ -175,6 +233,40 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "Over" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) + { + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 blendW = Avx.Multiply(sW, dW); + + Vector256 dstW = Avx.Subtract(dW, blendW); + Vector256 srcW = Avx.Subtract(sW, blendW); + + // calculate final alpha + Vector256 alpha = Avx.Add(dstW, sW); + + // calculate final color + Vector256 color = Avx.Multiply(destination, dstW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(source, srcW, color); + color = SimdUtils.HwIntrinsics.MultiplyAdd(blend, blendW, color); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "Atop" compositing equation. /// @@ -202,6 +294,36 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "Atop" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) + { + // calculate weights + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate final alpha + Vector256 alpha = Avx.Shuffle(destination, destination, shuffleAlphaControl); + + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 blendW = Avx.Multiply(sW, alpha); + Vector256 dstW = Avx.Subtract(alpha, blendW); + + // calculate final color + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(destination, dstW, Avx.Multiply(blend, blendW)); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "In" compositing equation. /// @@ -220,6 +342,31 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "In" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 In(Vector256 destination, Vector256 source) + { + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate alpha + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 alpha = Avx.Multiply(sW, dW); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "Out" compositing equation. /// @@ -238,6 +385,31 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "Out" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Out(Vector256 destination, Vector256 source) + { + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate alpha + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); + } + /// /// Returns the result of the "XOr" compositing equation. /// @@ -260,9 +432,38 @@ internal static partial class PorterDuffFunctions return color; } + /// + /// Returns the result of the "XOr" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector4 Clear(Vector4 backdrop, Vector4 source) + public static Vector256 Xor(Vector256 destination, Vector256 source) { - return Vector4.Zero; + const int blendAlphaControl = 0b_10_00_10_00; + const int shuffleAlphaControl = 0b_11_11_11_11; + + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + + Vector256 vOne = Vector256.Create(1F); + Vector256 srcW = Avx.Subtract(vOne, dW); + Vector256 dstW = Avx.Subtract(vOne, sW); + + // calculate alpha + Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(sW, source), srcW, Avx.Multiply(Avx.Multiply(dW, destination), dstW)); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + return Avx.Blend(color, alpha, blendAlphaControl); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 Clear(Vector4 backdrop, Vector4 source) => Vector4.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; } From 746b34d46f52b13e6bab922899ba8825ab23b5b5 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 13:22:00 +1000 Subject: [PATCH 04/35] Finish porting function components --- .../PixelBlenders/PorterDuffFunctions.cs | 110 +++++++++++------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index d7d31c0c8b..551f17f209 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -21,6 +21,12 @@ namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; /// internal static partial class PorterDuffFunctions { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + private static readonly Vector256 Vector256Half = Vector256.Create(0.5F); + private static readonly Vector256 Vector256One = Vector256.Create(1F); + private static readonly Vector256 Vector256Two = Vector256.Create(2F); + /// /// Returns the result of the "Normal" compositing equation. /// @@ -79,7 +85,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Add(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); + => Avx.Min(Vector256One, Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -99,7 +105,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); + => Avx.Min(Vector256One, Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -119,10 +125,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Screen(Vector256 backdrop, Vector256 source) - { - Vector256 vOne = Vector256.Create(1F); - return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); - } + => Avx.Subtract(Vector256One, Avx.Multiply(Avx.Subtract(Vector256One, backdrop), Avx.Subtract(Vector256One, source))); /// /// Returns the result of the "Darken" compositing equation. @@ -179,6 +182,19 @@ internal static partial class PorterDuffFunctions return Vector4.Min(Vector4.One, new Vector4(cr, cg, cb, 0)); } + /// + /// Returns the result of the "Overlay" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Overlay(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(backdrop, source); + return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + /// /// Returns the result of the "HardLight" compositing equation. /// @@ -195,6 +211,19 @@ internal static partial class PorterDuffFunctions return Vector4.Min(Vector4.One, new Vector4(cr, cg, cb, 0)); } + /// + /// Returns the result of the "HardLight" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLight(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(source, backdrop); + return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + /// /// Helper function for Overlay and HardLight modes /// @@ -205,6 +234,22 @@ internal static partial class PorterDuffFunctions private static float OverlayValueFunction(float backdrop, float source) => backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); + /// + /// Helper function for Overlay and HardLight modes + /// + /// Backdrop color element + /// Source color element + /// Overlay value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) + { + Vector256 left = Avx.Multiply(Avx.Multiply(Vector256Two, backdrop), source); + Vector256 right = Avx.Subtract(Vector256One, Avx.Multiply(Avx.Multiply(Vector256Two, Avx.Subtract(Vector256One, source)), Avx.Subtract(Vector256One, backdrop))); + + Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256Half); + return Avx.BlendVariable(left, right, cmp); + } + /// /// Returns the result of the "Over" compositing equation. /// @@ -243,12 +288,9 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate weights - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); @@ -264,7 +306,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -304,15 +346,11 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) { - // calculate weights - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate final alpha - Vector256 alpha = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 alpha = Avx.Shuffle(destination, destination, ShuffleAlphaControl); // calculate weights - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, alpha); Vector256 dstW = Avx.Subtract(alpha, blendW); @@ -321,7 +359,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -351,12 +389,9 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 In(Vector256 destination, Vector256 source) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 alpha = Avx.Multiply(sW, dW); // premultiply @@ -364,7 +399,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -394,20 +429,17 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Out(Vector256 destination, Vector256 source) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256One, dW), sW); // premultiply Vector256 color = Avx.Multiply(source, alpha); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -441,16 +473,12 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Xor(Vector256 destination, Vector256 source) { - const int blendAlphaControl = 0b_10_00_10_00; - const int shuffleAlphaControl = 0b_11_11_11_11; - // calculate weights - Vector256 sW = Avx.Shuffle(source, source, shuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, shuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 vOne = Vector256.Create(1F); - Vector256 srcW = Avx.Subtract(vOne, dW); - Vector256 dstW = Avx.Subtract(vOne, sW); + Vector256 srcW = Avx.Subtract(Vector256One, dW); + Vector256 dstW = Avx.Subtract(Vector256One, sW); // calculate alpha Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); @@ -458,7 +486,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); - return Avx.Blend(color, alpha, blendAlphaControl); + return Avx.Blend(color, alpha, BlendAlphaControl); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From 4c546d7de8c917e4dca0b1bcc8b62bb8ee92b007 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:12:30 +1000 Subject: [PATCH 05/35] Update the PorterDuffFunctions.Generated.tt to include the Vector256 variants. --- .../PorterDuffFunctions.Generated.cs | 1406 ++++++++++++++++- .../PorterDuffFunctions.Generated.tt | 152 ++ 2 files changed, 1531 insertions(+), 27 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index ff41e70b20..2b365f1779 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -5,6 +5,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -26,6 +28,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "NormalSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "NormalSrcAtop" compositing equation. /// @@ -41,6 +54,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcOver" compositing equation. /// @@ -56,6 +84,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcIn" compositing equation. /// @@ -71,6 +114,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "NormalSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrcOut" compositing equation. /// @@ -86,6 +140,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "NormalSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalDest" compositing equation. /// @@ -99,6 +164,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "NormalDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "NormalDestAtop" compositing equation. /// @@ -114,6 +192,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestOver" compositing equation. /// @@ -129,6 +222,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestIn" compositing equation. /// @@ -144,6 +252,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "NormalDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalDestOut" compositing equation. /// @@ -159,6 +278,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "NormalDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalXor" compositing equation. /// @@ -174,6 +304,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "NormalXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalClear" compositing equation. /// @@ -189,6 +330,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "NormalClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrc" compositing equation. @@ -421,6 +573,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "MultiplySrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "MultiplySrcAtop" compositing equation. /// @@ -436,6 +599,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcOver" compositing equation. /// @@ -451,6 +629,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcIn" compositing equation. /// @@ -466,6 +659,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrcOut" compositing equation. /// @@ -481,6 +685,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyDest" compositing equation. /// @@ -494,6 +709,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "MultiplyDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "MultiplyDestAtop" compositing equation. /// @@ -509,6 +737,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestOver" compositing equation. /// @@ -524,6 +767,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestIn" compositing equation. /// @@ -539,6 +797,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyDestOut" compositing equation. /// @@ -554,6 +823,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyXor" compositing equation. /// @@ -569,6 +849,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "MultiplyXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyClear" compositing equation. /// @@ -584,6 +875,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "MultiplyClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrc" compositing equation. @@ -816,6 +1118,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "AddSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "AddSrcAtop" compositing equation. /// @@ -832,52 +1145,104 @@ internal static partial class PorterDuffFunctions } /// - /// Returns the result of the "AddSrcOver" compositing equation. + /// Returns the result of the "AddSrcAtop" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Over(backdrop, source, Add(backdrop, source)); + return Atop(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcIn" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) { source.W *= opacity; - return In(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcOut" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Out(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddDest" compositing equation. + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return In(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return Out(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddDest" compositing equation. /// /// The backdrop vector. /// The source vector. @@ -889,6 +1254,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "AddDestAtop" compositing equation. /// @@ -904,6 +1282,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestOver" compositing equation. /// @@ -919,6 +1312,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestIn" compositing equation. /// @@ -934,6 +1342,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "AddDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddDestOut" compositing equation. /// @@ -949,6 +1368,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "AddDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddXor" compositing equation. /// @@ -964,6 +1394,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "AddXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddClear" compositing equation. /// @@ -979,6 +1420,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "AddClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddSrc" compositing equation. @@ -1211,6 +1663,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "SubtractSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "SubtractSrcAtop" compositing equation. /// @@ -1226,6 +1689,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcOver" compositing equation. /// @@ -1241,6 +1719,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcIn" compositing equation. /// @@ -1256,6 +1749,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrcOut" compositing equation. /// @@ -1271,6 +1775,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractDest" compositing equation. /// @@ -1284,6 +1799,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "SubtractDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "SubtractDestAtop" compositing equation. /// @@ -1299,6 +1827,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestOver" compositing equation. /// @@ -1314,6 +1857,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestIn" compositing equation. /// @@ -1329,6 +1887,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "SubtractDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractDestOut" compositing equation. /// @@ -1344,6 +1913,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "SubtractDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractXor" compositing equation. /// @@ -1359,6 +1939,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "SubtractXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractClear" compositing equation. /// @@ -1374,6 +1965,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "SubtractClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrc" compositing equation. @@ -1606,6 +2208,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "ScreenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "ScreenSrcAtop" compositing equation. /// @@ -1621,6 +2234,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcOver" compositing equation. /// @@ -1636,6 +2264,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcIn" compositing equation. /// @@ -1651,6 +2294,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrcOut" compositing equation. /// @@ -1666,6 +2320,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenDest" compositing equation. /// @@ -1679,6 +2344,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "ScreenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "ScreenDestAtop" compositing equation. /// @@ -1691,7 +2369,37 @@ internal static partial class PorterDuffFunctions { source.W *= opacity; - return Atop(source, backdrop, Screen(source, backdrop)); + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return Over(source, backdrop, Screen(source, backdrop)); } /// @@ -1700,11 +2408,11 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 ScreenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Screen(source, backdrop)); } @@ -1724,6 +2432,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "ScreenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenDestOut" compositing equation. /// @@ -1739,6 +2458,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "ScreenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenXor" compositing equation. /// @@ -1754,6 +2484,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "ScreenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenClear" compositing equation. /// @@ -1769,6 +2510,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "ScreenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrc" compositing equation. @@ -2001,6 +2753,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "DarkenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "DarkenSrcAtop" compositing equation. /// @@ -2016,6 +2779,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcOver" compositing equation. /// @@ -2031,6 +2809,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcIn" compositing equation. /// @@ -2046,6 +2839,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrcOut" compositing equation. /// @@ -2061,6 +2865,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenDest" compositing equation. /// @@ -2074,6 +2889,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "DarkenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "DarkenDestAtop" compositing equation. /// @@ -2089,6 +2917,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestOver" compositing equation. /// @@ -2104,6 +2947,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestIn" compositing equation. /// @@ -2119,6 +2977,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "DarkenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenDestOut" compositing equation. /// @@ -2134,6 +3003,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "DarkenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenXor" compositing equation. /// @@ -2149,6 +3029,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "DarkenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenClear" compositing equation. /// @@ -2164,6 +3055,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "DarkenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrc" compositing equation. @@ -2396,6 +3298,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "LightenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "LightenSrcAtop" compositing equation. /// @@ -2411,6 +3324,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcOver" compositing equation. /// @@ -2426,6 +3354,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcIn" compositing equation. /// @@ -2441,6 +3384,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "LightenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrcOut" compositing equation. /// @@ -2456,6 +3410,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "LightenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenDest" compositing equation. /// @@ -2469,6 +3434,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "LightenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "LightenDestAtop" compositing equation. /// @@ -2484,6 +3462,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Lighten(source, backdrop)); + } + /// /// Returns the result of the "LightenDestOver" compositing equation. /// @@ -2499,19 +3492,60 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Lighten(source, backdrop)); + } + + /// + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + { + source.W *= opacity; + + return In(source, backdrop); + } + /// /// Returns the result of the "LightenDestIn" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + + /// + /// Returns the result of the "LightenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) { source.W *= opacity; - return In(source, backdrop); + return Out(source, backdrop); } /// @@ -2520,14 +3554,10 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) - { - source.W *= opacity; - - return Out(source, backdrop); - } + public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenXor" compositing equation. @@ -2544,6 +3574,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "LightenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenClear" compositing equation. /// @@ -2559,6 +3600,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "LightenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrc" compositing equation. @@ -2791,6 +3843,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "OverlaySrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "OverlaySrcAtop" compositing equation. /// @@ -2806,6 +3869,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcOver" compositing equation. /// @@ -2821,6 +3899,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcIn" compositing equation. /// @@ -2836,6 +3929,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrcOut" compositing equation. /// @@ -2851,6 +3955,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayDest" compositing equation. /// @@ -2864,6 +3979,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "OverlayDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "OverlayDestAtop" compositing equation. /// @@ -2879,6 +4007,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestOver" compositing equation. /// @@ -2894,6 +4037,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestIn" compositing equation. /// @@ -2909,6 +4067,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "OverlayDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayDestOut" compositing equation. /// @@ -2924,6 +4093,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "OverlayDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayXor" compositing equation. /// @@ -2939,6 +4119,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "OverlayXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayClear" compositing equation. /// @@ -2954,6 +4145,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "OverlayClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrc" compositing equation. @@ -3186,6 +4388,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "HardLightSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "HardLightSrcAtop" compositing equation. /// @@ -3201,6 +4414,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcOver" compositing equation. /// @@ -3216,6 +4444,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcIn" compositing equation. /// @@ -3231,6 +4474,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrcOut" compositing equation. /// @@ -3246,6 +4500,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightDest" compositing equation. /// @@ -3259,6 +4524,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "HardLightDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "HardLightDestAtop" compositing equation. /// @@ -3274,6 +4552,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestOver" compositing equation. /// @@ -3289,6 +4582,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestIn" compositing equation. /// @@ -3304,6 +4612,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "HardLightDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightDestOut" compositing equation. /// @@ -3319,6 +4638,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "HardLightDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightXor" compositing equation. /// @@ -3334,6 +4664,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "HardLightXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightClear" compositing equation. /// @@ -3349,6 +4690,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "HardLightClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrc" compositing equation. diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 40d8b89970..5baa1e8641 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -15,6 +15,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -36,6 +38,17 @@ internal static partial class PorterDuffFunctions return source; } + /// + /// Returns the result of the "<#=blender#>Src compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. /// @@ -51,6 +64,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcOver" compositing equation. /// @@ -66,6 +94,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcIn" compositing equation. /// @@ -81,6 +124,17 @@ internal static partial class PorterDuffFunctions return In(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. /// @@ -96,6 +150,17 @@ internal static partial class PorterDuffFunctions return Out(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Dest" compositing equation. /// @@ -109,6 +174,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "<#=blender#>Dest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Dest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "<#=blender#>DestAtop" compositing equation. /// @@ -124,6 +202,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestOver" compositing equation. /// @@ -139,6 +232,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Over(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestIn" compositing equation. /// @@ -154,6 +262,17 @@ internal static partial class PorterDuffFunctions return In(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. /// @@ -169,6 +288,17 @@ internal static partial class PorterDuffFunctions return Out(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>Xor" compositing equation. /// @@ -184,6 +314,17 @@ internal static partial class PorterDuffFunctions return Xor(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Xor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Clear" compositing equation. /// @@ -199,6 +340,17 @@ internal static partial class PorterDuffFunctions return Clear(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Clear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + <#} #> <# void GenerateGenericPixelBlender(string blender, string composer) { #> From ef34960e81a5b9475b394d0fe4017befe17b1b63 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:40:29 +1000 Subject: [PATCH 06/35] Fix code generation --- .../PorterDuffFunctions.Generated.cs | 18 +++++++++--------- .../PorterDuffFunctions.Generated.tt | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index 2b365f1779..5740a704ca 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -338,7 +338,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -883,7 +883,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -1428,7 +1428,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -1973,7 +1973,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -2518,7 +2518,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -3063,7 +3063,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -3608,7 +3608,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -4153,7 +4153,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); @@ -4698,7 +4698,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 5baa1e8641..34eeb78cbe 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -348,7 +348,7 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); <#} #> From 9f8bcc464db227bfc570c98ea6d23583f38fd4a7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:41:55 +1000 Subject: [PATCH 07/35] Respond to feedback --- src/ImageSharp/Common/Constants.cs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs index a3cfe3623f..d4640f133f 100644 --- a/src/ImageSharp/Common/Constants.cs +++ b/src/ImageSharp/Common/Constants.cs @@ -1,8 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System.Runtime.Intrinsics; - namespace SixLabors.ImageSharp; /// @@ -15,11 +13,6 @@ internal static class Constants /// public static readonly float Epsilon = 0.001F; - /// - /// The epsilon value for comparing floating point numbers. - /// - public static readonly Vector256 Epsilon256 = Vector256.Create(0.001F); - /// /// The epsilon squared value for comparing floating point numbers. /// From 5fedca864782a3e23ea86d5a37d416a94e922559 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:42:13 +1000 Subject: [PATCH 08/35] Respond to feedback --- .../PixelBlenders/PorterDuffFunctions.cs | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 551f17f209..3fe375344e 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -23,9 +23,6 @@ internal static partial class PorterDuffFunctions { private const int BlendAlphaControl = 0b_10_00_10_00; private const int ShuffleAlphaControl = 0b_11_11_11_11; - private static readonly Vector256 Vector256Half = Vector256.Create(0.5F); - private static readonly Vector256 Vector256One = Vector256.Create(1F); - private static readonly Vector256 Vector256Two = Vector256.Create(2F); /// /// Returns the result of the "Normal" compositing equation. @@ -85,7 +82,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Add(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256One, Avx.Add(backdrop, source)); + => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -105,7 +102,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256One, Avx.Subtract(backdrop, source)); + => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -125,7 +122,10 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Screen(Vector256 backdrop, Vector256 source) - => Avx.Subtract(Vector256One, Avx.Multiply(Avx.Subtract(Vector256One, backdrop), Avx.Subtract(Vector256One, source))); + { + Vector256 vOne = Vector256.Create(1F); + return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); + } /// /// Returns the result of the "Darken" compositing equation. @@ -192,7 +192,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Overlay(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(backdrop, source); - return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -221,7 +221,7 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLight(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(source, backdrop); - return Avx.Min(Vector256One, Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -243,10 +243,12 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { - Vector256 left = Avx.Multiply(Avx.Multiply(Vector256Two, backdrop), source); - Vector256 right = Avx.Subtract(Vector256One, Avx.Multiply(Avx.Multiply(Vector256Two, Avx.Subtract(Vector256One, source)), Avx.Subtract(Vector256One, backdrop))); + Vector256 vOne = Vector256.Create(1F); + Vector256 vTwo = Vector256.Create(2F); + Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); + Vector256 right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), Avx.Subtract(vOne, backdrop))); - Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256Half); + Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -305,7 +307,7 @@ internal static partial class PorterDuffFunctions color = SimdUtils.HwIntrinsics.MultiplyAdd(blend, blendW, color); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -358,7 +360,7 @@ internal static partial class PorterDuffFunctions Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(destination, dstW, Avx.Multiply(blend, blendW)); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -398,7 +400,7 @@ internal static partial class PorterDuffFunctions Vector256 color = Avx.Multiply(source, alpha); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -432,13 +434,13 @@ internal static partial class PorterDuffFunctions // calculate alpha Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256One, dW), sW); + Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); // premultiply Vector256 color = Avx.Multiply(source, alpha); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } @@ -477,15 +479,16 @@ internal static partial class PorterDuffFunctions Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 srcW = Avx.Subtract(Vector256One, dW); - Vector256 dstW = Avx.Subtract(Vector256One, sW); + Vector256 vOne = Vector256.Create(1F); + Vector256 srcW = Avx.Subtract(vOne, dW); + Vector256 dstW = Avx.Subtract(vOne, sW); // calculate alpha Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(sW, source), srcW, Avx.Multiply(Avx.Multiply(dW, destination), dstW)); // unpremultiply - color = Avx.Divide(color, Avx.Max(alpha, Constants.Epsilon256)); + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); return Avx.Blend(color, alpha, BlendAlphaControl); } From 907400f2dc426a2f709981dfdf1b47cabca79228 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:54:42 +1000 Subject: [PATCH 09/35] Use Permute --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 3fe375344e..183f4b59f5 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -291,8 +291,8 @@ internal static partial class PorterDuffFunctions public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { // calculate weights - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); From 9a552f17ece1a35d07794351c57904c5d8d6b505 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 14:59:34 +1000 Subject: [PATCH 10/35] Revert "Use Permute" This reverts commit 907400f2dc426a2f709981dfdf1b47cabca79228. --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 183f4b59f5..3fe375344e 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -291,8 +291,8 @@ internal static partial class PorterDuffFunctions public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { // calculate weights - Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); - Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); From bde9324f17e4367cf94114244191ae66cfa831a7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 15:58:47 +1000 Subject: [PATCH 11/35] Use Permute --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 3fe375344e..fa497c8726 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -291,8 +291,8 @@ internal static partial class PorterDuffFunctions public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) { // calculate weights - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); @@ -392,9 +392,7 @@ internal static partial class PorterDuffFunctions public static Vector256 In(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(sW, dW); + Vector256 alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl); // premultiply Vector256 color = Avx.Multiply(source, alpha); From 41cfa9b1fb9fa9e3c80a67433263124a2470389f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 15:59:10 +1000 Subject: [PATCH 12/35] Port DefaultPixelBlenders --- .../DefaultPixelBlenders.Generated.cs | 7470 ++++++++++++++++- .../DefaultPixelBlenders.Generated.tt | 73 +- 2 files changed, 7100 insertions(+), 443 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index cf19101211..c2d97efa0a 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -3,6 +3,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -43,18 +47,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -81,18 +146,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -119,18 +245,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -157,18 +344,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -195,18 +443,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -233,18 +542,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -271,18 +641,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -309,18 +740,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -347,18 +839,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -385,18 +938,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -423,18 +1037,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -461,18 +1136,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -499,18 +1235,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -537,18 +1334,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -575,18 +1433,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -613,18 +1532,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -651,18 +1631,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -689,18 +1730,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -727,18 +1829,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -765,18 +1928,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -803,18 +2027,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -841,18 +2126,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); - } - } + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); - /// + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + } + + /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -879,18 +2225,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -917,18 +2324,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -955,18 +2423,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -993,18 +2522,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1031,18 +2621,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1069,18 +2720,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1107,18 +2819,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1145,18 +2918,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1183,18 +3017,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1221,18 +3116,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1259,18 +3215,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1297,18 +3314,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1335,18 +3413,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1373,18 +3512,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1411,18 +3611,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1449,18 +3710,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1487,18 +3809,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1525,18 +3908,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1563,18 +4007,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1601,18 +4106,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1639,18 +4205,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1677,18 +4304,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } } } - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) - { - for (int i = 0; i < destination.Length; i++) + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) + { + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1715,18 +4403,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1753,18 +4502,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1791,18 +4601,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1829,18 +4700,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1867,18 +4799,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1905,18 +4898,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1943,18 +4997,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1981,18 +5096,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2019,18 +5195,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2057,18 +5294,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2095,18 +5393,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2133,18 +5492,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2171,18 +5591,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2209,18 +5690,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2247,18 +5789,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2285,18 +5888,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2323,18 +5987,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2361,18 +6086,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2399,18 +6185,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2437,18 +6284,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2475,18 +6383,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2513,18 +6482,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2551,18 +6581,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2589,18 +6680,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2627,18 +6779,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2665,18 +6878,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2703,18 +6977,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2741,18 +7076,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2779,18 +7175,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2817,18 +7274,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2855,18 +7373,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2893,18 +7472,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2931,18 +7571,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2969,18 +7670,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3007,18 +7769,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3045,18 +7868,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3083,18 +7967,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3121,18 +8066,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3159,18 +8165,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3197,18 +8264,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3235,18 +8363,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3273,18 +8462,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3311,18 +8561,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3349,18 +8660,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3387,18 +8759,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3425,18 +8858,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3463,18 +8957,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3501,18 +9056,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3539,18 +9155,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3577,18 +9254,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3615,18 +9353,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3653,18 +9452,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3691,18 +9551,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3729,18 +9650,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3767,18 +9749,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3805,18 +9848,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3843,18 +9947,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3881,18 +10046,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3919,18 +10145,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3957,18 +10244,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3995,18 +10343,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4033,18 +10442,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4071,18 +10541,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4109,18 +10640,79 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) - { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 7bd51439ce..6d98c6cd99 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -13,6 +13,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -86,18 +90,79 @@ var blenders = new []{ protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. + Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } From b4ff1e4db39d4be8f5699da1e72d3e79864efb95 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 20:16:31 +1000 Subject: [PATCH 13/35] Fix issues --- .../PixelBlenders/PorterDuffFunctions.cs | 16 +++++----- .../PixelBlenders/PorterDuffFunctionsTests.cs | 31 +++++++++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index fa497c8726..af111849a8 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -293,8 +293,8 @@ internal static partial class PorterDuffFunctions // calculate weights Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); - Vector256 blendW = Avx.Multiply(sW, dW); + Vector256 blendW = Avx.Multiply(sW, dW); Vector256 dstW = Avx.Subtract(dW, blendW); Vector256 srcW = Avx.Subtract(sW, blendW); @@ -303,8 +303,8 @@ internal static partial class PorterDuffFunctions // calculate final color Vector256 color = Avx.Multiply(destination, dstW); - color = SimdUtils.HwIntrinsics.MultiplyAdd(source, srcW, color); - color = SimdUtils.HwIntrinsics.MultiplyAdd(blend, blendW, color); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); @@ -349,15 +349,15 @@ internal static partial class PorterDuffFunctions public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) { // calculate final alpha - Vector256 alpha = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + Vector256 alpha = Avx.Permute(destination, ShuffleAlphaControl); // calculate weights - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); Vector256 blendW = Avx.Multiply(sW, alpha); Vector256 dstW = Avx.Subtract(alpha, blendW); // calculate final color - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(destination, dstW, Avx.Multiply(blend, blendW)); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); @@ -482,8 +482,8 @@ internal static partial class PorterDuffFunctions Vector256 dstW = Avx.Subtract(vOne, sW); // calculate alpha - Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(sW, srcW, Avx.Multiply(dW, dstW)); - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(sW, source), srcW, Avx.Multiply(Avx.Multiply(dW, destination), dstW)); + Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); // unpremultiply color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 45dece8ec8..02b4b0ea50 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -2,6 +2,7 @@ // Licensed under the Six Labors Split License. using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats.PixelBlenders; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -9,7 +10,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTests { - public static TheoryData NormalBlendFunctionData = new TheoryData + public static TheoryData NormalBlendFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) } @@ -23,7 +24,19 @@ public class PorterDuffFunctionsTests Assert.Equal(expected, actual); } - public static TheoryData MultiplyFunctionData = new TheoryData + [Theory] + [MemberData(nameof(NormalBlendFunctionData))] + public void NormalBlendFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.NormalSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual); + } + + public static TheoryData MultiplyFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) }, @@ -38,7 +51,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData AddFunctionData = new TheoryData + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, @@ -53,7 +66,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData SubtractFunctionData = new TheoryData + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -68,7 +81,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData ScreenFunctionData = new TheoryData + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -83,7 +96,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData DarkenFunctionData = new TheoryData + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, @@ -98,7 +111,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData LightenFunctionData = new TheoryData + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -113,7 +126,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData OverlayFunctionData = new TheoryData + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -128,7 +141,7 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData HardLightFunctionData = new TheoryData + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1f) }, From c58be60c754fc40264d9f9640d6f6160268d57ce Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 20:46:18 +1000 Subject: [PATCH 14/35] Add additional PD tests --- .../PixelBlenders/PorterDuffFunctions.cs | 2 +- .../PixelBlenders/PorterDuffFunctionsTests.cs | 106 +++++++++++++++++- .../TestUtilities/ApproximateFloatComparer.cs | 17 ++- 3 files changed, 119 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index af111849a8..d1bd5bad31 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -102,7 +102,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Subtract(backdrop, source)); + => Avx.Max(Vector256.Zero, Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 02b4b0ea50..189d21f1e6 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -10,6 +10,8 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTests { + private static readonly ApproximateFloatComparer FloatComparer = new(.000001F); + public static TheoryData NormalBlendFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -33,7 +35,7 @@ public class PorterDuffFunctionsTests Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); Vector256 actual = PorterDuffFunctions.NormalSrcOver(back256, source256, Vector256.Create(amount)); - Assert.Equal(expected256, actual); + Assert.Equal(expected256, actual, FloatComparer); } public static TheoryData MultiplyFunctionData { get; } = new() @@ -51,21 +53,45 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(MultiplyFunctionData))] + public void MultiplyFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.MultiplySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, - { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, - { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(.2075676f, .2075676f, .2075676f, .37f) } + { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1) }, + { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(0.24324325f, 0.24324325f, 0.24324325f, .37f) } }; [Theory] [MemberData(nameof(AddFunctionData))] public void AddFunction(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) { - Vector4 actual = PorterDuffFunctions.MultiplySrcOver((Vector4)back, source, amount); + Vector4 actual = PorterDuffFunctions.AddSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(AddFunctionData))] + public void AddFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.AddSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, @@ -81,6 +107,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(SubtractFunctionData))] + public void SubtractFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.SubtractSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -96,6 +134,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(ScreenFunctionData))] + public void ScreenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.ScreenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -111,6 +161,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(DarkenFunctionData))] + public void DarkenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.DarkenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -126,6 +188,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(LightenFunctionData))] + public void LightenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.LightenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -141,6 +215,18 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } + [Theory] + [MemberData(nameof(OverlayFunctionData))] + public void OverlayFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.OverlaySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -155,4 +241,16 @@ public class PorterDuffFunctionsTests Vector4 actual = PorterDuffFunctions.HardLightSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } + + [Theory] + [MemberData(nameof(HardLightFunctionData))] + public void HardLightFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.HardLightSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs index 6d9652d898..e35f36feec 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs @@ -1,7 +1,9 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Diagnostics.CodeAnalysis; using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Tests; @@ -14,7 +16,8 @@ internal readonly struct ApproximateFloatComparer : IEqualityComparer, IEqualityComparer, IEqualityComparer, - IEqualityComparer + IEqualityComparer, + IEqualityComparer> { private readonly float epsilon; @@ -72,4 +75,16 @@ internal readonly struct ApproximateFloatComparer : /// public int GetHashCode(ColorMatrix obj) => obj.GetHashCode(); + + public bool Equals(Vector256 x, Vector256 y) + => this.Equals(x.GetElement(0), y.GetElement(0)) + && this.Equals(x.GetElement(1), y.GetElement(1)) + && this.Equals(x.GetElement(2), y.GetElement(2)) + && this.Equals(x.GetElement(3), y.GetElement(3)) + && this.Equals(x.GetElement(4), y.GetElement(4)) + && this.Equals(x.GetElement(5), y.GetElement(5)) + && this.Equals(x.GetElement(6), y.GetElement(6)) + && this.Equals(x.GetElement(7), y.GetElement(7)); + + public int GetHashCode([DisallowNull] Vector256 obj) => obj.GetHashCode(); } From dff381fd554227858a534c49c8c5ea6d0534403e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:08:19 +1000 Subject: [PATCH 15/35] Fix amount span assignment --- .../DefaultPixelBlenders.Generated.cs | 648 +++++++++++++++--- .../DefaultPixelBlenders.Generated.tt | 6 +- 2 files changed, 545 insertions(+), 109 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index c2d97efa0a..f28fba25ca 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -98,7 +98,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -197,7 +201,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -296,7 +304,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -395,7 +407,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -494,7 +510,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -593,7 +613,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -692,7 +716,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -791,7 +819,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -890,7 +922,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -989,7 +1025,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1088,7 +1128,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1187,7 +1231,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1286,7 +1334,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1385,7 +1437,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1484,7 +1540,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1583,7 +1643,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1682,7 +1746,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1781,7 +1849,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1880,7 +1952,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -1979,7 +2055,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2078,7 +2158,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2177,7 +2261,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2276,7 +2364,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2375,7 +2467,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2474,7 +2570,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2573,7 +2673,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2672,7 +2776,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2771,7 +2879,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2870,7 +2982,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -2969,7 +3085,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3068,7 +3188,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3167,7 +3291,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3266,7 +3394,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3365,7 +3497,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3464,7 +3600,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3563,7 +3703,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3662,7 +3806,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3761,7 +3909,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3860,7 +4012,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -3959,7 +4115,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4058,7 +4218,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4157,7 +4321,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4256,7 +4424,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4355,7 +4527,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4454,7 +4630,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4553,7 +4733,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4652,7 +4836,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4751,7 +4939,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4850,7 +5042,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -4949,7 +5145,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5048,7 +5248,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5147,7 +5351,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5246,7 +5454,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5345,7 +5557,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5444,7 +5660,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5543,7 +5763,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5642,7 +5866,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5741,7 +5969,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5840,7 +6072,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -5939,7 +6175,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6038,7 +6278,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6137,7 +6381,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6236,7 +6484,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6335,7 +6587,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6434,7 +6690,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6533,7 +6793,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6632,7 +6896,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6731,7 +6999,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6830,7 +7102,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -6929,7 +7205,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7028,7 +7308,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7127,7 +7411,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7226,7 +7514,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7325,7 +7617,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7424,7 +7720,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7523,7 +7823,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7622,7 +7926,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7721,7 +8029,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7820,7 +8132,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -7919,7 +8235,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8018,7 +8338,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8117,7 +8441,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8216,7 +8544,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8315,7 +8647,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8414,7 +8750,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8513,7 +8853,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8612,7 +8956,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8711,7 +9059,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8810,7 +9162,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -8909,7 +9265,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9008,7 +9368,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9107,7 +9471,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9206,7 +9574,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9305,7 +9677,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9404,7 +9780,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9503,7 +9883,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9602,7 +9986,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9701,7 +10089,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9800,7 +10192,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9899,7 +10295,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -9998,7 +10398,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10097,7 +10501,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10196,7 +10604,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10295,7 +10707,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10394,7 +10810,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10493,7 +10913,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10592,7 +11016,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); @@ -10691,7 +11119,11 @@ internal static class DefaultPixelBlenders while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 6d98c6cd99..07165a9b5d 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -141,7 +141,11 @@ var blenders = new []{ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. - Vector256 opacity = Vector256.Create(Numerics.Clamp(amountBase, 0, 1F)); + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), + Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); From 6cb6bd41f1fc3159259256a2a0372387fe0cc425 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:20:38 +1000 Subject: [PATCH 16/35] Better clamp, fix offset (again) --- .../DefaultPixelBlenders.Generated.cs | 1080 ++++++++++------- .../DefaultPixelBlenders.Generated.tt | 10 +- 2 files changed, 654 insertions(+), 436 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index f28fba25ca..2db61a06f3 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -95,20 +95,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -198,20 +200,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -301,20 +305,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -404,20 +410,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -507,20 +515,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -610,20 +620,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -713,20 +725,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -816,20 +830,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -919,20 +935,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1022,20 +1040,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1125,20 +1145,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1228,20 +1250,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1331,20 +1355,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1434,20 +1460,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1537,20 +1565,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1640,20 +1670,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1743,20 +1775,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1846,20 +1880,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -1949,20 +1985,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2052,20 +2090,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2155,20 +2195,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2258,20 +2300,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2361,20 +2405,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2464,20 +2510,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2567,20 +2615,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2670,20 +2720,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2773,20 +2825,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2876,20 +2930,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -2979,20 +3035,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3082,20 +3140,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3185,20 +3245,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3288,20 +3350,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3391,20 +3455,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3494,20 +3560,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3597,20 +3665,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3700,20 +3770,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3803,20 +3875,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -3906,20 +3980,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4009,20 +4085,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4112,20 +4190,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4215,20 +4295,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4318,20 +4400,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4421,20 +4505,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4524,20 +4610,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4627,20 +4715,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4730,20 +4820,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4833,20 +4925,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -4936,20 +5030,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5039,20 +5135,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5142,20 +5240,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5245,20 +5345,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5348,20 +5450,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5451,20 +5555,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5554,20 +5660,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5657,20 +5765,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5760,20 +5870,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5863,20 +5975,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -5966,20 +6080,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6069,20 +6185,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6172,20 +6290,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6275,20 +6395,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6378,20 +6500,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6481,20 +6605,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6584,20 +6710,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6687,20 +6815,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6790,20 +6920,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6893,20 +7025,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -6996,20 +7130,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7099,20 +7235,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7202,20 +7340,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7305,20 +7445,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7408,20 +7550,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7511,20 +7655,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7614,20 +7760,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7717,20 +7865,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7820,20 +7970,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -7923,20 +8075,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8026,20 +8180,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8129,20 +8285,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8232,20 +8390,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8335,20 +8495,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8438,20 +8600,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8541,20 +8705,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8644,20 +8810,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8747,20 +8915,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8850,20 +9020,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -8953,20 +9125,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9056,20 +9230,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9159,20 +9335,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9262,20 +9440,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9365,20 +9545,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9468,20 +9650,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9571,20 +9755,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9674,20 +9860,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9777,20 +9965,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9880,20 +10070,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -9983,20 +10175,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10086,20 +10280,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10189,20 +10385,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10292,20 +10490,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10395,20 +10595,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10498,20 +10700,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10601,20 +10805,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10704,20 +10910,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10807,20 +11015,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -10910,20 +11120,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -11013,20 +11225,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) @@ -11116,20 +11330,22 @@ internal static class DefaultPixelBlenders ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 07165a9b5d..22b9ebf982 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -138,20 +138,22 @@ var blenders = new []{ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref float amountBase = ref MemoryMarshal.GetReference(amount); + Vector256 vOne = Vector256.Create(1F); + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { - // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods. // We need to create a Vector256 containing the current and next amount values // taking up each half of the Vector256 and then clamp them. Vector256 opacity = Vector256.Create( - Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), - Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F))); + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); destinationBase = ref Unsafe.Add(ref destinationBase, 1); backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); sourceBase = ref Unsafe.Add(ref sourceBase, 1); - amountBase = ref Unsafe.Add(ref amountBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); } if (Numerics.Modulo2(destination.Length) != 0) From c06da8c4bcf37cbdc327a90d38bedbbff08fca73 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:34:06 +1000 Subject: [PATCH 17/35] Add NormalSrcOver benchmark --- .../PixelBlenders/PorterDuffBulkVsPixel.cs | 24 +++---- .../PorterDuffBulkVsSingleVector.cs | 68 +++++++++++++++++++ 2 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs index 68956c880f..3e6667dbc2 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs @@ -12,9 +12,9 @@ namespace SixLabors.ImageSharp.Benchmarks; public class PorterDuffBulkVsPixel { - private Configuration Configuration => Configuration.Default; + private static Configuration Configuration => Configuration.Default; - private void BulkVectorConvert( + private static void BulkVectorConvert( Span destination, Span background, Span source, @@ -31,18 +31,18 @@ public class PorterDuffBulkVsPixel Span backgroundSpan = buffer.Slice(destination.Length, destination.Length); Span sourceSpan = buffer.Slice(destination.Length * 2, destination.Length); - PixelOperations.Instance.ToVector4(this.Configuration, background, backgroundSpan); - PixelOperations.Instance.ToVector4(this.Configuration, source, sourceSpan); + PixelOperations.Instance.ToVector4(Configuration, background, backgroundSpan); + PixelOperations.Instance.ToVector4(Configuration, source, sourceSpan); for (int i = 0; i < destination.Length; i++) { destinationSpan[i] = PorterDuffFunctions.NormalSrcOver(backgroundSpan[i], sourceSpan[i], amount[i]); } - PixelOperations.Instance.FromVector4Destructive(this.Configuration, destinationSpan, destination); + PixelOperations.Instance.FromVector4Destructive(Configuration, destinationSpan, destination); } - private void BulkPixelConvert( + private static void BulkPixelConvert( Span destination, Span background, Span source, @@ -60,9 +60,9 @@ public class PorterDuffBulkVsPixel } [Benchmark(Description = "ImageSharp BulkVectorConvert")] - public Size BulkVectorConvert() + public static Size BulkVectorConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); @@ -70,23 +70,23 @@ public class PorterDuffBulkVsPixel for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkVectorConvert(span, span, span, amounts.GetSpan()); + BulkVectorConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); } [Benchmark(Description = "ImageSharp BulkPixelConvert")] - public Size BulkPixelConvert() + public static Size BulkPixelConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); Buffer2D pixels = image.GetRootFramePixelBuffer(); for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkPixelConvert(span, span, span, amounts.GetSpan()); + BulkPixelConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs new file mode 100644 index 0000000000..6727f1c918 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats.PixelBlenders; + +namespace SixLabors.ImageSharp.Benchmarks.PixelBlenders; + +public class PorterDuffBulkVsSingleVector +{ + private Vector4[] backdrop; + private Vector4[] source; + + [GlobalSetup] + public void Setup() + { + this.backdrop = new Vector4[8 * 20]; + this.source = new Vector4[8 * 20]; + + FillRandom(this.backdrop); + FillRandom(this.source); + } + + private static void FillRandom(Vector4[] arr) + { + Random rng = new(); + for (int i = 0; i < arr.Length; i++) + { + arr[i].X = rng.NextSingle(); + arr[i].Y = rng.NextSingle(); + arr[i].Z = rng.NextSingle(); + arr[i].W = rng.NextSingle(); + } + } + + [Benchmark(Description = "Scalar")] + public Vector4 OverlayValueFunction_Scalar() + { + Vector4 result = default; + for (int i = 0; i < this.backdrop.Length; i++) + { + result = PorterDuffFunctions.NormalSrcOver(this.backdrop[i], this.source[i], .5F); + } + + return result; + } + + [Benchmark(Description = "Avx")] + public Vector256 OverlayValueFunction_Avx() + { + ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + + Vector256 result = default; + Vector256 opacity = Vector256.Create(.5F); + int count = this.backdrop.Length / 2; + for (int i = 0; i < count; i++) + { + result = PorterDuffFunctions.NormalSrcOver(Unsafe.Add(ref backdrop, i), Unsafe.Add(ref source, i), opacity); + } + + return result; + } +} From b05b25b36de60fa143b0cd4c51364abbe8151ba9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 21:57:34 +1000 Subject: [PATCH 18/35] Use RemoteExecutor for composition tests --- .../PorterDuffCompositorTests.cs | 73 ++++++++++--------- .../FeatureTesting/FeatureTestRunner.cs | 46 ++++++++++++ 2 files changed, 86 insertions(+), 33 deletions(-) diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs index c81b0a74ff..1086afe76d 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs @@ -1,59 +1,66 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; - using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Tests.TestUtilities; -using Xunit; +namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffCompositorTests { // TODO: Add other modes to compare. public static readonly TheoryData CompositingOperators = - new TheoryData - { - PixelAlphaCompositionMode.Src, - PixelAlphaCompositionMode.SrcAtop, - PixelAlphaCompositionMode.SrcOver, - PixelAlphaCompositionMode.SrcIn, - PixelAlphaCompositionMode.SrcOut, - PixelAlphaCompositionMode.Dest, - PixelAlphaCompositionMode.DestAtop, - PixelAlphaCompositionMode.DestOver, - PixelAlphaCompositionMode.DestIn, - PixelAlphaCompositionMode.DestOut, - PixelAlphaCompositionMode.Clear, - PixelAlphaCompositionMode.Xor - }; + new() + { + PixelAlphaCompositionMode.Src, + PixelAlphaCompositionMode.SrcAtop, + PixelAlphaCompositionMode.SrcOver, + PixelAlphaCompositionMode.SrcIn, + PixelAlphaCompositionMode.SrcOut, + PixelAlphaCompositionMode.Dest, + PixelAlphaCompositionMode.DestAtop, + PixelAlphaCompositionMode.DestOver, + PixelAlphaCompositionMode.DestIn, + PixelAlphaCompositionMode.DestOut, + PixelAlphaCompositionMode.Clear, + PixelAlphaCompositionMode.Xor + }; [Theory] [WithFile(TestImages.Png.PDDest, nameof(CompositingOperators), PixelTypes.Rgba32)] public void PorterDuffOutputIsCorrect(TestImageProvider provider, PixelAlphaCompositionMode mode) { - var srcFile = TestFile.Create(TestImages.Png.PDSrc); - using (Image src = srcFile.CreateRgba32Image()) - using (Image dest = provider.GetImage()) + static void RunTest(string providerDump, string alphaMode) { - var options = new GraphicsOptions + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); + + TestFile srcFile = TestFile.Create(TestImages.Png.PDSrc); + using Image src = srcFile.CreateRgba32Image(); + using Image dest = provider.GetImage(); + GraphicsOptions options = new() { Antialias = false, - AlphaCompositionMode = mode + AlphaCompositionMode = Enum.Parse(alphaMode) }; - using (Image res = dest.Clone(x => x.DrawImage(src, options))) - { - string combinedMode = mode.ToString(); - - if (combinedMode != "Src" && combinedMode.StartsWith("Src")) - { - combinedMode = combinedMode.Substring(3); - } + using Image res = dest.Clone(x => x.DrawImage(src, options)); + string combinedMode = alphaMode; - res.DebugSave(provider, combinedMode); - res.CompareToReferenceOutput(provider, combinedMode); + if (combinedMode != "Src" && combinedMode.StartsWith("Src", StringComparison.OrdinalIgnoreCase)) + { + combinedMode = combinedMode[3..]; } + + res.DebugSave(provider, combinedMode); + res.CompareToReferenceOutput(provider, combinedMode); } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX, + provider, + mode.ToString()); } } diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 1bb64d99d6..f68bfdbe6e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -257,6 +257,52 @@ public static class FeatureTestRunner } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + /// The second value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T arg1, + string arg2) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + ProcessStartInfo processStartInfo = new(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(arg1), + arg2, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(arg1), arg2); + } + } + } + /// /// Runs the given test within an environment /// where the given features. From 916084cc1c4cff357b5c939bc81b4f181bb7446b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 23:12:17 +1000 Subject: [PATCH 19/35] Fix field assignment in benchmark --- .../PixelBlenders/PorterDuffBulkVsSingleVector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs index 6727f1c918..4bd21c9a85 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -53,7 +53,7 @@ public class PorterDuffBulkVsSingleVector public Vector256 OverlayValueFunction_Avx() { ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); - ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.source)); Vector256 result = default; Vector256 opacity = Vector256.Create(.5F); From 8ffec30559fb1c16aa0d715b2090a1ace44a3c16 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Feb 2023 23:15:27 +1000 Subject: [PATCH 20/35] Make Scalar default --- .../PixelBlenders/PorterDuffBulkVsSingleVector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs index 4bd21c9a85..fcf7e9dcce 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -37,7 +37,7 @@ public class PorterDuffBulkVsSingleVector } } - [Benchmark(Description = "Scalar")] + [Benchmark(Description = "Scalar", Baseline = true)] public Vector4 OverlayValueFunction_Scalar() { Vector4 result = default; From a666372f68e44d1c610223a97641d2348b9d7782 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 09:52:41 +1000 Subject: [PATCH 21/35] Use FMA where possible. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 24 +++++++++++++++++++ .../PixelBlenders/PorterDuffFunctions.cs | 10 ++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 128218aac2..7d2bab259e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -533,6 +533,7 @@ internal static partial class SimdUtils /// /// Performs a multiplication and an addition of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. @@ -555,6 +556,7 @@ internal static partial class SimdUtils /// /// Performs a multiplication and a subtraction of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) - vs /// The vector to subtract from the intermediate result. @@ -575,6 +577,28 @@ internal static partial class SimdUtils return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); } + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = c - (a * b) + /// The first vector to multiply. + /// The second vector to multiply. + /// The vector to add negated to the intermediate result. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + in Vector256 a, + in Vector256 b, + in Vector256 c) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAddNegated(a, b, c); + } + + return Avx.Subtract(c, Avx.Multiply(a, b)); + } + /// /// as many elements as possible, slicing them down (keeping the remainder). /// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index d1bd5bad31..2d47f1a628 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -124,7 +124,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Screen(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source))); + return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); } /// @@ -244,10 +244,10 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - Vector256 vTwo = Vector256.Create(2F); Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); - Vector256 right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), Avx.Subtract(vOne, backdrop))); + Vector256 vOneMinusSource = Avx.Subtract(vOne, source); + Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -430,9 +430,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Out(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); - Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); - Vector256 alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW); + Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); // premultiply Vector256 color = Avx.Multiply(source, alpha); From afdc53c090791c4c9fe2fd3ccc4ebfc2a5339c06 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 13:16:19 +1000 Subject: [PATCH 22/35] Tanners Top Tips!! --- .../PorterDuffFunctions.Generated.cs | 198 +++++++++--------- .../PorterDuffFunctions.Generated.tt | 22 +- .../PixelBlenders/PorterDuffFunctions.cs | 83 ++++++-- 3 files changed, 170 insertions(+), 133 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index 5740a704ca..bd522da192 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -23,7 +23,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -49,7 +49,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Normal(backdrop, source)); } @@ -79,7 +79,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Normal(backdrop, source)); } @@ -109,7 +109,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -135,7 +135,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -187,7 +187,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Normal(source, backdrop)); } @@ -217,7 +217,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Normal(source, backdrop)); } @@ -247,7 +247,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -273,7 +273,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -299,7 +299,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -325,7 +325,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -568,7 +568,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -594,7 +594,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Multiply(backdrop, source)); } @@ -624,7 +624,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Multiply(backdrop, source)); } @@ -654,7 +654,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -680,7 +680,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -732,7 +732,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Multiply(source, backdrop)); } @@ -762,7 +762,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Multiply(source, backdrop)); } @@ -792,7 +792,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -818,7 +818,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -844,7 +844,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -870,7 +870,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -1113,7 +1113,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -1139,7 +1139,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Add(backdrop, source)); } @@ -1169,7 +1169,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Add(backdrop, source)); } @@ -1199,7 +1199,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -1225,7 +1225,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -1277,7 +1277,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Add(source, backdrop)); } @@ -1307,7 +1307,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Add(source, backdrop)); } @@ -1337,7 +1337,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -1363,7 +1363,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -1389,7 +1389,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -1415,7 +1415,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -1658,7 +1658,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -1684,7 +1684,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Subtract(backdrop, source)); } @@ -1714,7 +1714,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Subtract(backdrop, source)); } @@ -1744,7 +1744,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -1770,7 +1770,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -1822,7 +1822,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Subtract(source, backdrop)); } @@ -1852,7 +1852,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Subtract(source, backdrop)); } @@ -1882,7 +1882,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -1908,7 +1908,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -1934,7 +1934,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -1960,7 +1960,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -2203,7 +2203,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -2229,7 +2229,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Screen(backdrop, source)); } @@ -2259,7 +2259,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Screen(backdrop, source)); } @@ -2289,7 +2289,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -2315,7 +2315,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -2367,7 +2367,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Screen(source, backdrop)); } @@ -2397,7 +2397,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Screen(source, backdrop)); } @@ -2427,7 +2427,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -2453,7 +2453,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -2479,7 +2479,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -2505,7 +2505,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -2748,7 +2748,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -2774,7 +2774,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Darken(backdrop, source)); } @@ -2804,7 +2804,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Darken(backdrop, source)); } @@ -2834,7 +2834,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -2860,7 +2860,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -2912,7 +2912,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Darken(source, backdrop)); } @@ -2942,7 +2942,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Darken(source, backdrop)); } @@ -2972,7 +2972,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -2998,7 +2998,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -3024,7 +3024,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -3050,7 +3050,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -3293,7 +3293,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -3319,7 +3319,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Lighten(backdrop, source)); } @@ -3349,7 +3349,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Lighten(backdrop, source)); } @@ -3379,7 +3379,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -3405,7 +3405,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -3457,7 +3457,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Lighten(source, backdrop)); } @@ -3487,7 +3487,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Lighten(source, backdrop)); } @@ -3517,7 +3517,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -3543,7 +3543,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -3569,7 +3569,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -3595,7 +3595,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -3838,7 +3838,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -3864,7 +3864,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Overlay(backdrop, source)); } @@ -3894,7 +3894,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, Overlay(backdrop, source)); } @@ -3924,7 +3924,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -3950,7 +3950,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -4002,7 +4002,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, Overlay(source, backdrop)); } @@ -4032,7 +4032,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, Overlay(source, backdrop)); } @@ -4062,7 +4062,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -4088,7 +4088,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -4114,7 +4114,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -4140,7 +4140,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } @@ -4383,7 +4383,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -4409,7 +4409,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, HardLight(backdrop, source)); } @@ -4439,7 +4439,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, HardLight(backdrop, source)); } @@ -4469,7 +4469,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -4495,7 +4495,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -4547,7 +4547,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, HardLight(source, backdrop)); } @@ -4577,7 +4577,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, HardLight(source, backdrop)); } @@ -4607,7 +4607,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -4633,7 +4633,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -4659,7 +4659,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -4685,7 +4685,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 34eeb78cbe..69dac875c3 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -33,7 +33,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Src(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } @@ -59,7 +59,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, <#=blender#>(backdrop, source)); } @@ -89,7 +89,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(backdrop, source, <#=blender#>(backdrop, source)); } @@ -119,7 +119,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } @@ -145,7 +145,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } @@ -197,7 +197,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(source, backdrop, <#=blender#>(source, backdrop)); } @@ -227,7 +227,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Over(source, backdrop, <#=blender#>(source, backdrop)); } @@ -257,7 +257,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } @@ -283,7 +283,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } @@ -309,7 +309,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Xor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } @@ -335,7 +335,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Clear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 2d47f1a628..cd85939e03 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -263,19 +263,22 @@ internal static partial class PorterDuffFunctions public static Vector4 Over(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; - float srcW = source.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; + Vector4 srcW = sW - blendW; // calculate final alpha - float alpha = dstW + source.W; + Vector4 alpha = dstW + sW; // calculate final color Vector4 color = (destination * dstW) + (source * srcW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + color.W = alpha.W; return color; } @@ -322,18 +325,21 @@ internal static partial class PorterDuffFunctions public static Vector4 Atop(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; // calculate final alpha - float alpha = destination.W; + Vector4 alpha = dW; // calculate final color Vector4 color = (destination * dstW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + color.W = alpha.W; return color; } @@ -373,11 +379,13 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 In(Vector4 destination, Vector4 source) { - float alpha = destination.W * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = dW * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + color.W = alpha.W; return color; } @@ -411,11 +419,13 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Out(Vector4 destination, Vector4 source) { - float alpha = (1 - destination.W) * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = (Vector4.One - dW) * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + color.W = alpha.W; return color; } @@ -449,15 +459,18 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Xor(Vector4 destination, Vector4 source) { - float srcW = 1 - destination.W; - float dstW = 1 - source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 srcW = Vector4.One - dW; + Vector4 dstW = Vector4.One - sW; - float alpha = (source.W * srcW) + (destination.W * dstW); - Vector4 color = (source.W * source * srcW) + (destination.W * destination * dstW); + Vector4 alpha = (sW * srcW) + (dW * dstW); + Vector4 color = (sW * source * srcW) + (dW * destination * dstW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + color.W = alpha.W; return color; } @@ -493,4 +506,28 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 WithW(Vector4 value, Vector4 w) + { + // TODO: Provide SSE fallback which uses "shuffle" - just pick XYZ from value and W from w + if (Sse41.IsSupported) + { + return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4(); + } + + value.W = w.W; + return value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 PermuteW(Vector4 value) + { + if (Sse.IsSupported) + { + return Sse.Shuffle(value.AsVector128(), value.AsVector128(), 0b11111111).AsVector4(); + } + + return new(value.W); + } } From 78eb2f176473b332fa318ab5ac87d878dba46a43 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 13:28:44 +1000 Subject: [PATCH 23/35] Use WithW --- .../PixelBlenders/PorterDuffFunctions.cs | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index cd85939e03..bc7958f857 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -278,9 +278,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -339,9 +337,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -385,9 +381,7 @@ internal static partial class PorterDuffFunctions Vector4 color = source * alpha; // premultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -425,9 +419,7 @@ internal static partial class PorterDuffFunctions Vector4 color = source * alpha; // premultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// @@ -470,9 +462,7 @@ internal static partial class PorterDuffFunctions // unpremultiply color /= Vector4.Max(alpha, new(Constants.Epsilon)); - color.W = alpha.W; - - return color; + return WithW(color, alpha); } /// From ac0d27d9bdda472c373bf312a149dbfc558d1ccc Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 20 Feb 2023 14:10:49 +1000 Subject: [PATCH 24/35] Provide Sse fallback for WithW --- .../PixelFormats/PixelBlenders/PorterDuffFunctions.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index bc7958f857..baf7d80c0a 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -500,12 +500,19 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 WithW(Vector4 value, Vector4 w) { - // TODO: Provide SSE fallback which uses "shuffle" - just pick XYZ from value and W from w if (Sse41.IsSupported) { return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4(); } + if (Sse.IsSupported) + { + // Create tmp as + // Then return (which is ) + Vector128 tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11); + return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4(); + } + value.W = w.W; return value; } From a04526ff888bb06149a7af9d702e80f7727c3a9f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 21 Feb 2023 15:30:09 +1000 Subject: [PATCH 25/35] Minor cleanup --- src/ImageSharp/ImageFrame{TPixel}.cs | 9 ++++----- src/ImageSharp/IndexedImageFrame{TPixel}.cs | 4 ++-- .../Processing/Extensions/ProcessingExtensions.cs | 12 +++++++----- .../Processing/ProjectiveTransformBuilder.cs | 4 ++-- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/ImageFrame{TPixel}.cs b/src/ImageSharp/ImageFrame{TPixel}.cs index becd69a0eb..3734402d30 100644 --- a/src/ImageSharp/ImageFrame{TPixel}.cs +++ b/src/ImageSharp/ImageFrame{TPixel}.cs @@ -144,7 +144,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource } /// - public Buffer2D PixelBuffer { get; private set; } + public Buffer2D PixelBuffer { get; } /// /// Gets or sets the pixel at the specified position. @@ -309,6 +309,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource /// Copies the pixels to a of the same size. /// /// The target pixel buffer accessor. + /// ImageFrame{TPixel}.CopyTo(): target must be of the same size! internal void CopyTo(Buffer2D target) { if (this.Size() != target.Size()) @@ -445,14 +446,12 @@ public sealed class ImageFrame : ImageFrame, IPixelSource } [MethodImpl(InliningOptions.ColdPath)] - private static void ThrowArgumentOutOfRangeException(string paramName) - { - throw new ArgumentOutOfRangeException(paramName); - } + private static void ThrowArgumentOutOfRangeException(string paramName) => throw new ArgumentOutOfRangeException(paramName); /// /// A implementing the clone logic for . /// + /// The type of the target pixel format. private readonly struct RowIntervalOperation : IRowIntervalOperation where TPixel2 : unmanaged, IPixel { diff --git a/src/ImageSharp/IndexedImageFrame{TPixel}.cs b/src/ImageSharp/IndexedImageFrame{TPixel}.cs index 12a78a0239..6807e77ad2 100644 --- a/src/ImageSharp/IndexedImageFrame{TPixel}.cs +++ b/src/ImageSharp/IndexedImageFrame{TPixel}.cs @@ -17,8 +17,8 @@ namespace SixLabors.ImageSharp; public sealed class IndexedImageFrame : IPixelSource, IDisposable where TPixel : unmanaged, IPixel { - private Buffer2D pixelBuffer; - private IMemoryOwner paletteOwner; + private readonly Buffer2D pixelBuffer; + private readonly IMemoryOwner paletteOwner; private bool isDisposed; /// diff --git a/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs b/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs index 02e688f57f..f830ddfd09 100644 --- a/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs +++ b/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs @@ -134,7 +134,7 @@ public static partial class ProcessingExtensions /// The operation is null. /// The source has been disposed. /// The processing operation failed. - public static Image? Clone(this Image source, Action operation) + public static Image Clone(this Image source, Action operation) => Clone(source, source.GetConfiguration(), operation); /// @@ -149,7 +149,7 @@ public static partial class ProcessingExtensions /// The source has been disposed. /// The processing operation failed. /// The new . - public static Image? Clone(this Image source, Configuration configuration, Action operation) + public static Image Clone(this Image source, Configuration configuration, Action operation) { Guard.NotNull(configuration, nameof(configuration)); Guard.NotNull(source, nameof(source)); @@ -158,7 +158,7 @@ public static partial class ProcessingExtensions ProcessingVisitor visitor = new(configuration, operation, false); source.AcceptVisitor(visitor); - return visitor.ResultImage; + return visitor.GetResultImage(); } /// @@ -274,6 +274,8 @@ public static partial class ProcessingExtensions private readonly bool mutate; + private Image? resultImage; + public ProcessingVisitor(Configuration configuration, Action operation, bool mutate) { this.configuration = configuration; @@ -281,7 +283,7 @@ public static partial class ProcessingExtensions this.mutate = mutate; } - public Image? ResultImage { get; private set; } + public Image GetResultImage() => this.resultImage!; public void Visit(Image image) where TPixel : unmanaged, IPixel @@ -290,7 +292,7 @@ public static partial class ProcessingExtensions this.configuration.ImageOperationsProvider.CreateImageProcessingContext(this.configuration, image, this.mutate); this.operation(operationsRunner); - this.ResultImage = operationsRunner.GetResultImage(); + this.resultImage = operationsRunner.GetResultImage(); } } } diff --git a/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs b/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs index 44d6d8e720..ad0888ad77 100644 --- a/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs +++ b/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.Processing; /// public class ProjectiveTransformBuilder { - private readonly List> matrixFactories = new List>(); + private readonly List> matrixFactories = new(); /// /// Prepends a matrix that performs a tapering projective transform. @@ -313,7 +313,7 @@ public class ProjectiveTransformBuilder Guard.MustBeGreaterThan(sourceRectangle.Height, 0, nameof(sourceRectangle)); // Translate the origin matrix to cater for source rectangle offsets. - var matrix = Matrix4x4.CreateTranslation(new Vector3(-sourceRectangle.Location, 0)); + Matrix4x4 matrix = Matrix4x4.CreateTranslation(new Vector3(-sourceRectangle.Location, 0)); Size size = sourceRectangle.Size; From 1a9db45a8500799934ae7645d9f50c68f5fdb1fa Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 21 Feb 2023 15:47:42 +1000 Subject: [PATCH 26/35] Update Buffer2D to allow disposal testing. --- src/ImageSharp/Memory/Buffer2D{T}.cs | 8 +++++++- .../Image/ImageFrameCollectionTests.Generic.cs | 8 ++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Memory/Buffer2D{T}.cs b/src/ImageSharp/Memory/Buffer2D{T}.cs index 8d6465389f..1173e02e17 100644 --- a/src/ImageSharp/Memory/Buffer2D{T}.cs +++ b/src/ImageSharp/Memory/Buffer2D{T}.cs @@ -55,6 +55,8 @@ public sealed class Buffer2D : IDisposable /// internal MemoryGroup FastMemoryGroup { get; private set; } + internal bool IsDisposed { get; private set; } + /// /// Gets a reference to the element at the specified position. /// @@ -79,7 +81,11 @@ public sealed class Buffer2D : IDisposable /// /// Disposes the instance /// - public void Dispose() => this.FastMemoryGroup.Dispose(); + public void Dispose() + { + this.FastMemoryGroup.Dispose(); + this.IsDisposed = true; + } /// /// Gets a to the row 'y' beginning from the pixel at the first pixel on that row. diff --git a/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs b/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs index fba19065b0..62d1ef4db9 100644 --- a/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs +++ b/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs @@ -182,12 +182,12 @@ public abstract partial class ImageFrameCollectionTests new[] { imageFrame1, imageFrame2 }); IPixelSource[] framesSnapShot = collection.OfType>().ToArray(); + + Assert.All(framesSnapShot, f => Assert.False(f.PixelBuffer.IsDisposed)); + collection.Dispose(); - Assert.All( - framesSnapShot, - f => // The pixel source of the frame is null after its been disposed. - Assert.Null(f.PixelBuffer)); + Assert.All(framesSnapShot, f => Assert.True(f.PixelBuffer.IsDisposed)); } [Theory] From 9e7cf1611b8d5f50080715cf16f16b4773b42462 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 21 Feb 2023 21:41:20 +1000 Subject: [PATCH 27/35] Use MMShuffle consts everywhere and refactor explicit shuffle components to use them --- .../Helpers/Shuffle/IComponentShuffle.cs | 54 ++-- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 17 +- .../Common/Helpers/Shuffle/IShuffle3.cs | 7 +- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 15 +- .../Common/Helpers/SimdUtils.Shuffle.cs | 271 +++++++++++++++++- .../Formats/Webp/Lossless/LosslessUtils.cs | 58 ++-- .../Formats/Webp/Lossy/Vp8Encoding.cs | 9 +- .../Color/Bulk/ShuffleFloat4Channel.cs | 5 +- .../Common/SimdUtilsTests.Shuffle.cs | 50 ++-- 9 files changed, 369 insertions(+), 117 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 45d6e6d13d..b932028149 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -5,6 +5,7 @@ using System.Buffers.Binary; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) // and ROTR (Rotate Right) emitting efficient CPU instructions: @@ -18,9 +19,12 @@ namespace SixLabors.ImageSharp; internal interface IComponentShuffle { /// - /// Gets the shuffle control. + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . /// - byte Control { get; } + /// The source span of bytes. + /// The destination span of bytes. + void ShuffleReduce(ref ReadOnlySpan source, ref Span dest); /// /// Shuffle 8-bit integers within 128-bit lanes in @@ -58,11 +62,15 @@ internal readonly struct DefaultShuffle4 : IShuffle4 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { @@ -86,11 +94,9 @@ internal readonly struct DefaultShuffle4 : IShuffle4 internal readonly struct WXYZShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle2103); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -112,11 +118,9 @@ internal readonly struct WXYZShuffle4 : IShuffle4 internal readonly struct WZYXShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0123); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -138,11 +142,9 @@ internal readonly struct WZYXShuffle4 : IShuffle4 internal readonly struct YZWXShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0321); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -164,11 +166,9 @@ internal readonly struct YZWXShuffle4 : IShuffle4 internal readonly struct ZYXWShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3012); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -197,11 +197,9 @@ internal readonly struct ZYXWShuffle4 : IShuffle4 internal readonly struct XWZYShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(1, 2, 3, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle1230); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 76cffd82bf..7f99b3a084 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -29,11 +30,15 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { @@ -51,7 +56,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) { - ref var s = ref Unsafe.Add(ref sBase, i); + ref byte s = ref Unsafe.Add(ref sBase, i); tu = Unsafe.As(ref s) | 0xFF000000; Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); @@ -64,11 +69,9 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index 9bee9d15ec..edbd2cd5ea 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -26,11 +27,15 @@ internal readonly struct DefaultShuffle3 : IShuffle3 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle3Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 90b77b568e..2179a085be 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -27,11 +28,15 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { @@ -53,11 +58,9 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index b91dc2fad2..4ff5f3d5b1 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -37,6 +37,7 @@ internal static partial class SimdUtils /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -49,7 +50,7 @@ internal static partial class SimdUtils { VerifyShuffle4SpanInput(source, dest); - HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -62,6 +63,7 @@ internal static partial class SimdUtils /// Shuffle 8-bit integer triplets within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -75,7 +77,7 @@ internal static partial class SimdUtils // Source length should be smaller than dest length, and divisible by 3. VerifyShuffle3SpanInput(source, dest); - HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -88,6 +90,7 @@ internal static partial class SimdUtils /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -100,7 +103,7 @@ internal static partial class SimdUtils { VerifyPad3Shuffle4SpanInput(source, dest); - HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -113,6 +116,7 @@ internal static partial class SimdUtils /// Shuffles then slices 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -125,7 +129,7 @@ internal static partial class SimdUtils { VerifyShuffle4Slice3SpanInput(source, dest); - HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -153,7 +157,7 @@ internal static partial class SimdUtils } [Conditional("DEBUG")] - private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + internal static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) where T : struct { DebugGuard.IsTrue( @@ -222,6 +226,263 @@ internal static partial class SimdUtils public static class Shuffle { + public const byte MMShuffle0000 = 0b00000000; + public const byte MMShuffle0001 = 0b00000001; + public const byte MMShuffle0002 = 0b00000010; + public const byte MMShuffle0003 = 0b00000011; + public const byte MMShuffle0010 = 0b00000100; + public const byte MMShuffle0011 = 0b00000101; + public const byte MMShuffle0012 = 0b00000110; + public const byte MMShuffle0013 = 0b00000111; + public const byte MMShuffle0020 = 0b00001000; + public const byte MMShuffle0021 = 0b00001001; + public const byte MMShuffle0022 = 0b00001010; + public const byte MMShuffle0023 = 0b00001011; + public const byte MMShuffle0030 = 0b00001100; + public const byte MMShuffle0031 = 0b00001101; + public const byte MMShuffle0032 = 0b00001110; + public const byte MMShuffle0033 = 0b00001111; + public const byte MMShuffle0100 = 0b00010000; + public const byte MMShuffle0101 = 0b00010001; + public const byte MMShuffle0102 = 0b00010010; + public const byte MMShuffle0103 = 0b00010011; + public const byte MMShuffle0110 = 0b00010100; + public const byte MMShuffle0111 = 0b00010101; + public const byte MMShuffle0112 = 0b00010110; + public const byte MMShuffle0113 = 0b00010111; + public const byte MMShuffle0120 = 0b00011000; + public const byte MMShuffle0121 = 0b00011001; + public const byte MMShuffle0122 = 0b00011010; + public const byte MMShuffle0123 = 0b00011011; + public const byte MMShuffle0130 = 0b00011100; + public const byte MMShuffle0131 = 0b00011101; + public const byte MMShuffle0132 = 0b00011110; + public const byte MMShuffle0133 = 0b00011111; + public const byte MMShuffle0200 = 0b00100000; + public const byte MMShuffle0201 = 0b00100001; + public const byte MMShuffle0202 = 0b00100010; + public const byte MMShuffle0203 = 0b00100011; + public const byte MMShuffle0210 = 0b00100100; + public const byte MMShuffle0211 = 0b00100101; + public const byte MMShuffle0212 = 0b00100110; + public const byte MMShuffle0213 = 0b00100111; + public const byte MMShuffle0220 = 0b00101000; + public const byte MMShuffle0221 = 0b00101001; + public const byte MMShuffle0222 = 0b00101010; + public const byte MMShuffle0223 = 0b00101011; + public const byte MMShuffle0230 = 0b00101100; + public const byte MMShuffle0231 = 0b00101101; + public const byte MMShuffle0232 = 0b00101110; + public const byte MMShuffle0233 = 0b00101111; + public const byte MMShuffle0300 = 0b00110000; + public const byte MMShuffle0301 = 0b00110001; + public const byte MMShuffle0302 = 0b00110010; + public const byte MMShuffle0303 = 0b00110011; + public const byte MMShuffle0310 = 0b00110100; + public const byte MMShuffle0311 = 0b00110101; + public const byte MMShuffle0312 = 0b00110110; + public const byte MMShuffle0313 = 0b00110111; + public const byte MMShuffle0320 = 0b00111000; + public const byte MMShuffle0321 = 0b00111001; + public const byte MMShuffle0322 = 0b00111010; + public const byte MMShuffle0323 = 0b00111011; + public const byte MMShuffle0330 = 0b00111100; + public const byte MMShuffle0331 = 0b00111101; + public const byte MMShuffle0332 = 0b00111110; + public const byte MMShuffle0333 = 0b00111111; + public const byte MMShuffle1000 = 0b01000000; + public const byte MMShuffle1001 = 0b01000001; + public const byte MMShuffle1002 = 0b01000010; + public const byte MMShuffle1003 = 0b01000011; + public const byte MMShuffle1010 = 0b01000100; + public const byte MMShuffle1011 = 0b01000101; + public const byte MMShuffle1012 = 0b01000110; + public const byte MMShuffle1013 = 0b01000111; + public const byte MMShuffle1020 = 0b01001000; + public const byte MMShuffle1021 = 0b01001001; + public const byte MMShuffle1022 = 0b01001010; + public const byte MMShuffle1023 = 0b01001011; + public const byte MMShuffle1030 = 0b01001100; + public const byte MMShuffle1031 = 0b01001101; + public const byte MMShuffle1032 = 0b01001110; + public const byte MMShuffle1033 = 0b01001111; + public const byte MMShuffle1100 = 0b01010000; + public const byte MMShuffle1101 = 0b01010001; + public const byte MMShuffle1102 = 0b01010010; + public const byte MMShuffle1103 = 0b01010011; + public const byte MMShuffle1110 = 0b01010100; + public const byte MMShuffle1111 = 0b01010101; + public const byte MMShuffle1112 = 0b01010110; + public const byte MMShuffle1113 = 0b01010111; + public const byte MMShuffle1120 = 0b01011000; + public const byte MMShuffle1121 = 0b01011001; + public const byte MMShuffle1122 = 0b01011010; + public const byte MMShuffle1123 = 0b01011011; + public const byte MMShuffle1130 = 0b01011100; + public const byte MMShuffle1131 = 0b01011101; + public const byte MMShuffle1132 = 0b01011110; + public const byte MMShuffle1133 = 0b01011111; + public const byte MMShuffle1200 = 0b01100000; + public const byte MMShuffle1201 = 0b01100001; + public const byte MMShuffle1202 = 0b01100010; + public const byte MMShuffle1203 = 0b01100011; + public const byte MMShuffle1210 = 0b01100100; + public const byte MMShuffle1211 = 0b01100101; + public const byte MMShuffle1212 = 0b01100110; + public const byte MMShuffle1213 = 0b01100111; + public const byte MMShuffle1220 = 0b01101000; + public const byte MMShuffle1221 = 0b01101001; + public const byte MMShuffle1222 = 0b01101010; + public const byte MMShuffle1223 = 0b01101011; + public const byte MMShuffle1230 = 0b01101100; + public const byte MMShuffle1231 = 0b01101101; + public const byte MMShuffle1232 = 0b01101110; + public const byte MMShuffle1233 = 0b01101111; + public const byte MMShuffle1300 = 0b01110000; + public const byte MMShuffle1301 = 0b01110001; + public const byte MMShuffle1302 = 0b01110010; + public const byte MMShuffle1303 = 0b01110011; + public const byte MMShuffle1310 = 0b01110100; + public const byte MMShuffle1311 = 0b01110101; + public const byte MMShuffle1312 = 0b01110110; + public const byte MMShuffle1313 = 0b01110111; + public const byte MMShuffle1320 = 0b01111000; + public const byte MMShuffle1321 = 0b01111001; + public const byte MMShuffle1322 = 0b01111010; + public const byte MMShuffle1323 = 0b01111011; + public const byte MMShuffle1330 = 0b01111100; + public const byte MMShuffle1331 = 0b01111101; + public const byte MMShuffle1332 = 0b01111110; + public const byte MMShuffle1333 = 0b01111111; + public const byte MMShuffle2000 = 0b10000000; + public const byte MMShuffle2001 = 0b10000001; + public const byte MMShuffle2002 = 0b10000010; + public const byte MMShuffle2003 = 0b10000011; + public const byte MMShuffle2010 = 0b10000100; + public const byte MMShuffle2011 = 0b10000101; + public const byte MMShuffle2012 = 0b10000110; + public const byte MMShuffle2013 = 0b10000111; + public const byte MMShuffle2020 = 0b10001000; + public const byte MMShuffle2021 = 0b10001001; + public const byte MMShuffle2022 = 0b10001010; + public const byte MMShuffle2023 = 0b10001011; + public const byte MMShuffle2030 = 0b10001100; + public const byte MMShuffle2031 = 0b10001101; + public const byte MMShuffle2032 = 0b10001110; + public const byte MMShuffle2033 = 0b10001111; + public const byte MMShuffle2100 = 0b10010000; + public const byte MMShuffle2101 = 0b10010001; + public const byte MMShuffle2102 = 0b10010010; + public const byte MMShuffle2103 = 0b10010011; + public const byte MMShuffle2110 = 0b10010100; + public const byte MMShuffle2111 = 0b10010101; + public const byte MMShuffle2112 = 0b10010110; + public const byte MMShuffle2113 = 0b10010111; + public const byte MMShuffle2120 = 0b10011000; + public const byte MMShuffle2121 = 0b10011001; + public const byte MMShuffle2122 = 0b10011010; + public const byte MMShuffle2123 = 0b10011011; + public const byte MMShuffle2130 = 0b10011100; + public const byte MMShuffle2131 = 0b10011101; + public const byte MMShuffle2132 = 0b10011110; + public const byte MMShuffle2133 = 0b10011111; + public const byte MMShuffle2200 = 0b10100000; + public const byte MMShuffle2201 = 0b10100001; + public const byte MMShuffle2202 = 0b10100010; + public const byte MMShuffle2203 = 0b10100011; + public const byte MMShuffle2210 = 0b10100100; + public const byte MMShuffle2211 = 0b10100101; + public const byte MMShuffle2212 = 0b10100110; + public const byte MMShuffle2213 = 0b10100111; + public const byte MMShuffle2220 = 0b10101000; + public const byte MMShuffle2221 = 0b10101001; + public const byte MMShuffle2222 = 0b10101010; + public const byte MMShuffle2223 = 0b10101011; + public const byte MMShuffle2230 = 0b10101100; + public const byte MMShuffle2231 = 0b10101101; + public const byte MMShuffle2232 = 0b10101110; + public const byte MMShuffle2233 = 0b10101111; + public const byte MMShuffle2300 = 0b10110000; + public const byte MMShuffle2301 = 0b10110001; + public const byte MMShuffle2302 = 0b10110010; + public const byte MMShuffle2303 = 0b10110011; + public const byte MMShuffle2310 = 0b10110100; + public const byte MMShuffle2311 = 0b10110101; + public const byte MMShuffle2312 = 0b10110110; + public const byte MMShuffle2313 = 0b10110111; + public const byte MMShuffle2320 = 0b10111000; + public const byte MMShuffle2321 = 0b10111001; + public const byte MMShuffle2322 = 0b10111010; + public const byte MMShuffle2323 = 0b10111011; + public const byte MMShuffle2330 = 0b10111100; + public const byte MMShuffle2331 = 0b10111101; + public const byte MMShuffle2332 = 0b10111110; + public const byte MMShuffle2333 = 0b10111111; + public const byte MMShuffle3000 = 0b11000000; + public const byte MMShuffle3001 = 0b11000001; + public const byte MMShuffle3002 = 0b11000010; + public const byte MMShuffle3003 = 0b11000011; + public const byte MMShuffle3010 = 0b11000100; + public const byte MMShuffle3011 = 0b11000101; + public const byte MMShuffle3012 = 0b11000110; + public const byte MMShuffle3013 = 0b11000111; + public const byte MMShuffle3020 = 0b11001000; + public const byte MMShuffle3021 = 0b11001001; + public const byte MMShuffle3022 = 0b11001010; + public const byte MMShuffle3023 = 0b11001011; + public const byte MMShuffle3030 = 0b11001100; + public const byte MMShuffle3031 = 0b11001101; + public const byte MMShuffle3032 = 0b11001110; + public const byte MMShuffle3033 = 0b11001111; + public const byte MMShuffle3100 = 0b11010000; + public const byte MMShuffle3101 = 0b11010001; + public const byte MMShuffle3102 = 0b11010010; + public const byte MMShuffle3103 = 0b11010011; + public const byte MMShuffle3110 = 0b11010100; + public const byte MMShuffle3111 = 0b11010101; + public const byte MMShuffle3112 = 0b11010110; + public const byte MMShuffle3113 = 0b11010111; + public const byte MMShuffle3120 = 0b11011000; + public const byte MMShuffle3121 = 0b11011001; + public const byte MMShuffle3122 = 0b11011010; + public const byte MMShuffle3123 = 0b11011011; + public const byte MMShuffle3130 = 0b11011100; + public const byte MMShuffle3131 = 0b11011101; + public const byte MMShuffle3132 = 0b11011110; + public const byte MMShuffle3133 = 0b11011111; + public const byte MMShuffle3200 = 0b11100000; + public const byte MMShuffle3201 = 0b11100001; + public const byte MMShuffle3202 = 0b11100010; + public const byte MMShuffle3203 = 0b11100011; + public const byte MMShuffle3210 = 0b11100100; + public const byte MMShuffle3211 = 0b11100101; + public const byte MMShuffle3212 = 0b11100110; + public const byte MMShuffle3213 = 0b11100111; + public const byte MMShuffle3220 = 0b11101000; + public const byte MMShuffle3221 = 0b11101001; + public const byte MMShuffle3222 = 0b11101010; + public const byte MMShuffle3223 = 0b11101011; + public const byte MMShuffle3230 = 0b11101100; + public const byte MMShuffle3231 = 0b11101101; + public const byte MMShuffle3232 = 0b11101110; + public const byte MMShuffle3233 = 0b11101111; + public const byte MMShuffle3300 = 0b11110000; + public const byte MMShuffle3301 = 0b11110001; + public const byte MMShuffle3302 = 0b11110010; + public const byte MMShuffle3303 = 0b11110011; + public const byte MMShuffle3310 = 0b11110100; + public const byte MMShuffle3311 = 0b11110101; + public const byte MMShuffle3312 = 0b11110110; + public const byte MMShuffle3313 = 0b11110111; + public const byte MMShuffle3320 = 0b11111000; + public const byte MMShuffle3321 = 0b11111001; + public const byte MMShuffle3322 = 0b11111010; + public const byte MMShuffle3323 = 0b11111011; + public const byte MMShuffle3330 = 0b11111100; + public const byte MMShuffle3331 = 0b11111101; + public const byte MMShuffle3332 = 0b11111110; + public const byte MMShuffle3333 = 0b11111111; + [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 5d9c207096..ac92ce6a6a 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -96,7 +96,7 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported) { - var addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + Vector256 addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 8; i += 8) @@ -115,7 +115,7 @@ internal static unsafe class LosslessUtils } else if (Ssse3.IsSupported) { - var addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + Vector128 addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 4; i += 4) @@ -138,13 +138,11 @@ internal static unsafe class LosslessUtils nint i; for (i = 0; i <= numPixels - 4; i += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -178,7 +176,7 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported) { - var subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + Vector256 subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 8; i += 8) @@ -197,7 +195,7 @@ internal static unsafe class LosslessUtils } else if (Ssse3.IsSupported) { - var subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + Vector128 subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 4; i += 4) @@ -220,13 +218,11 @@ internal static unsafe class LosslessUtils nint i; for (i = 0; i <= numPixels - 4; i += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -334,7 +330,7 @@ internal static unsafe class LosslessUtils while (y < yEnd) { int predRowIdx = predRowIdxStart; - var m = default(Vp8LMultipliers); + Vp8LMultipliers m = default; int srcSafeEnd = pixelPos + safeWidth; int srcEnd = pixelPos + width; while (pixelPos < srcSafeEnd) @@ -371,21 +367,19 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported && numPixels >= 8) { - var transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + Vector256 transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector256 transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= numPixels - 8; idx += 8) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -403,20 +397,18 @@ internal static unsafe class LosslessUtils } else if (Sse2.IsSupported) { - var transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + Vector128 transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector128 transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= numPixels - 4; idx += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -465,19 +457,17 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported && pixelData.Length >= 8) { - var transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector256 transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= pixelData.Length - 8; idx += 8) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorInverseAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); @@ -496,20 +486,18 @@ internal static unsafe class LosslessUtils } else if (Sse2.IsSupported) { - var transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector128 transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= pixelData.Length - 4; idx += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorInverseAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index cc263657f1..82f00e8760 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -521,9 +521,8 @@ internal static unsafe class Vp8Encoding { // *in01 = 00 01 10 11 02 03 12 13 // *in23 = 20 21 30 31 22 23 32 33 - const byte mmShuffle_2301 = 0b_10_11_00_01; - Vector128 shuf01_p = Sse2.ShuffleHigh(row01, mmShuffle_2301); - Vector128 shuf32_p = Sse2.ShuffleHigh(row23, mmShuffle_2301); + Vector128 shuf01_p = Sse2.ShuffleHigh(row01, SimdUtils.Shuffle.MMShuffle2301); + Vector128 shuf32_p = Sse2.ShuffleHigh(row23, SimdUtils.Shuffle.MMShuffle2301); // 00 01 10 11 03 02 13 12 // 20 21 30 31 23 22 33 32 @@ -555,9 +554,7 @@ internal static unsafe class Vp8Encoding Vector128 shi = Sse2.UnpackHigh(s03, s12); // 2 3 2 3 2 3 Vector128 v23 = Sse2.UnpackHigh(slo.AsInt32(), shi.AsInt32()); out01 = Sse2.UnpackLow(slo.AsInt32(), shi.AsInt32()); - - const byte mmShuffle_1032 = 0b_01_00_11_10; - out32 = Sse2.Shuffle(v23, mmShuffle_1032); + out32 = Sse2.Shuffle(v23, SimdUtils.Shuffle.MMShuffle1032); } public static void FTransformPass2SSE2(Vector128 v01, Vector128 v32, Span output) diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index bdeb880828..ec23364caa 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -9,7 +9,6 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -25,9 +24,7 @@ public class ShuffleFloat4Channel [Benchmark] public void Shuffle4Channel() - { - SimdUtils.Shuffle4(this.source, this.destination, Control); - } + => SimdUtils.Shuffle4(this.source, this.destination, SimdUtils.Shuffle.MMShuffle2103); } // 2020-10-29 diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 9727731b6e..2c4989f387 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -16,7 +16,7 @@ public partial class SimdUtilsTests // No need to test multiple shuffle controls as the // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + const byte control = SimdUtils.Shuffle.MMShuffle0123; TestShuffleFloat4Channel( size, @@ -45,39 +45,39 @@ public partial class SimdUtilsTests TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), - wxyz.Control); + SimdUtils.Shuffle.MMShuffle2103); WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), - wzyx.Control); + SimdUtils.Shuffle.MMShuffle0123); YZWXShuffle4 yzwx = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), - yzwx.Control); + SimdUtils.Shuffle.MMShuffle0321); ZYXWShuffle4 zyxw = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), - zyxw.Control); + SimdUtils.Shuffle.MMShuffle3012); - var xwyz = new DefaultShuffle4(2, 1, 3, 0); + DefaultShuffle4 xwyz = new(2, 1, 3, 0); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultShuffle4(1, 1, 1, 1); + DefaultShuffle4 yyyy = new(1, 1, 1, 1); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultShuffle4(3, 3, 3, 3); + DefaultShuffle4 wwww = new(3, 3, 3, 3); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), @@ -101,25 +101,25 @@ public partial class SimdUtilsTests // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - var zyx = new DefaultShuffle3(0, 1, 2); + DefaultShuffle3 zyx = new(0, 1, 2); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), zyx.Control); - var xyz = new DefaultShuffle3(2, 1, 0); + DefaultShuffle3 xyz = new(2, 1, 0); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), xyz.Control); - var yyy = new DefaultShuffle3(1, 1, 1); + DefaultShuffle3 yyy = new(1, 1, 1); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), yyy.Control); - var zzz = new DefaultShuffle3(2, 2, 2); + DefaultShuffle3 zzz = new(2, 2, 2); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), @@ -147,21 +147,21 @@ public partial class SimdUtilsTests TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), - xyzw.Control); + SimdUtils.Shuffle.MMShuffle3210); - var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + DefaultPad3Shuffle4 xwyz = new(2, 1, 3, 0); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + DefaultPad3Shuffle4 yyyy = new(1, 1, 1, 1); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + DefaultPad3Shuffle4 wwww = new(3, 3, 3, 3); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), @@ -189,21 +189,21 @@ public partial class SimdUtilsTests TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), - xyzw.Control); + SimdUtils.Shuffle.MMShuffle3210); - var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + DefaultShuffle4Slice3 xwyz = new(2, 1, 3, 0); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + DefaultShuffle4Slice3 yyyy = new(1, 1, 1, 1); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + DefaultShuffle4Slice3 wwww = new(3, 3, 3, 3); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), @@ -222,7 +222,7 @@ public partial class SimdUtilsTests byte control) { float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); - var result = new float[count]; + float[] result = new float[count]; float[] expected = new float[count]; @@ -253,7 +253,7 @@ public partial class SimdUtilsTests { byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count]; + byte[] result = new byte[count]; byte[] expected = new byte[count]; @@ -284,7 +284,7 @@ public partial class SimdUtilsTests { byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count]; + byte[] result = new byte[count]; byte[] expected = new byte[count]; @@ -315,7 +315,7 @@ public partial class SimdUtilsTests byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count * 4 / 3]; + byte[] result = new byte[count * 4 / 3]; byte[] expected = new byte[result.Length]; @@ -366,7 +366,7 @@ public partial class SimdUtilsTests byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count * 3 / 4]; + byte[] result = new byte[count * 3 / 4]; byte[] expected = new byte[result.Length]; From b9e4810418c47012aaed2dfa4c740b252c4c2585 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 21 Feb 2023 22:00:10 +1000 Subject: [PATCH 28/35] Refactor DefaultShuffleX to use constants. --- .../Helpers/Shuffle/IComponentShuffle.cs | 24 +---- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 24 +---- .../Common/Helpers/Shuffle/IShuffle3.cs | 20 +---- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 21 +---- .../PixelFormats/Utils/PixelConverter.cs | 88 ++++++++++++++++--- .../Common/SimdUtilsTests.Shuffle.cs | 26 +++--- 6 files changed, 103 insertions(+), 100 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index b932028149..6ce04ef21c 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -46,23 +46,10 @@ internal interface IShuffle4 : IComponentShuffle internal readonly struct DefaultShuffle4 : IShuffle4 { - private readonly byte p3; - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + public DefaultShuffle4(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p3 = p3; - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -77,10 +64,7 @@ internal readonly struct DefaultShuffle4 : IShuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p3 = this.p3; - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 7f99b3a084..69a688e59a 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -14,23 +14,10 @@ internal interface IPad3Shuffle4 : IComponentShuffle internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 { - private readonly byte p3; - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + public DefaultPad3Shuffle4(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p3 = p3; - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -45,10 +32,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p3 = this.p3; - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); Span temp = stackalloc byte[4]; ref byte t = ref MemoryMarshal.GetReference(temp); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index edbd2cd5ea..4e1c2a4e4e 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -14,20 +14,10 @@ internal interface IShuffle3 : IComponentShuffle internal readonly struct DefaultShuffle3 : IShuffle3 { - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle3(byte p2, byte p1, byte p0) + public DefaultShuffle3(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); - - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -42,9 +32,7 @@ internal readonly struct DefaultShuffle3 : IShuffle3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 3) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 2179a085be..ec0c4fcf27 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -14,21 +14,10 @@ internal interface IShuffle4Slice3 : IComponentShuffle internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 { - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + public DefaultShuffle4Slice3(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -43,9 +32,7 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) { diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 3ae5a3b5e4..50a68906e7 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -29,6 +29,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -38,6 +40,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -47,6 +51,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -56,6 +62,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4Slice3(source, dest, default); @@ -65,9 +73,11 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle3012)); } /// @@ -82,6 +92,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -91,6 +103,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -100,6 +114,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -109,18 +125,22 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0321)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0123)); } /// @@ -135,6 +155,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -144,6 +166,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -153,6 +177,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -162,15 +188,19 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4Slice3(source, dest, default); @@ -188,6 +218,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -197,6 +229,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -206,6 +240,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -215,18 +251,22 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0321)); } /// @@ -241,6 +281,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); @@ -250,36 +292,44 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle2103)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(SimdUtils.Shuffle.MMShuffle3012)); } /// @@ -294,24 +344,30 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); @@ -321,17 +377,21 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle2103)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(SimdUtils.Shuffle.MMShuffle3012)); } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 2c4989f387..5684c20e8b 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -65,19 +65,19 @@ public partial class SimdUtilsTests (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), SimdUtils.Shuffle.MMShuffle3012); - DefaultShuffle4 xwyz = new(2, 1, 3, 0); + DefaultShuffle4 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - DefaultShuffle4 yyyy = new(1, 1, 1, 1); + DefaultShuffle4 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - DefaultShuffle4 wwww = new(3, 3, 3, 3); + DefaultShuffle4 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), @@ -101,25 +101,25 @@ public partial class SimdUtilsTests // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - DefaultShuffle3 zyx = new(0, 1, 2); + DefaultShuffle3 zyx = new(SimdUtils.Shuffle.MMShuffle3012); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), zyx.Control); - DefaultShuffle3 xyz = new(2, 1, 0); + DefaultShuffle3 xyz = new(SimdUtils.Shuffle.MMShuffle3210); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), xyz.Control); - DefaultShuffle3 yyy = new(1, 1, 1); + DefaultShuffle3 yyy = new(SimdUtils.Shuffle.MMShuffle3111); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), yyy.Control); - DefaultShuffle3 zzz = new(2, 2, 2); + DefaultShuffle3 zzz = new(SimdUtils.Shuffle.MMShuffle3222); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), @@ -149,19 +149,19 @@ public partial class SimdUtilsTests (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), SimdUtils.Shuffle.MMShuffle3210); - DefaultPad3Shuffle4 xwyz = new(2, 1, 3, 0); + DefaultPad3Shuffle4 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - DefaultPad3Shuffle4 yyyy = new(1, 1, 1, 1); + DefaultPad3Shuffle4 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - DefaultPad3Shuffle4 wwww = new(3, 3, 3, 3); + DefaultPad3Shuffle4 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), @@ -191,19 +191,19 @@ public partial class SimdUtilsTests (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), SimdUtils.Shuffle.MMShuffle3210); - DefaultShuffle4Slice3 xwyz = new(2, 1, 3, 0); + DefaultShuffle4Slice3 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), xwyz.Control); - DefaultShuffle4Slice3 yyyy = new(1, 1, 1, 1); + DefaultShuffle4Slice3 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), yyyy.Control); - DefaultShuffle4Slice3 wwww = new(3, 3, 3, 3); + DefaultShuffle4Slice3 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), From eda0869f094c77cb61a8e4283c35f4ea3289bb47 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 22 Feb 2023 00:00:21 +1000 Subject: [PATCH 29/35] Update benchmarks and add more tests --- .../Helpers/Shuffle/IComponentShuffle.cs | 2 +- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 2 +- .../Common/Helpers/Shuffle/IShuffle3.cs | 2 +- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 2 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 10 +- .../Common/Helpers/SimdUtils.Shuffle.cs | 10 +- .../Color/Bulk/Pad3Shuffle4Channel.cs | 58 +++- .../Color/Bulk/Shuffle3Channel.cs | 39 ++- .../Color/Bulk/Shuffle4Slice3Channel.cs | 68 ++++- .../Color/Bulk/ShuffleByte4Channel.cs | 39 ++- .../Color/Bulk/ShuffleFloat4Channel.cs | 35 +++ .../Common/SimdUtilsTests.Shuffle.cs | 275 +++++++++++++++++- 12 files changed, 500 insertions(+), 42 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 6ce04ef21c..345f4b5c2e 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -64,7 +64,7 @@ internal readonly struct DefaultShuffle4 : IShuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 69a688e59a..348e8ec010 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -32,7 +32,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); Span temp = stackalloc byte[4]; ref byte t = ref MemoryMarshal.GetReference(temp); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index 4e1c2a4e4e..6ac8a2428a 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -32,7 +32,7 @@ internal readonly struct DefaultShuffle3 : IShuffle3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 3) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index ec0c4fcf27..d4a5e8130a 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -32,7 +32,7 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) { diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 7d2bab259e..3841b64b4d 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -297,7 +297,7 @@ internal static partial class SimdUtils // shuffle controls to add to the library. // We can add static ROS instances if need be in the future. Span bytes = stackalloc byte[Vector256.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = @@ -333,7 +333,7 @@ internal static partial class SimdUtils { // Ssse3 Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -382,7 +382,7 @@ internal static partial class SimdUtils Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -445,7 +445,7 @@ internal static partial class SimdUtils Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -489,7 +489,7 @@ internal static partial class SimdUtils Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 4ff5f3d5b1..aecdaa6390 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -145,7 +145,7 @@ internal static partial class SimdUtils { ref float sBase = ref MemoryMarshal.GetReference(source); ref float dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { @@ -484,13 +484,13 @@ internal static partial class SimdUtils public const byte MMShuffle3333 = 0b11111111; [MethodImpl(InliningOptions.ShortMethod)] - public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) + public static byte MMShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); [MethodImpl(InliningOptions.ShortMethod)] - public static void MmShuffleSpan(ref Span span, byte control) + public static void MMShuffleSpan(ref Span span, byte control) { - InverseMmShuffle( + InverseMMShuffle( control, out int p3, out int p2, @@ -509,7 +509,7 @@ internal static partial class SimdUtils } [MethodImpl(InliningOptions.ShortMethod)] - public static void InverseMmShuffle( + public static void InverseMMShuffle( byte control, out int p3, out int p2, diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index df308cdd4b..73dcf55a19 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -8,8 +8,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { - private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); - private static readonly XYZWPad3Shuffle4 ControlFast = default; + private static readonly DefaultPad3Shuffle4 Control = new(SimdUtils.Shuffle.MMShuffle1032); private byte[] source; private byte[] destination; @@ -26,15 +25,11 @@ public class Pad3Shuffle4Channel [Benchmark] public void Pad3Shuffle4() - { - SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); - } + => SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); [Benchmark] public void Pad3Shuffle4FastFallback() - { - SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); - } + => SimdUtils.Pad3Shuffle4(this.source, this.destination, default(XYZWPad3Shuffle4)); } // 2020-10-30 @@ -83,3 +78,50 @@ public class Pad3Shuffle4Channel // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT + +// Runtime=.NET 6.0 + +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs index 7f6f5901ff..9bd85f4743 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -1,14 +1,16 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using Iced.Intel; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle3Channel { - private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private static readonly DefaultShuffle3 Control = new(SimdUtils.Shuffle.MMShuffle3102); private byte[] source; private byte[] destination; @@ -25,9 +27,7 @@ public class Shuffle3Channel [Benchmark] public void Shuffle3() - { - SimdUtils.Shuffle3(this.source, this.destination, Control); - } + => SimdUtils.Shuffle3(this.source, this.destination, Control); } // 2020-11-02 @@ -60,3 +60,34 @@ public class Shuffle3Channel // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT + +// Runtime=.NET 6.0 + +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |--------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 96 | 15.18 ns | 0.056 ns | 0.053 ns | 0.34 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 384 | 29.70 ns | 0.061 ns | 0.054 ns | 0.19 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 768 | 49.28 ns | 0.156 ns | 0.131 ns | 0.16 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 1536 | 93.57 ns | 0.196 ns | 0.183 ns | 0.16 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index 0a0afb7050..f0890221c8 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -1,15 +1,16 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using Iced.Intel; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle4Slice3Channel { - private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); - private static readonly XYZWShuffle4Slice3 ControlFast = default; + private static readonly DefaultShuffle4Slice3 Control = new(SimdUtils.Shuffle.MMShuffle1032); private byte[] source; private byte[] destination; @@ -26,15 +27,11 @@ public class Shuffle4Slice3Channel [Benchmark] public void Shuffle4Slice3() - { - SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); - } + => SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); [Benchmark] public void Shuffle4Slice3FastFallback() - { - SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); - } + => SimdUtils.Shuffle4Slice3(this.source, this.destination, default(XYZWShuffle4Slice3)); } // 2020-10-29 @@ -91,3 +88,58 @@ public class Shuffle4Slice3Channel // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 45.59 ns | 0.166 ns | 0.147 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 128 | 15.62 ns | 0.056 ns | 0.052 ns | 0.34 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 128 | 16.37 ns | 0.047 ns | 0.040 ns | 0.36 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 13.23 ns | 0.028 ns | 0.026 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 128 | 14.41 ns | 0.013 ns | 0.012 ns | 1.09 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 128 | 14.70 ns | 0.050 ns | 0.047 ns | 1.11 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 85.48 ns | 0.192 ns | 0.179 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.18 ns | 0.230 ns | 0.204 ns | 0.22 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 256 | 18.66 ns | 0.017 ns | 0.015 ns | 0.22 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 24.34 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 256 | 18.58 ns | 0.061 ns | 0.057 ns | 0.76 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 256 | 19.23 ns | 0.018 ns | 0.016 ns | 0.79 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 165.31 ns | 0.742 ns | 0.694 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 512 | 28.10 ns | 0.077 ns | 0.068 ns | 0.17 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 512 | 28.99 ns | 0.018 ns | 0.014 ns | 0.18 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 53.45 ns | 0.270 ns | 0.226 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 512 | 27.50 ns | 0.034 ns | 0.028 ns | 0.51 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 512 | 28.76 ns | 0.017 ns | 0.015 ns | 0.54 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 323.87 ns | 0.549 ns | 0.487 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.81 ns | 0.056 ns | 0.050 ns | 0.13 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 1024 | 39.95 ns | 0.075 ns | 0.067 ns | 0.12 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 101.37 ns | 0.080 ns | 0.067 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.72 ns | 0.049 ns | 0.041 ns | 0.40 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 1024 | 39.78 ns | 0.029 ns | 0.027 ns | 0.39 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 642.95 ns | 2.067 ns | 1.933 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 73.19 ns | 0.082 ns | 0.077 ns | 0.11 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 2048 | 69.83 ns | 0.319 ns | 0.267 ns | 0.11 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 196.85 ns | 0.238 ns | 0.211 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 72.89 ns | 0.117 ns | 0.098 ns | 0.37 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 2048 | 69.59 ns | 0.073 ns | 0.061 ns | 0.35 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index 6323e1c55c..8e740743ba 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -24,9 +24,7 @@ public class ShuffleByte4Channel [Benchmark] public void Shuffle4Channel() - { - SimdUtils.Shuffle4(this.source, this.destination, default); - } + => SimdUtils.Shuffle4(this.source, this.destination, default); } // 2020-10-29 @@ -63,3 +61,38 @@ public class ShuffleByte4Channel // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 10.76 ns | 0.033 ns | 0.029 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.39 ns | 0.045 ns | 0.040 ns | 1.06 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 128 | 14.05 ns | 0.029 ns | 0.024 ns | 1.31 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 32.09 ns | 0.655 ns | 1.000 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 14.03 ns | 0.047 ns | 0.041 ns | 0.44 | 0.02 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 256 | 15.18 ns | 0.052 ns | 0.043 ns | 0.48 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 59.26 ns | 0.084 ns | 0.070 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 18.80 ns | 0.036 ns | 0.034 ns | 0.32 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 512 | 17.69 ns | 0.038 ns | 0.034 ns | 0.30 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 112.48 ns | 0.285 ns | 0.253 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 31.57 ns | 0.041 ns | 0.036 ns | 0.28 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 1024 | 28.41 ns | 0.068 ns | 0.064 ns | 0.25 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 218.59 ns | 0.303 ns | 0.283 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 53.04 ns | 0.106 ns | 0.099 ns | 0.24 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 2048 | 34.74 ns | 0.061 ns | 0.054 ns | 0.16 | 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index ec23364caa..1c338e1a6c 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -61,3 +61,38 @@ public class ShuffleFloat4Channel // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 57.819 ns | 0.2360 ns | 0.1970 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.564 ns | 0.0234 ns | 0.0195 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 128 | 7.770 ns | 0.0696 ns | 0.0617 ns | 0.13 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 105.282 ns | 0.2713 ns | 0.2405 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.867 ns | 0.0393 ns | 0.0348 ns | 0.19 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 256 | 17.586 ns | 0.0582 ns | 0.0544 ns | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 200.799 ns | 0.5678 ns | 0.5033 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 41.137 ns | 0.1524 ns | 0.1351 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 512 | 24.040 ns | 0.0445 ns | 0.0395 ns | 0.12 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 401.046 ns | 0.5865 ns | 0.5199 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 94.904 ns | 0.4633 ns | 0.4334 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 1024 | 68.456 ns | 0.1192 ns | 0.0996 ns | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 772.297 ns | 0.6270 ns | 0.5558 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 184.561 ns | 0.4319 ns | 0.4040 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 2048 | 133.634 ns | 1.7864 ns | 1.8345 ns | 0.17 | - | - | - | - | diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 5684c20e8b..960b0613e6 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -7,6 +7,271 @@ namespace SixLabors.ImageSharp.Tests.Common; public partial class SimdUtilsTests { + public static readonly TheoryData MMShuffleData = new() + { + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 0), SimdUtils.Shuffle.MMShuffle0000 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 1), SimdUtils.Shuffle.MMShuffle0001 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 2), SimdUtils.Shuffle.MMShuffle0002 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 3), SimdUtils.Shuffle.MMShuffle0003 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 0), SimdUtils.Shuffle.MMShuffle0010 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 1), SimdUtils.Shuffle.MMShuffle0011 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 2), SimdUtils.Shuffle.MMShuffle0012 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 3), SimdUtils.Shuffle.MMShuffle0013 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 0), SimdUtils.Shuffle.MMShuffle0020 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 1), SimdUtils.Shuffle.MMShuffle0021 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 2), SimdUtils.Shuffle.MMShuffle0022 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 3), SimdUtils.Shuffle.MMShuffle0023 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 0), SimdUtils.Shuffle.MMShuffle0030 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 1), SimdUtils.Shuffle.MMShuffle0031 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 2), SimdUtils.Shuffle.MMShuffle0032 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 3), SimdUtils.Shuffle.MMShuffle0033 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 0), SimdUtils.Shuffle.MMShuffle0100 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 1), SimdUtils.Shuffle.MMShuffle0101 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 2), SimdUtils.Shuffle.MMShuffle0102 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 3), SimdUtils.Shuffle.MMShuffle0103 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 0), SimdUtils.Shuffle.MMShuffle0110 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 1), SimdUtils.Shuffle.MMShuffle0111 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 2), SimdUtils.Shuffle.MMShuffle0112 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 3), SimdUtils.Shuffle.MMShuffle0113 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 0), SimdUtils.Shuffle.MMShuffle0120 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 1), SimdUtils.Shuffle.MMShuffle0121 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 2), SimdUtils.Shuffle.MMShuffle0122 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 3), SimdUtils.Shuffle.MMShuffle0123 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 0), SimdUtils.Shuffle.MMShuffle0130 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 1), SimdUtils.Shuffle.MMShuffle0131 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 2), SimdUtils.Shuffle.MMShuffle0132 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 3), SimdUtils.Shuffle.MMShuffle0133 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 0), SimdUtils.Shuffle.MMShuffle0200 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 1), SimdUtils.Shuffle.MMShuffle0201 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 2), SimdUtils.Shuffle.MMShuffle0202 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 3), SimdUtils.Shuffle.MMShuffle0203 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 0), SimdUtils.Shuffle.MMShuffle0210 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 1), SimdUtils.Shuffle.MMShuffle0211 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 2), SimdUtils.Shuffle.MMShuffle0212 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 3), SimdUtils.Shuffle.MMShuffle0213 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 0), SimdUtils.Shuffle.MMShuffle0220 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 1), SimdUtils.Shuffle.MMShuffle0221 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 2), SimdUtils.Shuffle.MMShuffle0222 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 3), SimdUtils.Shuffle.MMShuffle0223 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 0), SimdUtils.Shuffle.MMShuffle0230 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 1), SimdUtils.Shuffle.MMShuffle0231 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 2), SimdUtils.Shuffle.MMShuffle0232 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 3), SimdUtils.Shuffle.MMShuffle0233 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 0), SimdUtils.Shuffle.MMShuffle0300 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 1), SimdUtils.Shuffle.MMShuffle0301 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 2), SimdUtils.Shuffle.MMShuffle0302 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 3), SimdUtils.Shuffle.MMShuffle0303 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 0), SimdUtils.Shuffle.MMShuffle0310 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 1), SimdUtils.Shuffle.MMShuffle0311 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 2), SimdUtils.Shuffle.MMShuffle0312 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 3), SimdUtils.Shuffle.MMShuffle0313 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 0), SimdUtils.Shuffle.MMShuffle0320 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 1), SimdUtils.Shuffle.MMShuffle0321 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 2), SimdUtils.Shuffle.MMShuffle0322 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 3), SimdUtils.Shuffle.MMShuffle0323 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 0), SimdUtils.Shuffle.MMShuffle0330 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 1), SimdUtils.Shuffle.MMShuffle0331 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 2), SimdUtils.Shuffle.MMShuffle0332 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 3), SimdUtils.Shuffle.MMShuffle0333 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 0), SimdUtils.Shuffle.MMShuffle1000 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 1), SimdUtils.Shuffle.MMShuffle1001 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 2), SimdUtils.Shuffle.MMShuffle1002 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 3), SimdUtils.Shuffle.MMShuffle1003 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 0), SimdUtils.Shuffle.MMShuffle1010 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 1), SimdUtils.Shuffle.MMShuffle1011 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 2), SimdUtils.Shuffle.MMShuffle1012 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 3), SimdUtils.Shuffle.MMShuffle1013 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 0), SimdUtils.Shuffle.MMShuffle1020 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 1), SimdUtils.Shuffle.MMShuffle1021 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 2), SimdUtils.Shuffle.MMShuffle1022 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 3), SimdUtils.Shuffle.MMShuffle1023 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 0), SimdUtils.Shuffle.MMShuffle1030 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 1), SimdUtils.Shuffle.MMShuffle1031 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 2), SimdUtils.Shuffle.MMShuffle1032 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 3), SimdUtils.Shuffle.MMShuffle1033 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 0), SimdUtils.Shuffle.MMShuffle1100 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 1), SimdUtils.Shuffle.MMShuffle1101 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 2), SimdUtils.Shuffle.MMShuffle1102 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 3), SimdUtils.Shuffle.MMShuffle1103 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 0), SimdUtils.Shuffle.MMShuffle1110 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 1), SimdUtils.Shuffle.MMShuffle1111 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 2), SimdUtils.Shuffle.MMShuffle1112 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 3), SimdUtils.Shuffle.MMShuffle1113 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 0), SimdUtils.Shuffle.MMShuffle1120 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 1), SimdUtils.Shuffle.MMShuffle1121 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 2), SimdUtils.Shuffle.MMShuffle1122 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 3), SimdUtils.Shuffle.MMShuffle1123 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 0), SimdUtils.Shuffle.MMShuffle1130 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 1), SimdUtils.Shuffle.MMShuffle1131 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 2), SimdUtils.Shuffle.MMShuffle1132 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 3), SimdUtils.Shuffle.MMShuffle1133 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 0), SimdUtils.Shuffle.MMShuffle1200 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 1), SimdUtils.Shuffle.MMShuffle1201 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 2), SimdUtils.Shuffle.MMShuffle1202 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 3), SimdUtils.Shuffle.MMShuffle1203 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 0), SimdUtils.Shuffle.MMShuffle1210 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 1), SimdUtils.Shuffle.MMShuffle1211 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 2), SimdUtils.Shuffle.MMShuffle1212 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 3), SimdUtils.Shuffle.MMShuffle1213 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 0), SimdUtils.Shuffle.MMShuffle1220 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 1), SimdUtils.Shuffle.MMShuffle1221 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 2), SimdUtils.Shuffle.MMShuffle1222 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 3), SimdUtils.Shuffle.MMShuffle1223 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 0), SimdUtils.Shuffle.MMShuffle1230 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 1), SimdUtils.Shuffle.MMShuffle1231 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 2), SimdUtils.Shuffle.MMShuffle1232 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 3), SimdUtils.Shuffle.MMShuffle1233 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 0), SimdUtils.Shuffle.MMShuffle1300 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 1), SimdUtils.Shuffle.MMShuffle1301 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 2), SimdUtils.Shuffle.MMShuffle1302 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 3), SimdUtils.Shuffle.MMShuffle1303 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 0), SimdUtils.Shuffle.MMShuffle1310 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 1), SimdUtils.Shuffle.MMShuffle1311 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 2), SimdUtils.Shuffle.MMShuffle1312 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 3), SimdUtils.Shuffle.MMShuffle1313 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 0), SimdUtils.Shuffle.MMShuffle1320 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 1), SimdUtils.Shuffle.MMShuffle1321 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 2), SimdUtils.Shuffle.MMShuffle1322 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 3), SimdUtils.Shuffle.MMShuffle1323 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 0), SimdUtils.Shuffle.MMShuffle1330 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 1), SimdUtils.Shuffle.MMShuffle1331 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 2), SimdUtils.Shuffle.MMShuffle1332 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 3), SimdUtils.Shuffle.MMShuffle1333 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 0), SimdUtils.Shuffle.MMShuffle2000 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 1), SimdUtils.Shuffle.MMShuffle2001 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 2), SimdUtils.Shuffle.MMShuffle2002 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 3), SimdUtils.Shuffle.MMShuffle2003 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 0), SimdUtils.Shuffle.MMShuffle2010 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 1), SimdUtils.Shuffle.MMShuffle2011 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 2), SimdUtils.Shuffle.MMShuffle2012 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 3), SimdUtils.Shuffle.MMShuffle2013 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 0), SimdUtils.Shuffle.MMShuffle2020 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 1), SimdUtils.Shuffle.MMShuffle2021 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 2), SimdUtils.Shuffle.MMShuffle2022 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 3), SimdUtils.Shuffle.MMShuffle2023 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 0), SimdUtils.Shuffle.MMShuffle2030 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 1), SimdUtils.Shuffle.MMShuffle2031 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 2), SimdUtils.Shuffle.MMShuffle2032 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 3), SimdUtils.Shuffle.MMShuffle2033 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 0), SimdUtils.Shuffle.MMShuffle2100 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 1), SimdUtils.Shuffle.MMShuffle2101 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 2), SimdUtils.Shuffle.MMShuffle2102 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 3), SimdUtils.Shuffle.MMShuffle2103 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 0), SimdUtils.Shuffle.MMShuffle2110 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 1), SimdUtils.Shuffle.MMShuffle2111 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 2), SimdUtils.Shuffle.MMShuffle2112 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 3), SimdUtils.Shuffle.MMShuffle2113 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 0), SimdUtils.Shuffle.MMShuffle2120 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 1), SimdUtils.Shuffle.MMShuffle2121 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 2), SimdUtils.Shuffle.MMShuffle2122 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 3), SimdUtils.Shuffle.MMShuffle2123 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MMShuffle2130 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 1), SimdUtils.Shuffle.MMShuffle2131 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 2), SimdUtils.Shuffle.MMShuffle2132 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 3), SimdUtils.Shuffle.MMShuffle2133 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 0), SimdUtils.Shuffle.MMShuffle2200 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 1), SimdUtils.Shuffle.MMShuffle2201 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 2), SimdUtils.Shuffle.MMShuffle2202 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 3), SimdUtils.Shuffle.MMShuffle2203 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 0), SimdUtils.Shuffle.MMShuffle2210 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 1), SimdUtils.Shuffle.MMShuffle2211 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 2), SimdUtils.Shuffle.MMShuffle2212 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 3), SimdUtils.Shuffle.MMShuffle2213 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 0), SimdUtils.Shuffle.MMShuffle2220 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 1), SimdUtils.Shuffle.MMShuffle2221 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 2), SimdUtils.Shuffle.MMShuffle2222 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 3), SimdUtils.Shuffle.MMShuffle2223 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 0), SimdUtils.Shuffle.MMShuffle2230 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 1), SimdUtils.Shuffle.MMShuffle2231 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 2), SimdUtils.Shuffle.MMShuffle2232 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 3), SimdUtils.Shuffle.MMShuffle2233 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 0), SimdUtils.Shuffle.MMShuffle2300 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 1), SimdUtils.Shuffle.MMShuffle2301 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 2), SimdUtils.Shuffle.MMShuffle2302 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 3), SimdUtils.Shuffle.MMShuffle2303 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 0), SimdUtils.Shuffle.MMShuffle2310 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 1), SimdUtils.Shuffle.MMShuffle2311 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 2), SimdUtils.Shuffle.MMShuffle2312 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 3), SimdUtils.Shuffle.MMShuffle2313 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 0), SimdUtils.Shuffle.MMShuffle2320 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 1), SimdUtils.Shuffle.MMShuffle2321 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 2), SimdUtils.Shuffle.MMShuffle2322 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 3), SimdUtils.Shuffle.MMShuffle2323 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 0), SimdUtils.Shuffle.MMShuffle2330 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 1), SimdUtils.Shuffle.MMShuffle2331 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 2), SimdUtils.Shuffle.MMShuffle2332 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 3), SimdUtils.Shuffle.MMShuffle2333 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 0), SimdUtils.Shuffle.MMShuffle3000 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 1), SimdUtils.Shuffle.MMShuffle3001 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 2), SimdUtils.Shuffle.MMShuffle3002 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 3), SimdUtils.Shuffle.MMShuffle3003 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 0), SimdUtils.Shuffle.MMShuffle3010 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 1), SimdUtils.Shuffle.MMShuffle3011 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 2), SimdUtils.Shuffle.MMShuffle3012 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 3), SimdUtils.Shuffle.MMShuffle3013 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 0), SimdUtils.Shuffle.MMShuffle3020 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 1), SimdUtils.Shuffle.MMShuffle3021 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 2), SimdUtils.Shuffle.MMShuffle3022 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 3), SimdUtils.Shuffle.MMShuffle3023 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 0), SimdUtils.Shuffle.MMShuffle3030 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 1), SimdUtils.Shuffle.MMShuffle3031 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 2), SimdUtils.Shuffle.MMShuffle3032 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 3), SimdUtils.Shuffle.MMShuffle3033 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 0), SimdUtils.Shuffle.MMShuffle3100 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 1), SimdUtils.Shuffle.MMShuffle3101 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 2), SimdUtils.Shuffle.MMShuffle3102 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 3), SimdUtils.Shuffle.MMShuffle3103 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 0), SimdUtils.Shuffle.MMShuffle3110 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 1), SimdUtils.Shuffle.MMShuffle3111 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 2), SimdUtils.Shuffle.MMShuffle3112 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 3), SimdUtils.Shuffle.MMShuffle3113 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 0), SimdUtils.Shuffle.MMShuffle3120 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 1), SimdUtils.Shuffle.MMShuffle3121 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 2), SimdUtils.Shuffle.MMShuffle3122 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 3), SimdUtils.Shuffle.MMShuffle3123 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 0), SimdUtils.Shuffle.MMShuffle3130 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 1), SimdUtils.Shuffle.MMShuffle3131 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 2), SimdUtils.Shuffle.MMShuffle3132 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 3), SimdUtils.Shuffle.MMShuffle3133 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 0), SimdUtils.Shuffle.MMShuffle3200 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 1), SimdUtils.Shuffle.MMShuffle3201 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 2), SimdUtils.Shuffle.MMShuffle3202 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 3), SimdUtils.Shuffle.MMShuffle3203 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 0), SimdUtils.Shuffle.MMShuffle3210 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 1), SimdUtils.Shuffle.MMShuffle3211 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 2), SimdUtils.Shuffle.MMShuffle3212 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 3), SimdUtils.Shuffle.MMShuffle3213 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 0), SimdUtils.Shuffle.MMShuffle3220 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 1), SimdUtils.Shuffle.MMShuffle3221 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 2), SimdUtils.Shuffle.MMShuffle3222 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 3), SimdUtils.Shuffle.MMShuffle3223 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 0), SimdUtils.Shuffle.MMShuffle3230 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 1), SimdUtils.Shuffle.MMShuffle3231 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 2), SimdUtils.Shuffle.MMShuffle3232 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 3), SimdUtils.Shuffle.MMShuffle3233 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 0), SimdUtils.Shuffle.MMShuffle3300 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 1), SimdUtils.Shuffle.MMShuffle3301 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 2), SimdUtils.Shuffle.MMShuffle3302 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 3), SimdUtils.Shuffle.MMShuffle3303 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 0), SimdUtils.Shuffle.MMShuffle3310 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 1), SimdUtils.Shuffle.MMShuffle3311 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 2), SimdUtils.Shuffle.MMShuffle3312 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 3), SimdUtils.Shuffle.MMShuffle3313 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 0), SimdUtils.Shuffle.MMShuffle3320 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 1), SimdUtils.Shuffle.MMShuffle3321 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 2), SimdUtils.Shuffle.MMShuffle3322 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 3), SimdUtils.Shuffle.MMShuffle3323 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 0), SimdUtils.Shuffle.MMShuffle3330 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 1), SimdUtils.Shuffle.MMShuffle3331 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 2), SimdUtils.Shuffle.MMShuffle3332 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 3), SimdUtils.Shuffle.MMShuffle3333 } + }; + + [Theory] + [MemberData(nameof(MMShuffleData))] + public void MMShuffleConstantsAreCorrect(byte expected, byte actual) + => Assert.Equal(expected, actual); + [Theory] [MemberData(nameof(ArraySizesDivisibleBy4))] public void BulkShuffleFloat4Channel(int count) @@ -226,7 +491,7 @@ public partial class SimdUtilsTests float[] expected = new float[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -257,7 +522,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -288,7 +553,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int _, out int p2, @@ -319,7 +584,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[result.Length]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -370,7 +635,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[result.Length]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int _, out int p2, From d9f72786ad8576277fdc595e8b86dce8bce701ae Mon Sep 17 00:00:00 2001 From: Stefan Nikolei Date: Tue, 21 Feb 2023 14:52:55 +0100 Subject: [PATCH 30/35] #2231 First steps for removing nullable disable in webp --- src/ImageSharp/Formats/Webp/AlphaDecoder.cs | 5 +- src/ImageSharp/Formats/Webp/AlphaEncoder.cs | 3 +- .../Formats/Webp/BitReader/BitReaderBase.cs | 5 +- .../Formats/Webp/BitReader/Vp8BitReader.cs | 4 +- .../Formats/Webp/BitReader/Vp8LBitReader.cs | 2 +- .../Formats/Webp/BitWriter/BitWriterBase.cs | 4 +- .../Formats/Webp/BitWriter/Vp8BitWriter.cs | 38 ++-- .../Formats/Webp/BitWriter/Vp8LBitWriter.cs | 13 +- .../Webp/Lossless/BackwardReferenceEncoder.cs | 47 +++-- .../Formats/Webp/Lossless/ColorCache.cs | 26 +-- .../Webp/Lossless/WebpLosslessDecoder.cs | 3 +- .../Formats/Webp/WebpAnimationDecoder.cs | 25 ++- .../Formats/Webp/WebpChunkParsingUtils.cs | 1 - .../Formats/Webp/WebpDecoderCore.cs | 168 +++++++++--------- .../Formats/Webp/WebpEncoderCore.cs | 3 +- src/ImageSharp/Formats/Webp/WebpImageInfo.cs | 9 +- .../Formats/Webp/WebpThrowHelper.cs | 6 + 17 files changed, 178 insertions(+), 184 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index d45fa2a427..4a7b505363 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -1,8 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; @@ -110,6 +110,7 @@ internal class AlphaDecoder : IDisposable /// /// Gets a value indicating whether the alpha channel uses compression. /// + [MemberNotNullWhen(true, nameof(LosslessDecoder))] private bool Compressed { get; } /// @@ -120,7 +121,7 @@ internal class AlphaDecoder : IDisposable /// /// Gets the Vp8L decoder which is used to de compress the alpha channel, if needed. /// - private WebpLosslessDecoder LosslessDecoder { get; } + private WebpLosslessDecoder? LosslessDecoder { get; } /// /// Gets a value indicating whether the decoding needs 1 byte per pixel for decoding. diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs index 292c31f6a5..dd21365bc3 100644 --- a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Advanced; @@ -15,7 +14,7 @@ namespace SixLabors.ImageSharp.Formats.Webp; /// internal class AlphaEncoder : IDisposable { - private IMemoryOwner alphaData; + private IMemoryOwner? alphaData; /// /// Encodes the alpha channel data. diff --git a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs index 2586690fd3..fa121be4f5 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs @@ -1,8 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; +using System.Diagnostics.CodeAnalysis; using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Formats.Webp.BitReader; @@ -17,7 +17,7 @@ internal abstract class BitReaderBase : IDisposable /// /// Gets or sets the raw encoded image data. /// - public IMemoryOwner Data { get; set; } + public IMemoryOwner? Data { get; set; } /// /// Copies the raw encoded image data from the stream into a byte array. @@ -25,6 +25,7 @@ internal abstract class BitReaderBase : IDisposable /// The input stream. /// Number of bytes to read as indicated from the chunk size. /// Used for allocating memory during reading data from the stream. + [MemberNotNull(nameof(Data))] protected void ReadImageDataFromStream(Stream input, int bytesToRead, MemoryAllocator memoryAllocator) { this.Data = memoryAllocator.Allocate(bytesToRead); diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs index 07bfcccd91..1fa4deda97 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs @@ -186,7 +186,7 @@ internal class Vp8BitReader : BitReaderBase { if (this.pos < this.bufferMax) { - ulong inBits = BinaryPrimitives.ReadUInt64LittleEndian(this.Data.Memory.Span.Slice((int)this.pos, 8)); + ulong inBits = BinaryPrimitives.ReadUInt64LittleEndian(this.Data!.Memory.Span.Slice((int)this.pos, 8)); this.pos += BitsCount >> 3; ulong bits = ByteSwap64(inBits); bits >>= 64 - BitsCount; @@ -205,7 +205,7 @@ internal class Vp8BitReader : BitReaderBase if (this.pos < this.bufferEnd) { this.bits += 8; - this.value = this.Data.Memory.Span[(int)this.pos++] | (this.value << 8); + this.value = this.Data!.Memory.Span[(int)this.pos++] | (this.value << 8); } else if (!this.eof) { diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs index 057abf134a..df95f01f4a 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs @@ -193,7 +193,7 @@ internal class Vp8LBitReader : BitReaderBase [MethodImpl(InliningOptions.ShortMethod)] private void ShiftBytes() { - System.Span dataSpan = this.Data.Memory.Span; + System.Span dataSpan = this.Data!.Memory.Span; while (this.bitPos >= 8 && this.pos < this.len) { this.value >>= 8; diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index 2df02727e0..02b1d0ab6a 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -123,7 +123,7 @@ internal abstract class BitWriterBase /// The stream to write to. /// The metadata profile's bytes. /// The chuck type to write. - protected void WriteMetadataProfile(Stream stream, byte[] metadataBytes, WebpChunkType chunkType) + protected void WriteMetadataProfile(Stream stream, byte[]? metadataBytes, WebpChunkType chunkType) { DebugGuard.NotNull(metadataBytes, nameof(metadataBytes)); @@ -207,7 +207,7 @@ internal abstract class BitWriterBase /// The width of the image. /// The height of the image. /// Flag indicating, if a alpha channel is present. - protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, byte[] iccProfileBytes, uint width, uint height, bool hasAlpha) + protected void WriteVp8XHeader(Stream stream, ExifProfile? exifProfile, XmpProfile? xmpProfile, byte[]? iccProfileBytes, uint width, uint height, bool hasAlpha) { if (width > MaxDimension || height > MaxDimension) { diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 6fa02c1161..b83b44fa14 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers.Binary; using SixLabors.ImageSharp.Formats.Webp.Lossy; @@ -58,7 +57,8 @@ internal class Vp8BitWriter : BitWriterBase /// Initializes a new instance of the class. /// /// The expected size in bytes. - public Vp8BitWriter(int expectedSize) + /// The Vp8Encoder. + public Vp8BitWriter(int expectedSize, Vp8Encoder enc) : base(expectedSize) { this.range = 255 - 1; @@ -67,15 +67,9 @@ internal class Vp8BitWriter : BitWriterBase this.nbBits = -8; this.pos = 0; this.maxPos = 0; - } - /// - /// Initializes a new instance of the class. - /// - /// The expected size in bytes. - /// The Vp8Encoder. - public Vp8BitWriter(int expectedSize, Vp8Encoder enc) - : this(expectedSize) => this.enc = enc; + this.enc = enc; + } /// public override int NumBytes() => (int)this.pos; @@ -414,9 +408,9 @@ internal class Vp8BitWriter : BitWriterBase /// Indicates, if the alpha data is compressed. public void WriteEncodedImageToStream( Stream stream, - ExifProfile exifProfile, - XmpProfile xmpProfile, - IccProfile iccProfile, + ExifProfile? exifProfile, + XmpProfile? xmpProfile, + IccProfile? iccProfile, uint width, uint height, bool hasAlpha, @@ -424,22 +418,22 @@ internal class Vp8BitWriter : BitWriterBase bool alphaDataIsCompressed) { bool isVp8X = false; - byte[] exifBytes = null; - byte[] xmpBytes = null; - byte[] iccProfileBytes = null; + byte[]? exifBytes = null; + byte[]? xmpBytes = null; + byte[]? iccProfileBytes = null; uint riffSize = 0; if (exifProfile != null) { isVp8X = true; exifBytes = exifProfile.ToByteArray(); - riffSize += MetadataChunkSize(exifBytes); + riffSize += MetadataChunkSize(exifBytes!); } if (xmpProfile != null) { isVp8X = true; xmpBytes = xmpProfile.Data; - riffSize += MetadataChunkSize(xmpBytes); + riffSize += MetadataChunkSize(xmpBytes!); } if (iccProfile != null) @@ -465,7 +459,7 @@ internal class Vp8BitWriter : BitWriterBase int mbSize = this.enc.Mbw * this.enc.Mbh; int expectedSize = mbSize * 7 / 8; - var bitWriterPartZero = new Vp8BitWriter(expectedSize); + Vp8BitWriter bitWriterPartZero = new(expectedSize, this.enc); // Partition #0 with header and partition sizes. uint size0 = this.GeneratePartition0(bitWriterPartZero); @@ -676,9 +670,9 @@ internal class Vp8BitWriter : BitWriterBase bool isVp8X, uint width, uint height, - ExifProfile exifProfile, - XmpProfile xmpProfile, - byte[] iccProfileBytes, + ExifProfile? exifProfile, + XmpProfile? xmpProfile, + byte[]? iccProfileBytes, bool hasAlpha, Span alphaData, bool alphaDataIsCompressed) diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs index 42c1af8040..22bc195d64 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers.Binary; using SixLabors.ImageSharp.Formats.Webp.Lossless; @@ -138,25 +137,25 @@ internal class Vp8LBitWriter : BitWriterBase /// The width of the image. /// The height of the image. /// Flag indicating, if a alpha channel is present. - public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, IccProfile iccProfile, uint width, uint height, bool hasAlpha) + public void WriteEncodedImageToStream(Stream stream, ExifProfile? exifProfile, XmpProfile? xmpProfile, IccProfile? iccProfile, uint width, uint height, bool hasAlpha) { bool isVp8X = false; - byte[] exifBytes = null; - byte[] xmpBytes = null; - byte[] iccBytes = null; + byte[]? exifBytes = null; + byte[]? xmpBytes = null; + byte[]? iccBytes = null; uint riffSize = 0; if (exifProfile != null) { isVp8X = true; exifBytes = exifProfile.ToByteArray(); - riffSize += MetadataChunkSize(exifBytes); + riffSize += MetadataChunkSize(exifBytes!); } if (xmpProfile != null) { isVp8X = true; xmpBytes = xmpProfile.Data; - riffSize += MetadataChunkSize(xmpBytes); + riffSize += MetadataChunkSize(xmpBytes!); } if (iccProfile != null) diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs index b3589b52c7..df19c26e0b 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Memory; @@ -50,7 +49,7 @@ internal static class BackwardReferenceEncoder int lz77TypeBest = 0; double bitCostBest = -1; int cacheBitsInitial = cacheBits; - Vp8LHashChain hashChainBox = null; + Vp8LHashChain? hashChainBox = null; var stats = new Vp8LStreaks(); var bitsEntropy = new Vp8LBitEntropy(); for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1) @@ -101,7 +100,7 @@ internal static class BackwardReferenceEncoder // Improve on simple LZ77 but only for high quality (TraceBackwards is costly). if ((lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard || lz77TypeBest == (int)Vp8LLz77Type.Lz77Box) && quality >= 25) { - Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; + Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox!; BackwardReferencesTraceBackwards(width, height, memoryAllocator, bgra, cacheBits, hashChainTmp, best, worst); var histo = new Vp8LHistogram(worst, cacheBits); double bitCostTrace = histo.EstimateBits(stats, bitsEntropy); @@ -140,8 +139,7 @@ internal static class BackwardReferenceEncoder for (int i = 0; i <= WebpConstants.MaxColorCacheBits; i++) { histos[i] = new Vp8LHistogram(paletteCodeBits: i); - colorCache[i] = new ColorCache(); - colorCache[i].Init(i); + colorCache[i] = new ColorCache(i); } // Find the cacheBits giving the lowest entropy. @@ -274,11 +272,11 @@ internal static class BackwardReferenceEncoder double offsetCost = -1; int firstOffsetIsConstant = -1; // initialized with 'impossible' value. int reach = 0; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } costModel.Build(xSize, cacheBits, refs); @@ -375,12 +373,12 @@ internal static class BackwardReferenceEncoder private static void BackwardReferencesHashChainFollowChosenPath(ReadOnlySpan bgra, int cacheBits, Span chosenPath, int chosenPathSize, Vp8LHashChain hashChain, Vp8LBackwardRefs backwardRefs) { bool useColorCache = cacheBits > 0; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; int i = 0; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } backwardRefs.Refs.Clear(); @@ -396,7 +394,7 @@ internal static class BackwardReferenceEncoder { for (int k = 0; k < len; k++) { - colorCache.Insert(bgra[i + k]); + colorCache!.Insert(bgra[i + k]); } } @@ -405,7 +403,7 @@ internal static class BackwardReferenceEncoder else { PixOrCopy v; - int idx = useColorCache ? colorCache.Contains(bgra[i]) : -1; + int idx = useColorCache ? colorCache!.Contains(bgra[i]) : -1; if (idx >= 0) { // useColorCache is true and color cache contains bgra[i] @@ -416,7 +414,7 @@ internal static class BackwardReferenceEncoder { if (useColorCache) { - colorCache.Insert(bgra[i]); + colorCache!.Insert(bgra[i]); } v = PixOrCopy.CreateLiteral(bgra[i]); @@ -430,7 +428,7 @@ internal static class BackwardReferenceEncoder private static void AddSingleLiteralWithCostModel( ReadOnlySpan bgra, - ColorCache colorCache, + ColorCache? colorCache, CostModel costModel, int idx, bool useColorCache, @@ -440,7 +438,7 @@ internal static class BackwardReferenceEncoder { double costVal = prevCost; uint color = bgra[idx]; - int ix = useColorCache ? colorCache.Contains(color) : -1; + int ix = useColorCache ? colorCache!.Contains(color) : -1; if (ix >= 0) { double mul0 = 0.68; @@ -451,7 +449,7 @@ internal static class BackwardReferenceEncoder double mul1 = 0.82; if (useColorCache) { - colorCache.Insert(color); + colorCache!.Insert(color); } costVal += costModel.GetLiteralCost(color) * mul1; @@ -469,10 +467,10 @@ internal static class BackwardReferenceEncoder int iLastCheck = -1; bool useColorCache = cacheBits > 0; int pixCount = xSize * ySize; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } refs.Refs.Clear(); @@ -529,7 +527,7 @@ internal static class BackwardReferenceEncoder { for (j = i; j < i + len; j++) { - colorCache.Insert(bgra[j]); + colorCache!.Insert(bgra[j]); } } } @@ -725,11 +723,11 @@ internal static class BackwardReferenceEncoder { int pixelCount = xSize * ySize; bool useColorCache = cacheBits > 0; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } refs.Refs.Clear(); @@ -757,7 +755,7 @@ internal static class BackwardReferenceEncoder { for (int k = 0; k < prevRowLen; ++k) { - colorCache.Insert(bgra[i + k]); + colorCache!.Insert(bgra[i + k]); } } @@ -777,8 +775,7 @@ internal static class BackwardReferenceEncoder private static void BackwardRefsWithLocalCache(ReadOnlySpan bgra, int cacheBits, Vp8LBackwardRefs refs) { int pixelIndex = 0; - var colorCache = new ColorCache(); - colorCache.Init(cacheBits); + ColorCache colorCache = new(cacheBits); for (int idx = 0; idx < refs.Refs.Count; idx++) { PixOrCopy v = refs.Refs[idx]; @@ -825,12 +822,12 @@ internal static class BackwardReferenceEncoder } } - private static void AddSingleLiteral(uint pixel, bool useColorCache, ColorCache colorCache, Vp8LBackwardRefs refs) + private static void AddSingleLiteral(uint pixel, bool useColorCache, ColorCache? colorCache, Vp8LBackwardRefs refs) { PixOrCopy v; if (useColorCache) { - int key = colorCache.GetIndex(pixel); + int key = colorCache!.GetIndex(pixel); if (colorCache.Lookup(key) == pixel) { v = PixOrCopy.CreateCacheIdx(key); diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 08ff60b69f..7e1c4e2fe1 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,7 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp.Lossless; @@ -13,6 +13,18 @@ internal class ColorCache { private const uint HashMul = 0x1e35a7bdu; + /// + /// Initializes a new instance of the class. + /// + /// The hashBits determine the size of cache. It will be 1 left shifted by hashBits. + public ColorCache(int hashBits) + { + int hashSize = 1 << hashBits; + this.Colors = new uint[hashSize]; + this.HashBits = hashBits; + this.HashShift = 32 - hashBits; + } + /// /// Gets the color entries. /// @@ -28,18 +40,6 @@ internal class ColorCache /// public int HashBits { get; private set; } - /// - /// Initializes a new color cache. - /// - /// The hashBits determine the size of cache. It will be 1 left shifted by hashBits. - public void Init(int hashBits) - { - int hashSize = 1 << hashBits; - this.Colors = new uint[hashSize]; - this.HashBits = hashBits; - this.HashShift = 32 - hashBits; - } - /// /// Inserts a new color into the cache. /// diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs index abfb67bc7e..84ddd4b785 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs @@ -158,10 +158,9 @@ internal sealed class WebpLosslessDecoder // Finish setting up the color-cache. if (isColorCachePresent) { - decoder.Metadata.ColorCache = new ColorCache(); + decoder.Metadata.ColorCache = new ColorCache(colorCacheBits); colorCacheSize = 1 << colorCacheBits; decoder.Metadata.ColorCacheSize = colorCacheSize; - decoder.Metadata.ColorCache.Init(colorCacheBits); } else { diff --git a/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs b/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs index 4c1c60591d..abaa85ef18 100644 --- a/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using System.Runtime.CompilerServices; @@ -46,17 +45,17 @@ internal class WebpAnimationDecoder : IDisposable /// /// The abstract metadata. /// - private ImageMetadata metadata; + private ImageMetadata? metadata; /// /// The gif specific metadata. /// - private WebpMetadata webpMetadata; + private WebpMetadata? webpMetadata; /// /// The alpha data, if an ALPH chunk is present. /// - private IMemoryOwner alphaData; + private IMemoryOwner? alphaData; /// /// Initializes a new instance of the class. @@ -83,8 +82,8 @@ internal class WebpAnimationDecoder : IDisposable public Image Decode(BufferedReadStream stream, WebpFeatures features, uint width, uint height, uint completeDataSize) where TPixel : unmanaged, IPixel { - Image image = null; - ImageFrame previousFrame = null; + Image? image = null; + ImageFrame? previousFrame = null; this.metadata = new ImageMetadata(); this.webpMetadata = this.metadata.GetWebpMetadata(); @@ -99,12 +98,12 @@ internal class WebpAnimationDecoder : IDisposable switch (chunkType) { case WebpChunkType.Animation: - uint dataSize = this.ReadFrame(stream, ref image, ref previousFrame, width, height, features.AnimationBackgroundColor.Value); + uint dataSize = this.ReadFrame(stream, ref image, ref previousFrame, width, height, features.AnimationBackgroundColor!.Value); remainingBytes -= (int)dataSize; break; case WebpChunkType.Xmp: case WebpChunkType.Exif: - WebpChunkParsingUtils.ParseOptionalChunks(stream, chunkType, image.Metadata, false, this.buffer); + WebpChunkParsingUtils.ParseOptionalChunks(stream, chunkType, image!.Metadata, false, this.buffer); break; default: WebpThrowHelper.ThrowImageFormatException("Read unexpected webp chunk data"); @@ -117,7 +116,7 @@ internal class WebpAnimationDecoder : IDisposable } } - return image; + return image!; } /// @@ -130,7 +129,7 @@ internal class WebpAnimationDecoder : IDisposable /// The width of the image. /// The height of the image. /// The default background color of the canvas in. - private uint ReadFrame(BufferedReadStream stream, ref Image image, ref ImageFrame previousFrame, uint width, uint height, Color backgroundColor) + private uint ReadFrame(BufferedReadStream stream, ref Image? image, ref ImageFrame? previousFrame, uint width, uint height, Color backgroundColor) where TPixel : unmanaged, IPixel { AnimationFrameData frameData = this.ReadFrameHeader(stream); @@ -146,7 +145,7 @@ internal class WebpAnimationDecoder : IDisposable chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); } - WebpImageInfo webpInfo = null; + WebpImageInfo? webpInfo = null; WebpFeatures features = new(); switch (chunkType) { @@ -163,7 +162,7 @@ internal class WebpAnimationDecoder : IDisposable break; } - ImageFrame currentFrame = null; + ImageFrame? currentFrame = null; ImageFrame imageFrame; if (previousFrame is null) { @@ -175,7 +174,7 @@ internal class WebpAnimationDecoder : IDisposable } else { - currentFrame = image.Frames.AddFrame(previousFrame); // This clones the frame and adds it the collection. + currentFrame = image!.Frames.AddFrame(previousFrame); // This clones the frame and adds it the collection. SetFrameMetadata(currentFrame.Metadata, frameData.Duration); diff --git a/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs b/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs index bfe415a6e4..e4acdf311c 100644 --- a/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs +++ b/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers.Binary; using SixLabors.ImageSharp.Formats.Webp.BitReader; diff --git a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs index 29be86e22f..83298237dd 100644 --- a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using System.Buffers.Binary; @@ -44,32 +43,27 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Gets the decoded by this decoder instance. /// - private ImageMetadata metadata; + private ImageMetadata? metadata; /// /// Gets or sets the alpha data, if an ALPH chunk is present. /// - private IMemoryOwner alphaData; + private IMemoryOwner? alphaData; /// /// Used for allocating memory during the decoding operations. /// private readonly MemoryAllocator memoryAllocator; - /// - /// The stream to decode from. - /// - private BufferedReadStream currentStream; - /// /// The webp specific metadata. /// - private WebpMetadata webpMetadata; + private WebpMetadata? webpMetadata; /// /// Information about the webp image. /// - private WebpImageInfo webImageInfo; + private WebpImageInfo? webImageInfo; /// /// Initializes a new instance of the class. @@ -88,21 +82,20 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable public DecoderOptions Options { get; } /// - public Size Dimensions => new((int)this.webImageInfo.Width, (int)this.webImageInfo.Height); + public Size Dimensions => new((int)this.webImageInfo!.Width, (int)this.webImageInfo.Height); /// public Image Decode(BufferedReadStream stream, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Image image = null; + Image? image = null; try { this.metadata = new ImageMetadata(); - this.currentStream = stream; - uint fileSize = this.ReadImageHeader(); + uint fileSize = this.ReadImageHeader(stream); - using (this.webImageInfo = this.ReadVp8Info()) + using (this.webImageInfo = this.ReadVp8Info(stream)) { if (this.webImageInfo.Features is { Animation: true }) { @@ -131,7 +124,7 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable // There can be optional chunks after the image data, like EXIF and XMP. if (this.webImageInfo.Features != null) { - this.ParseOptionalChunks(this.webImageInfo.Features); + this.ParseOptionalChunks(stream, this.webImageInfo.Features); } return image; @@ -147,10 +140,8 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// public ImageInfo Identify(BufferedReadStream stream, CancellationToken cancellationToken) { - this.currentStream = stream; - - this.ReadImageHeader(); - using (this.webImageInfo = this.ReadVp8Info(true)) + this.ReadImageHeader(stream); + using (this.webImageInfo = this.ReadVp8Info(stream, true)) { return new ImageInfo(new PixelTypeInfo((int)this.webImageInfo.BitsPerPixel), (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, this.metadata); } @@ -159,19 +150,20 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads and skips over the image header. /// + /// The stream to decode from. /// The file size in bytes. - private uint ReadImageHeader() + private uint ReadImageHeader(BufferedReadStream stream) { // Skip FourCC header, we already know its a RIFF file at this point. - this.currentStream.Skip(4); + stream.Skip(4); // Read file size. // The size of the file in bytes starting at offset 8. // The file size in the header is the total size of the chunks that follow plus 4 bytes for the ‘WEBP’ FourCC. - uint fileSize = WebpChunkParsingUtils.ReadChunkSize(this.currentStream, this.buffer); + uint fileSize = WebpChunkParsingUtils.ReadChunkSize(stream, this.buffer); // Skip 'WEBP' from the header. - this.currentStream.Skip(4); + stream.Skip(4); return fileSize; } @@ -179,42 +171,43 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads information present in the image header, about the image content and how to decode the image. /// + /// The stream to decode from. /// For identify, the alpha data should not be read. /// Information about the webp image. - private WebpImageInfo ReadVp8Info(bool ignoreAlpha = false) + private WebpImageInfo ReadVp8Info(BufferedReadStream stream, bool ignoreAlpha = false) { this.metadata = new ImageMetadata(); this.webpMetadata = this.metadata.GetFormatMetadata(WebpFormat.Instance); - WebpChunkType chunkType = WebpChunkParsingUtils.ReadChunkType(this.currentStream, this.buffer); + WebpChunkType chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); var features = new WebpFeatures(); switch (chunkType) { case WebpChunkType.Vp8: this.webpMetadata.FileFormat = WebpFileFormatType.Lossy; - return WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, this.currentStream, this.buffer, features); + return WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, stream, this.buffer, features); case WebpChunkType.Vp8L: this.webpMetadata.FileFormat = WebpFileFormatType.Lossless; - return WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, this.currentStream, this.buffer, features); + return WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, stream, this.buffer, features); case WebpChunkType.Vp8X: - WebpImageInfo webpInfos = WebpChunkParsingUtils.ReadVp8XHeader(this.currentStream, this.buffer, features); - while (this.currentStream.Position < this.currentStream.Length) + WebpImageInfo webpInfos = WebpChunkParsingUtils.ReadVp8XHeader(stream, this.buffer, features); + while (stream.Position < stream.Length) { - chunkType = WebpChunkParsingUtils.ReadChunkType(this.currentStream, this.buffer); + chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); if (chunkType == WebpChunkType.Vp8) { this.webpMetadata.FileFormat = WebpFileFormatType.Lossy; - webpInfos = WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, this.currentStream, this.buffer, features); + webpInfos = WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, stream, this.buffer, features); } else if (chunkType == WebpChunkType.Vp8L) { this.webpMetadata.FileFormat = WebpFileFormatType.Lossless; - webpInfos = WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, this.currentStream, this.buffer, features); + webpInfos = WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, stream, this.buffer, features); } else if (WebpChunkParsingUtils.IsOptionalVp8XChunk(chunkType)) { - bool isAnimationChunk = this.ParseOptionalExtendedChunks(chunkType, features, ignoreAlpha); + bool isAnimationChunk = this.ParseOptionalExtendedChunks(stream, chunkType, features, ignoreAlpha); if (isAnimationChunk) { return webpInfos; @@ -223,8 +216,8 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable else { // Ignore unknown chunks. - uint chunkSize = this.ReadChunkSize(); - this.currentStream.Skip((int)chunkSize); + uint chunkSize = this.ReadChunkSize(stream); + stream.Skip((int)chunkSize); } } @@ -239,32 +232,33 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Parses optional VP8X chunks, which can be ICCP, XMP, ANIM or ALPH chunks. /// + /// The stream to decode from. /// The chunk type. /// The webp image features. /// For identify, the alpha data should not be read. /// true, if its a alpha chunk. - private bool ParseOptionalExtendedChunks(WebpChunkType chunkType, WebpFeatures features, bool ignoreAlpha) + private bool ParseOptionalExtendedChunks(BufferedReadStream stream, WebpChunkType chunkType, WebpFeatures features, bool ignoreAlpha) { switch (chunkType) { case WebpChunkType.Iccp: - this.ReadIccProfile(); + this.ReadIccProfile(stream); break; case WebpChunkType.Exif: - this.ReadExifProfile(); + this.ReadExifProfile(stream); break; case WebpChunkType.Xmp: - this.ReadXmpProfile(); + this.ReadXmpProfile(stream); break; case WebpChunkType.AnimationParameter: - this.ReadAnimationParameters(features); + this.ReadAnimationParameters(stream, features); return true; case WebpChunkType.Alpha: - this.ReadAlphaData(features, ignoreAlpha); + this.ReadAlphaData(stream, features, ignoreAlpha); break; default: WebpThrowHelper.ThrowImageFormatException("Unexpected chunk followed VP8X header"); @@ -277,32 +271,33 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the optional metadata EXIF of XMP profiles, which can follow the image data. /// + /// The stream to decode from. /// The webp features. - private void ParseOptionalChunks(WebpFeatures features) + private void ParseOptionalChunks(BufferedReadStream stream, WebpFeatures features) { if (this.skipMetadata || (!features.ExifProfile && !features.XmpMetaData)) { return; } - long streamLength = this.currentStream.Length; - while (this.currentStream.Position < streamLength) + long streamLength = stream.Length; + while (stream.Position < streamLength) { // Read chunk header. - WebpChunkType chunkType = this.ReadChunkType(); - if (chunkType == WebpChunkType.Exif && this.metadata.ExifProfile == null) + WebpChunkType chunkType = this.ReadChunkType(stream); + if (chunkType == WebpChunkType.Exif && this.metadata!.ExifProfile == null) { - this.ReadExifProfile(); + this.ReadExifProfile(stream); } - else if (chunkType == WebpChunkType.Xmp && this.metadata.XmpProfile == null) + else if (chunkType == WebpChunkType.Xmp && this.metadata!.XmpProfile == null) { - this.ReadXmpProfile(); + this.ReadXmpProfile(stream); } else { // Skip duplicate XMP or EXIF chunk. - uint chunkLength = this.ReadChunkSize(); - this.currentStream.Skip((int)chunkLength); + uint chunkLength = this.ReadChunkSize(stream); + stream.Skip((int)chunkLength); } } } @@ -310,17 +305,18 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the EXIF profile from the stream. /// - private void ReadExifProfile() + /// The stream to decode from. + private void ReadExifProfile(BufferedReadStream stream) { - uint exifChunkSize = this.ReadChunkSize(); + uint exifChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) { - this.currentStream.Skip((int)exifChunkSize); + stream.Skip((int)exifChunkSize); } else { byte[] exifData = new byte[exifChunkSize]; - int bytesRead = this.currentStream.Read(exifData, 0, (int)exifChunkSize); + int bytesRead = stream.Read(exifData, 0, (int)exifChunkSize); if (bytesRead != exifChunkSize) { // Ignore invalid chunk. @@ -328,24 +324,25 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable } var profile = new ExifProfile(exifData); - this.metadata.ExifProfile = profile; + this.metadata!.ExifProfile = profile; } } /// /// Reads the XMP profile the stream. /// - private void ReadXmpProfile() + /// The stream to decode from. + private void ReadXmpProfile(BufferedReadStream stream) { - uint xmpChunkSize = this.ReadChunkSize(); + uint xmpChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) { - this.currentStream.Skip((int)xmpChunkSize); + stream.Skip((int)xmpChunkSize); } else { byte[] xmpData = new byte[xmpChunkSize]; - int bytesRead = this.currentStream.Read(xmpData, 0, (int)xmpChunkSize); + int bytesRead = stream.Read(xmpData, 0, (int)xmpChunkSize); if (bytesRead != xmpChunkSize) { // Ignore invalid chunk. @@ -353,24 +350,25 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable } var profile = new XmpProfile(xmpData); - this.metadata.XmpProfile = profile; + this.metadata!.XmpProfile = profile; } } /// /// Reads the ICCP chunk from the stream. /// - private void ReadIccProfile() + /// The stream to decode from. + private void ReadIccProfile(BufferedReadStream stream) { - uint iccpChunkSize = this.ReadChunkSize(); + uint iccpChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) { - this.currentStream.Skip((int)iccpChunkSize); + stream.Skip((int)iccpChunkSize); } else { byte[] iccpData = new byte[iccpChunkSize]; - int bytesRead = this.currentStream.Read(iccpData, 0, (int)iccpChunkSize); + int bytesRead = stream.Read(iccpData, 0, (int)iccpChunkSize); if (bytesRead != iccpChunkSize) { WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the iccp chunk"); @@ -379,7 +377,7 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable var profile = new IccProfile(iccpData); if (profile.CheckIsValid()) { - this.metadata.IccProfile = profile; + this.metadata!.IccProfile = profile; } } } @@ -387,17 +385,18 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the animation parameters chunk from the stream. /// + /// The stream to decode from. /// The webp features. - private void ReadAnimationParameters(WebpFeatures features) + private void ReadAnimationParameters(BufferedReadStream stream, WebpFeatures features) { features.Animation = true; - uint animationChunkSize = WebpChunkParsingUtils.ReadChunkSize(this.currentStream, this.buffer); - byte blue = (byte)this.currentStream.ReadByte(); - byte green = (byte)this.currentStream.ReadByte(); - byte red = (byte)this.currentStream.ReadByte(); - byte alpha = (byte)this.currentStream.ReadByte(); + uint animationChunkSize = WebpChunkParsingUtils.ReadChunkSize(stream, this.buffer); + byte blue = (byte)stream.ReadByte(); + byte green = (byte)stream.ReadByte(); + byte red = (byte)stream.ReadByte(); + byte alpha = (byte)stream.ReadByte(); features.AnimationBackgroundColor = new Color(new Rgba32(red, green, blue, alpha)); - int bytesRead = this.currentStream.Read(this.buffer, 0, 2); + int bytesRead = stream.Read(this.buffer, 0, 2); if (bytesRead != 2) { WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the animation loop count"); @@ -409,22 +408,23 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the alpha data chunk data from the stream. /// + /// The stream to decode from. /// The features. /// if set to true, skips the chunk data. - private void ReadAlphaData(WebpFeatures features, bool ignoreAlpha) + private void ReadAlphaData(BufferedReadStream stream, WebpFeatures features, bool ignoreAlpha) { - uint alphaChunkSize = WebpChunkParsingUtils.ReadChunkSize(this.currentStream, this.buffer); + uint alphaChunkSize = WebpChunkParsingUtils.ReadChunkSize(stream, this.buffer); if (ignoreAlpha) { - this.currentStream.Skip((int)alphaChunkSize); + stream.Skip((int)alphaChunkSize); return; } - features.AlphaChunkHeader = (byte)this.currentStream.ReadByte(); + features.AlphaChunkHeader = (byte)stream.ReadByte(); int alphaDataSize = (int)(alphaChunkSize - 1); this.alphaData = this.memoryAllocator.Allocate(alphaDataSize); Span alphaData = this.alphaData.GetSpan(); - int bytesRead = this.currentStream.Read(alphaData, 0, alphaDataSize); + int bytesRead = stream.Read(alphaData, 0, alphaDataSize); if (bytesRead != alphaDataSize) { WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the alpha data from the stream"); @@ -434,12 +434,13 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Identifies the chunk type from the chunk. /// + /// The stream to decode from. /// /// Thrown if the input stream is not valid. /// - private WebpChunkType ReadChunkType() + private WebpChunkType ReadChunkType(BufferedReadStream stream) { - if (this.currentStream.Read(this.buffer, 0, 4) == 4) + if (stream.Read(this.buffer, 0, 4) == 4) { var chunkType = (WebpChunkType)BinaryPrimitives.ReadUInt32BigEndian(this.buffer); return chunkType; @@ -452,10 +453,11 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads the chunk size. If Chunk Size is odd, a single padding byte will be added to the payload, /// so the chunk size will be increased by 1 in those cases. /// + /// The stream to decode from. /// The chunk size in bytes. - private uint ReadChunkSize() + private uint ReadChunkSize(BufferedReadStream stream) { - if (this.currentStream.Read(this.buffer, 0, 4) == 4) + if (stream.Read(this.buffer, 0, 4) == 4) { uint chunkSize = BinaryPrimitives.ReadUInt32LittleEndian(this.buffer); return (chunkSize % 2 == 0) ? chunkSize : chunkSize + 1; diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs index 68951e5ec0..8d707fed92 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Formats.Webp.Lossless; @@ -81,7 +80,7 @@ internal sealed class WebpEncoderCore : IImageEncoderInternals /// /// The global configuration. /// - private Configuration configuration; + private Configuration? configuration; /// /// Initializes a new instance of the class. diff --git a/src/ImageSharp/Formats/Webp/WebpImageInfo.cs b/src/ImageSharp/Formats/Webp/WebpImageInfo.cs index c41403211f..5f7301b262 100644 --- a/src/ImageSharp/Formats/Webp/WebpImageInfo.cs +++ b/src/ImageSharp/Formats/Webp/WebpImageInfo.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using SixLabors.ImageSharp.Formats.Webp.BitReader; using SixLabors.ImageSharp.Formats.Webp.Lossy; @@ -36,7 +35,7 @@ internal class WebpImageInfo : IDisposable /// /// Gets or sets additional features present in a VP8X image. /// - public WebpFeatures Features { get; set; } + public WebpFeatures? Features { get; set; } /// /// Gets or sets the VP8 profile / version. Valid values are between 0 and 3. Default value will be the invalid value -1. @@ -46,17 +45,17 @@ internal class WebpImageInfo : IDisposable /// /// Gets or sets the VP8 frame header. /// - public Vp8FrameHeader Vp8FrameHeader { get; set; } + public Vp8FrameHeader? Vp8FrameHeader { get; set; } /// /// Gets or sets the VP8L bitreader. Will be , if its not a lossless image. /// - public Vp8LBitReader Vp8LBitReader { get; set; } + public Vp8LBitReader? Vp8LBitReader { get; set; } /// /// Gets or sets the VP8 bitreader. Will be , if its not a lossy image. /// - public Vp8BitReader Vp8BitReader { get; set; } + public Vp8BitReader? Vp8BitReader { get; set; } /// public void Dispose() diff --git a/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs b/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs index 944cefa745..c633c52738 100644 --- a/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs +++ b/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs @@ -1,15 +1,21 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Diagnostics.CodeAnalysis; + namespace SixLabors.ImageSharp.Formats.Webp; internal static class WebpThrowHelper { + [DoesNotReturn] public static void ThrowInvalidImageContentException(string errorMessage) => throw new InvalidImageContentException(errorMessage); + [DoesNotReturn] public static void ThrowImageFormatException(string errorMessage) => throw new ImageFormatException(errorMessage); + [DoesNotReturn] public static void ThrowNotSupportedException(string errorMessage) => throw new NotSupportedException(errorMessage); + [DoesNotReturn] public static void ThrowInvalidImageDimensions(string errorMessage) => throw new InvalidImageContentException(errorMessage); } From a27df8a72145e884c86e18787be474094beb87dc Mon Sep 17 00:00:00 2001 From: Stefan Nikolei Date: Tue, 21 Feb 2023 17:14:23 +0100 Subject: [PATCH 31/35] Remove nullable disable from CostManager and CostInterval --- .../Formats/Webp/Lossless/CostInterval.cs | 5 ++--- .../Formats/Webp/Lossless/CostManager.cs | 19 +++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs b/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs index a63c786049..0cc4a30fd7 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Diagnostics; @@ -33,7 +32,7 @@ internal class CostInterval public int Index { get; set; } - public CostInterval Previous { get; set; } + public CostInterval? Previous { get; set; } - public CostInterval Next { get; set; } + public CostInterval? Next { get; set; } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs b/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs index 7b9fdff247..e393c065ec 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Memory; @@ -14,7 +13,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless; /// internal sealed class CostManager : IDisposable { - private CostInterval head; + private CostInterval? head; private const int FreeIntervalsStartCount = 25; @@ -103,10 +102,10 @@ internal sealed class CostManager : IDisposable /// If 'doCleanIntervals' is true, intervals that end before 'i' will be popped. public void UpdateCostAtIndex(int i, bool doCleanIntervals) { - CostInterval current = this.head; + CostInterval? current = this.head; while (current != null && current.Start <= i) { - CostInterval next = current.Next; + CostInterval? next = current.Next; if (current.End <= i) { if (doCleanIntervals) @@ -155,7 +154,7 @@ internal sealed class CostManager : IDisposable return; } - CostInterval interval = this.head; + CostInterval? interval = this.head; for (int i = 0; i < this.CacheIntervalsSize && this.CacheIntervals[i].Start < len; i++) { // Define the intersection of the ith interval with the new one. @@ -163,7 +162,7 @@ internal sealed class CostManager : IDisposable int end = position + (this.CacheIntervals[i].End > len ? len : this.CacheIntervals[i].End); float cost = (float)(distanceCost + this.CacheIntervals[i].Cost); - CostInterval intervalNext; + CostInterval? intervalNext; for (; interval != null && interval.Start < end; interval = intervalNext) { intervalNext = interval.Next; @@ -225,7 +224,7 @@ internal sealed class CostManager : IDisposable /// Pop an interval from the manager. /// /// The interval to remove. - private void PopInterval(CostInterval interval) + private void PopInterval(CostInterval? interval) { if (interval == null) { @@ -240,7 +239,7 @@ internal sealed class CostManager : IDisposable this.freeIntervals.Push(interval); } - private void InsertInterval(CostInterval intervalIn, float cost, int position, int start, int end) + private void InsertInterval(CostInterval? intervalIn, float cost, int position, int start, int end) { if (start >= end) { @@ -271,7 +270,7 @@ internal sealed class CostManager : IDisposable /// it was orphaned (which can be NULL), set it at the right place in the list /// of intervals using the start_ ordering and the previous interval as a hint. /// - private void PositionOrphanInterval(CostInterval current, CostInterval previous) + private void PositionOrphanInterval(CostInterval current, CostInterval? previous) { previous ??= this.head; @@ -292,7 +291,7 @@ internal sealed class CostManager : IDisposable /// /// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. /// - private void ConnectIntervals(CostInterval prev, CostInterval next) + private void ConnectIntervals(CostInterval? prev, CostInterval? next) { if (prev != null) { From e89d95ac03074ee9d7b4710ce4081dfbca6f2292 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 22 Feb 2023 21:25:42 +1000 Subject: [PATCH 32/35] Use Feedz as the nightly package host --- .github/workflows/build-and-test.yml | 6 +++--- README.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 7c7504371d..379be36107 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -196,9 +196,9 @@ jobs: shell: pwsh run: ./ci-pack.ps1 - - name: MyGet Publish + - name: Feedz Publish shell: pwsh run: | - dotnet nuget push .\artifacts\*.nupkg -k ${{secrets.MYGET_TOKEN}} -s https://www.myget.org/F/sixlabors/api/v2/package - dotnet nuget push .\artifacts\*.snupkg -k ${{secrets.MYGET_TOKEN}} -s https://www.myget.org/F/sixlabors/api/v3/index.json + dotnet nuget push .\artifacts\*.nupkg -k ${{secrets.FEEDZ_TOKEN}} -s https://f.feedz.io/sixlabors/sixlabors/nuget/index.json + dotnet nuget push .\artifacts\*.snupkg -k ${{secrets.FEEDZ_TOKEN}} -s https://f.feedz.io/sixlabors/sixlabors/symbols # TODO: If github.ref starts with 'refs/tags' then it was tag push and we can optionally push out package to nuget.org diff --git a/README.md b/README.md index 4d0eb1e3a6..4962faf5b3 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ For more information, see the [.NET Foundation Code of Conduct](https://dotnetfo Install stable releases via Nuget; development releases are available via MyGet. -| Package Name | Release (NuGet) | Nightly (MyGet) | +| Package Name | Release (NuGet) | Nightly (Feedz.io) | |--------------------------------|-----------------|-----------------| -| `SixLabors.ImageSharp` | [![NuGet](https://img.shields.io/nuget/v/SixLabors.ImageSharp.svg)](https://www.nuget.org/packages/SixLabors.ImageSharp/) | [![MyGet](https://img.shields.io/myget/sixlabors/vpre/SixLabors.ImageSharp.svg)](https://www.myget.org/feed/sixlabors/package/nuget/SixLabors.ImageSharp) | +| `SixLabors.ImageSharp` | [![NuGet](https://img.shields.io/nuget/v/SixLabors.ImageSharp.svg)](https://www.nuget.org/packages/SixLabors.ImageSharp/) | [![feedz.io](https://img.shields.io/badge/endpoint.svg?url=https%3A%2F%2Ff.feedz.io%2Fsixlabors%2Fsixlabors%2Fshield%2FSixLabors.ImageSharp%2Flatest)](https://f.feedz.io/sixlabors/sixlabors/nuget/index.json) | ## Manual build From a4d75e86025f7bef9d4077e8fa679d5b8d1aa284 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 22 Feb 2023 21:35:55 +1000 Subject: [PATCH 33/35] Update shared-infrastructure --- shared-infrastructure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared-infrastructure b/shared-infrastructure index 6c52486c51..9a6cf00d9a 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 6c52486c512357475cbb92bbb7c4c0af4d85b1db +Subproject commit 9a6cf00d9a3d482bb08211dd8309f4724a2735cb From ea2c4fa5f892fed1aee7e228b618cd9d40f5fd9c Mon Sep 17 00:00:00 2001 From: Stefan Nikolei Date: Wed, 22 Feb 2023 15:04:46 +0100 Subject: [PATCH 34/35] Refactor BitReaderBase --- .../Formats/Webp/BitReader/BitReaderBase.cs | 18 +++++++++++------- .../Formats/Webp/BitReader/Vp8BitReader.cs | 8 ++++---- .../Formats/Webp/BitReader/Vp8LBitReader.cs | 5 ++--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs index fa121be4f5..37997d3e29 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs @@ -2,7 +2,6 @@ // Licensed under the Six Labors Split License. using System.Buffers; -using System.Diagnostics.CodeAnalysis; using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Formats.Webp.BitReader; @@ -14,10 +13,14 @@ internal abstract class BitReaderBase : IDisposable { private bool isDisposed; + protected BitReaderBase(IMemoryOwner data) => this.Data = data; + + protected BitReaderBase(Stream inputStream, int imageDataSize, MemoryAllocator memoryAllocator) => this.Data = ReadImageDataFromStream(inputStream, imageDataSize, memoryAllocator); + /// /// Gets or sets the raw encoded image data. /// - public IMemoryOwner? Data { get; set; } + public IMemoryOwner Data { get; set; } /// /// Copies the raw encoded image data from the stream into a byte array. @@ -25,12 +28,13 @@ internal abstract class BitReaderBase : IDisposable /// The input stream. /// Number of bytes to read as indicated from the chunk size. /// Used for allocating memory during reading data from the stream. - [MemberNotNull(nameof(Data))] - protected void ReadImageDataFromStream(Stream input, int bytesToRead, MemoryAllocator memoryAllocator) + protected static IMemoryOwner ReadImageDataFromStream(Stream input, int bytesToRead, MemoryAllocator memoryAllocator) { - this.Data = memoryAllocator.Allocate(bytesToRead); - Span dataSpan = this.Data.Memory.Span; + IMemoryOwner data = memoryAllocator.Allocate(bytesToRead); + Span dataSpan = data.Memory.Span; input.Read(dataSpan[..bytesToRead], 0, bytesToRead); + + return data; } protected virtual void Dispose(bool disposing) @@ -42,7 +46,7 @@ internal abstract class BitReaderBase : IDisposable if (disposing) { - this.Data?.Dispose(); + this.Data.Dispose(); } this.isDisposed = true; diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs index 1fa4deda97..7b64d8329c 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs @@ -57,12 +57,12 @@ internal class Vp8BitReader : BitReaderBase /// The partition length. /// Start index in the data array. Defaults to 0. public Vp8BitReader(Stream inputStream, uint imageDataSize, MemoryAllocator memoryAllocator, uint partitionLength, int startPos = 0) + : base(inputStream, (int)imageDataSize, memoryAllocator) { Guard.MustBeLessThan(imageDataSize, int.MaxValue, nameof(imageDataSize)); this.ImageDataSize = imageDataSize; this.PartitionLength = partitionLength; - this.ReadImageDataFromStream(inputStream, (int)imageDataSize, memoryAllocator); this.InitBitreader(partitionLength, startPos); } @@ -73,8 +73,8 @@ internal class Vp8BitReader : BitReaderBase /// The partition length. /// Start index in the data array. Defaults to 0. public Vp8BitReader(IMemoryOwner imageData, uint partitionLength, int startPos = 0) + : base(imageData) { - this.Data = imageData; this.ImageDataSize = (uint)imageData.Memory.Length; this.PartitionLength = partitionLength; this.InitBitreader(partitionLength, startPos); @@ -186,7 +186,7 @@ internal class Vp8BitReader : BitReaderBase { if (this.pos < this.bufferMax) { - ulong inBits = BinaryPrimitives.ReadUInt64LittleEndian(this.Data!.Memory.Span.Slice((int)this.pos, 8)); + ulong inBits = BinaryPrimitives.ReadUInt64LittleEndian(this.Data.Memory.Span.Slice((int)this.pos, 8)); this.pos += BitsCount >> 3; ulong bits = ByteSwap64(inBits); bits >>= 64 - BitsCount; @@ -205,7 +205,7 @@ internal class Vp8BitReader : BitReaderBase if (this.pos < this.bufferEnd) { this.bits += 8; - this.value = this.Data!.Memory.Span[(int)this.pos++] | (this.value << 8); + this.value = this.Data.Memory.Span[(int)this.pos++] | (this.value << 8); } else if (!this.eof) { diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs index df95f01f4a..8da717545f 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs @@ -63,8 +63,8 @@ internal class Vp8LBitReader : BitReaderBase /// /// Lossless compressed image data. public Vp8LBitReader(IMemoryOwner data) + : base(data) { - this.Data = data; this.len = data.Memory.Length; this.value = 0; this.bitPos = 0; @@ -88,11 +88,10 @@ internal class Vp8LBitReader : BitReaderBase /// The raw image data size in bytes. /// Used for allocating memory during reading data from the stream. public Vp8LBitReader(Stream inputStream, uint imageDataSize, MemoryAllocator memoryAllocator) + : base(inputStream, (int)imageDataSize, memoryAllocator) { long length = imageDataSize; - this.ReadImageDataFromStream(inputStream, (int)imageDataSize, memoryAllocator); - this.len = length; this.value = 0; this.bitPos = 0; From 992361f9670db83fa8d56293f511d7aacc59384c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 24 Feb 2023 23:43:49 +1000 Subject: [PATCH 35/35] Cleanup --- .gitattributes | 5 +- src/ImageSharp/Formats/Webp/AlphaDecoder.cs | 63 ++++++------ src/ImageSharp/Formats/Webp/AlphaEncoder.cs | 25 ++--- .../Formats/Webp/BitReader/BitReaderBase.cs | 10 +- .../Formats/Webp/Lossless/ColorCache.cs | 7 +- .../Formats/Webp/Lossy/Vp8Encoder.cs | 10 +- .../Formats/Webp/WebpDecoderCore.cs | 98 +++++++++---------- src/ImageSharp/Formats/Webp/WebpEncoder.cs | 2 +- .../Formats/Webp/WebpEncoderCore.cs | 11 +-- 9 files changed, 115 insertions(+), 116 deletions(-) diff --git a/.gitattributes b/.gitattributes index 2fdea90e17..ff4ec94087 100644 --- a/.gitattributes +++ b/.gitattributes @@ -64,18 +64,19 @@ # Set explicit file behavior to: # treat as text # normalize to Unix-style line endings and -# use a union merge when resoling conflicts +# use a union merge when resolving conflicts ############################################################################### *.csproj text eol=lf merge=union *.dbproj text eol=lf merge=union *.fsproj text eol=lf merge=union *.ncrunchproject text eol=lf merge=union *.vbproj text eol=lf merge=union +*.shproj text eol=lf merge=union ############################################################################### # Set explicit file behavior to: # treat as text # normalize to Windows-style line endings and -# use a union merge when resoling conflicts +# use a union merge when resolving conflicts ############################################################################### *.sln text eol=crlf merge=union ############################################################################### diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index 4a7b505363..8875ae1150 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -38,7 +38,7 @@ internal class AlphaDecoder : IDisposable this.LastRow = 0; int totalPixels = width * height; - var compression = (WebpAlphaCompressionMethod)(alphaChunkHeader & 0x03); + WebpAlphaCompressionMethod compression = (WebpAlphaCompressionMethod)(alphaChunkHeader & 0x03); if (compression is not WebpAlphaCompressionMethod.NoCompression and not WebpAlphaCompressionMethod.WebpLosslessCompression) { WebpThrowHelper.ThrowImageFormatException($"unexpected alpha compression method {compression} found"); @@ -59,7 +59,7 @@ internal class AlphaDecoder : IDisposable if (this.Compressed) { - var bitReader = new Vp8LBitReader(data); + Vp8LBitReader bitReader = new(data); this.LosslessDecoder = new WebpLosslessDecoder(bitReader, memoryAllocator, configuration); this.LosslessDecoder.DecodeImageStream(this.Vp8LDec, width, height, true); @@ -174,17 +174,14 @@ internal class AlphaDecoder : IDisposable dst = dst[this.Width..]; } } + else if (this.Use8BDecode) + { + this.LosslessDecoder.DecodeAlphaData(this); + } else { - if (this.Use8BDecode) - { - this.LosslessDecoder.DecodeAlphaData(this); - } - else - { - this.LosslessDecoder.DecodeImageData(this.Vp8LDec, this.Vp8LDec.Pixels.Memory.Span); - this.ExtractAlphaRows(this.Vp8LDec); - } + this.LosslessDecoder.DecodeImageData(this.Vp8LDec, this.Vp8LDec.Pixels.Memory.Span); + this.ExtractAlphaRows(this.Vp8LDec); } } @@ -262,8 +259,7 @@ internal class AlphaDecoder : IDisposable { int numRowsToProcess = dec.Height; int width = dec.Width; - Span pixels = dec.Pixels.Memory.Span; - Span input = pixels; + Span input = dec.Pixels.Memory.Span; Span output = this.Alpha.Memory.Span; // Extract alpha (which is stored in the green plane). @@ -328,7 +324,7 @@ internal class AlphaDecoder : IDisposable ref byte srcRef = ref MemoryMarshal.GetReference(input); for (i = 1; i + 8 <= width; i += 8) { - var a0 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, i)), 0); + Vector128 a0 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, i)), 0); Vector128 a1 = Sse2.Add(a0.AsByte(), last.AsByte()); Vector128 a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1); Vector128 a3 = Sse2.Add(a1, a2); @@ -366,32 +362,29 @@ internal class AlphaDecoder : IDisposable { HorizontalUnfilter(null, input, dst, width); } - else + else if (Avx2.IsSupported) { - if (Avx2.IsSupported) + nint i; + int maxPos = width & ~31; + for (i = 0; i < maxPos; i += 32) { - nint i; - int maxPos = width & ~31; - for (i = 0; i < maxPos; i += 32) - { - Vector256 a0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(input), i)); - Vector256 b0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(prev), i)); - Vector256 c0 = Avx2.Add(a0.AsByte(), b0.AsByte()); - ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i); - Unsafe.As>(ref outputRef) = c0; - } + Vector256 a0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(input), i)); + Vector256 b0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(prev), i)); + Vector256 c0 = Avx2.Add(a0.AsByte(), b0.AsByte()); + ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i); + Unsafe.As>(ref outputRef) = c0; + } - for (; i < width; i++) - { - dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]); - } + for (; i < width; i++) + { + dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]); } - else + } + else + { + for (int i = 0; i < width; i++) { - for (int i = 0; i < width; i++) - { - dst[i] = (byte)(prev[i] + input[i]); - } + dst[i] = (byte)(prev[i] + input[i]); } } } diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs index dd21365bc3..596715b205 100644 --- a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -12,10 +12,8 @@ namespace SixLabors.ImageSharp.Formats.Webp; /// /// Methods for encoding the alpha data of a VP8 image. /// -internal class AlphaEncoder : IDisposable +internal static class AlphaEncoder { - private IMemoryOwner? alphaData; - /// /// Encodes the alpha channel data. /// Data is either compressed as lossless webp image or uncompressed. @@ -28,12 +26,18 @@ internal class AlphaEncoder : IDisposable /// Indicates, if the data should be compressed with the lossless webp compression. /// The size in bytes of the alpha data. /// The encoded alpha data. - public IMemoryOwner EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool skipMetadata, bool compress, out int size) + public static IMemoryOwner EncodeAlpha( + Image image, + Configuration configuration, + MemoryAllocator memoryAllocator, + bool skipMetadata, + bool compress, + out int size) where TPixel : unmanaged, IPixel { int width = image.Width; int height = image.Height; - this.alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); + IMemoryOwner alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); if (compress) { @@ -54,15 +58,15 @@ internal class AlphaEncoder : IDisposable // The transparency information will be stored in the green channel of the ARGB quadruplet. // The green channel is allowed extra transformation steps in the specification -- unlike the other channels, // that can improve compression. - using Image alphaAsImage = DispatchAlphaToGreen(image, this.alphaData.GetSpan()); + using Image alphaAsImage = DispatchAlphaToGreen(image, alphaData.GetSpan()); - size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, this.alphaData); + size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, alphaData); - return this.alphaData; + return alphaData; } size = width * height; - return this.alphaData; + return alphaData; } /// @@ -127,7 +131,4 @@ internal class AlphaEncoder : IDisposable return alphaDataBuffer; } - - /// - public void Dispose() => this.alphaData?.Dispose(); } diff --git a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs index 37997d3e29..83f9e797ab 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs @@ -13,14 +13,16 @@ internal abstract class BitReaderBase : IDisposable { private bool isDisposed; - protected BitReaderBase(IMemoryOwner data) => this.Data = data; + protected BitReaderBase(IMemoryOwner data) + => this.Data = data; - protected BitReaderBase(Stream inputStream, int imageDataSize, MemoryAllocator memoryAllocator) => this.Data = ReadImageDataFromStream(inputStream, imageDataSize, memoryAllocator); + protected BitReaderBase(Stream inputStream, int imageDataSize, MemoryAllocator memoryAllocator) + => this.Data = ReadImageDataFromStream(inputStream, imageDataSize, memoryAllocator); /// - /// Gets or sets the raw encoded image data. + /// Gets the raw encoded image data. /// - public IMemoryOwner Data { get; set; } + public IMemoryOwner Data { get; } /// /// Copies the raw encoded image data from the stream into a byte array. diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 7e1c4e2fe1..e683fb5605 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp.Lossless; @@ -28,17 +27,17 @@ internal class ColorCache /// /// Gets the color entries. /// - public uint[] Colors { get; private set; } + public uint[] Colors { get; } /// /// Gets the hash shift: 32 - hashBits. /// - public int HashShift { get; private set; } + public int HashShift { get; } /// /// Gets the hash bits. /// - public int HashBits { get; private set; } + public int HashBits { get; } /// /// Inserts a new color into the cache. diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index a5d1900b2a..16b4c827ef 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -348,12 +348,18 @@ internal class Vp8Encoder : IDisposable // Extract and encode alpha channel data, if present. int alphaDataSize = 0; bool alphaCompressionSucceeded = false; - using AlphaEncoder alphaEncoder = new(); Span alphaData = Span.Empty; if (hasAlpha) { // TODO: This can potentially run in an separate task. - IMemoryOwner encodedAlphaData = alphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.skipMetadata, this.alphaCompression, out alphaDataSize); + using IMemoryOwner encodedAlphaData = AlphaEncoder.EncodeAlpha( + image, + this.configuration, + this.memoryAllocator, + this.skipMetadata, + this.alphaCompression, + out alphaDataSize); + alphaData = encodedAlphaData.GetSpan(); if (alphaDataSize < pixelCount) { diff --git a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs index 83298237dd..181f01632f 100644 --- a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs @@ -8,9 +8,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.IO; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Metadata; -using SixLabors.ImageSharp.Metadata.Profiles.Exif; using SixLabors.ImageSharp.Metadata.Profiles.Icc; -using SixLabors.ImageSharp.Metadata.Profiles.Xmp; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Webp; @@ -40,11 +38,6 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// private readonly uint maxFrames; - /// - /// Gets the decoded by this decoder instance. - /// - private ImageMetadata? metadata; - /// /// Gets or sets the alpha data, if an ALPH chunk is present. /// @@ -55,11 +48,6 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// private readonly MemoryAllocator memoryAllocator; - /// - /// The webp specific metadata. - /// - private WebpMetadata? webpMetadata; - /// /// Information about the webp image. /// @@ -91,15 +79,15 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable Image? image = null; try { - this.metadata = new ImageMetadata(); + ImageMetadata metadata = new(); uint fileSize = this.ReadImageHeader(stream); - using (this.webImageInfo = this.ReadVp8Info(stream)) + using (this.webImageInfo = this.ReadVp8Info(stream, metadata)) { if (this.webImageInfo.Features is { Animation: true }) { - using var animationDecoder = new WebpAnimationDecoder(this.memoryAllocator, this.configuration, this.maxFrames); + using WebpAnimationDecoder animationDecoder = new(this.memoryAllocator, this.configuration, this.maxFrames); return animationDecoder.Decode(stream, this.webImageInfo.Features, this.webImageInfo.Width, this.webImageInfo.Height, fileSize); } @@ -108,23 +96,23 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable WebpThrowHelper.ThrowNotSupportedException("Animations are not supported"); } - image = new Image(this.configuration, (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, this.metadata); + image = new Image(this.configuration, (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, metadata); Buffer2D pixels = image.GetRootFramePixelBuffer(); if (this.webImageInfo.IsLossless) { - var losslessDecoder = new WebpLosslessDecoder(this.webImageInfo.Vp8LBitReader, this.memoryAllocator, this.configuration); + WebpLosslessDecoder losslessDecoder = new(this.webImageInfo.Vp8LBitReader, this.memoryAllocator, this.configuration); losslessDecoder.Decode(pixels, image.Width, image.Height); } else { - var lossyDecoder = new WebpLossyDecoder(this.webImageInfo.Vp8BitReader, this.memoryAllocator, this.configuration); + WebpLossyDecoder lossyDecoder = new(this.webImageInfo.Vp8BitReader, this.memoryAllocator, this.configuration); lossyDecoder.Decode(pixels, image.Width, image.Height, this.webImageInfo, this.alphaData); } // There can be optional chunks after the image data, like EXIF and XMP. if (this.webImageInfo.Features != null) { - this.ParseOptionalChunks(stream, this.webImageInfo.Features); + this.ParseOptionalChunks(stream, metadata, this.webImageInfo.Features); } return image; @@ -141,9 +129,11 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable public ImageInfo Identify(BufferedReadStream stream, CancellationToken cancellationToken) { this.ReadImageHeader(stream); - using (this.webImageInfo = this.ReadVp8Info(stream, true)) + + ImageMetadata metadata = new(); + using (this.webImageInfo = this.ReadVp8Info(stream, metadata, true)) { - return new ImageInfo(new PixelTypeInfo((int)this.webImageInfo.BitsPerPixel), (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, this.metadata); + return new ImageInfo(new PixelTypeInfo((int)this.webImageInfo.BitsPerPixel), (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, metadata); } } @@ -172,23 +162,23 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads information present in the image header, about the image content and how to decode the image. /// /// The stream to decode from. + /// The image metadata. /// For identify, the alpha data should not be read. /// Information about the webp image. - private WebpImageInfo ReadVp8Info(BufferedReadStream stream, bool ignoreAlpha = false) + private WebpImageInfo ReadVp8Info(BufferedReadStream stream, ImageMetadata metadata, bool ignoreAlpha = false) { - this.metadata = new ImageMetadata(); - this.webpMetadata = this.metadata.GetFormatMetadata(WebpFormat.Instance); + WebpMetadata webpMetadata = metadata.GetFormatMetadata(WebpFormat.Instance); WebpChunkType chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); - var features = new WebpFeatures(); + WebpFeatures features = new(); switch (chunkType) { case WebpChunkType.Vp8: - this.webpMetadata.FileFormat = WebpFileFormatType.Lossy; + webpMetadata.FileFormat = WebpFileFormatType.Lossy; return WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, stream, this.buffer, features); case WebpChunkType.Vp8L: - this.webpMetadata.FileFormat = WebpFileFormatType.Lossless; + webpMetadata.FileFormat = WebpFileFormatType.Lossless; return WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, stream, this.buffer, features); case WebpChunkType.Vp8X: WebpImageInfo webpInfos = WebpChunkParsingUtils.ReadVp8XHeader(stream, this.buffer, features); @@ -197,17 +187,17 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); if (chunkType == WebpChunkType.Vp8) { - this.webpMetadata.FileFormat = WebpFileFormatType.Lossy; + webpMetadata.FileFormat = WebpFileFormatType.Lossy; webpInfos = WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, stream, this.buffer, features); } else if (chunkType == WebpChunkType.Vp8L) { - this.webpMetadata.FileFormat = WebpFileFormatType.Lossless; + webpMetadata.FileFormat = WebpFileFormatType.Lossless; webpInfos = WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, stream, this.buffer, features); } else if (WebpChunkParsingUtils.IsOptionalVp8XChunk(chunkType)) { - bool isAnimationChunk = this.ParseOptionalExtendedChunks(stream, chunkType, features, ignoreAlpha); + bool isAnimationChunk = this.ParseOptionalExtendedChunks(stream, metadata, chunkType, features, ignoreAlpha); if (isAnimationChunk) { return webpInfos; @@ -233,24 +223,30 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Parses optional VP8X chunks, which can be ICCP, XMP, ANIM or ALPH chunks. /// /// The stream to decode from. + /// The image metadata. /// The chunk type. /// The webp image features. /// For identify, the alpha data should not be read. /// true, if its a alpha chunk. - private bool ParseOptionalExtendedChunks(BufferedReadStream stream, WebpChunkType chunkType, WebpFeatures features, bool ignoreAlpha) + private bool ParseOptionalExtendedChunks( + BufferedReadStream stream, + ImageMetadata metadata, + WebpChunkType chunkType, + WebpFeatures features, + bool ignoreAlpha) { switch (chunkType) { case WebpChunkType.Iccp: - this.ReadIccProfile(stream); + this.ReadIccProfile(stream, metadata); break; case WebpChunkType.Exif: - this.ReadExifProfile(stream); + this.ReadExifProfile(stream, metadata); break; case WebpChunkType.Xmp: - this.ReadXmpProfile(stream); + this.ReadXmpProfile(stream, metadata); break; case WebpChunkType.AnimationParameter: @@ -272,8 +268,9 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads the optional metadata EXIF of XMP profiles, which can follow the image data. /// /// The stream to decode from. + /// The image metadata. /// The webp features. - private void ParseOptionalChunks(BufferedReadStream stream, WebpFeatures features) + private void ParseOptionalChunks(BufferedReadStream stream, ImageMetadata metadata, WebpFeatures features) { if (this.skipMetadata || (!features.ExifProfile && !features.XmpMetaData)) { @@ -285,13 +282,13 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable { // Read chunk header. WebpChunkType chunkType = this.ReadChunkType(stream); - if (chunkType == WebpChunkType.Exif && this.metadata!.ExifProfile == null) + if (chunkType == WebpChunkType.Exif && metadata.ExifProfile == null) { - this.ReadExifProfile(stream); + this.ReadExifProfile(stream, metadata); } - else if (chunkType == WebpChunkType.Xmp && this.metadata!.XmpProfile == null) + else if (chunkType == WebpChunkType.Xmp && metadata.XmpProfile == null) { - this.ReadXmpProfile(stream); + this.ReadXmpProfile(stream, metadata); } else { @@ -306,7 +303,8 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads the EXIF profile from the stream. /// /// The stream to decode from. - private void ReadExifProfile(BufferedReadStream stream) + /// The image metadata. + private void ReadExifProfile(BufferedReadStream stream, ImageMetadata metadata) { uint exifChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) @@ -323,8 +321,7 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable return; } - var profile = new ExifProfile(exifData); - this.metadata!.ExifProfile = profile; + metadata.ExifProfile = new(exifData); } } @@ -332,7 +329,8 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads the XMP profile the stream. /// /// The stream to decode from. - private void ReadXmpProfile(BufferedReadStream stream) + /// The image metadata. + private void ReadXmpProfile(BufferedReadStream stream, ImageMetadata metadata) { uint xmpChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) @@ -349,8 +347,7 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable return; } - var profile = new XmpProfile(xmpData); - this.metadata!.XmpProfile = profile; + metadata.XmpProfile = new(xmpData); } } @@ -358,7 +355,8 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads the ICCP chunk from the stream. /// /// The stream to decode from. - private void ReadIccProfile(BufferedReadStream stream) + /// The image metadata. + private void ReadIccProfile(BufferedReadStream stream, ImageMetadata metadata) { uint iccpChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) @@ -374,10 +372,10 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the iccp chunk"); } - var profile = new IccProfile(iccpData); + IccProfile profile = new(iccpData); if (profile.CheckIsValid()) { - this.metadata!.IccProfile = profile; + metadata.IccProfile = profile; } } } @@ -442,8 +440,7 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable { if (stream.Read(this.buffer, 0, 4) == 4) { - var chunkType = (WebpChunkType)BinaryPrimitives.ReadUInt32BigEndian(this.buffer); - return chunkType; + return (WebpChunkType)BinaryPrimitives.ReadUInt32BigEndian(this.buffer); } throw new ImageFormatException("Invalid Webp data."); @@ -455,6 +452,7 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// The stream to decode from. /// The chunk size in bytes. + /// Invalid data. private uint ReadChunkSize(BufferedReadStream stream) { if (stream.Read(this.buffer, 0, 4) == 4) diff --git a/src/ImageSharp/Formats/Webp/WebpEncoder.cs b/src/ImageSharp/Formats/Webp/WebpEncoder.cs index e314d38017..bd8303f1c8 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoder.cs @@ -82,7 +82,7 @@ public sealed class WebpEncoder : ImageEncoder /// protected override void Encode(Image image, Stream stream, CancellationToken cancellationToken) { - WebpEncoderCore encoder = new(this, image.GetMemoryAllocator()); + WebpEncoderCore encoder = new(this, image.GetConfiguration()); encoder.Encode(image, stream, cancellationToken); } } diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs index 8d707fed92..33189ba845 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Memory; @@ -80,16 +79,17 @@ internal sealed class WebpEncoderCore : IImageEncoderInternals /// /// The global configuration. /// - private Configuration? configuration; + private readonly Configuration configuration; /// /// Initializes a new instance of the class. /// /// The encoder with options. - /// The memory manager. - public WebpEncoderCore(WebpEncoder encoder, MemoryAllocator memoryAllocator) + /// The global configuration. + public WebpEncoderCore(WebpEncoder encoder, Configuration configuration) { - this.memoryAllocator = memoryAllocator; + this.configuration = configuration; + this.memoryAllocator = configuration.MemoryAllocator; this.alphaCompression = encoder.UseAlphaCompression; this.fileFormat = encoder.FileFormat; this.quality = encoder.Quality; @@ -116,7 +116,6 @@ internal sealed class WebpEncoderCore : IImageEncoderInternals Guard.NotNull(image, nameof(image)); Guard.NotNull(stream, nameof(stream)); - this.configuration = image.GetConfiguration(); bool lossless; if (this.fileFormat is not null) {