From ac1905328d37213c92fcf57d32417f9322294fe1 Mon Sep 17 00:00:00 2001 From: Andreas Eriksson <4438107+andreas-eriksson@users.noreply.github.com> Date: Wed, 1 Apr 2026 09:51:36 +0200 Subject: [PATCH 01/12] Fix Identify returning incorrect frame count for animated PNGs The Identify method had two bugs when processing fdAT (FrameData) chunks: 1. A spurious Skip(4) before SkipChunkDataAndCrc caused the stream to be misaligned by 4 bytes, since chunk.Length already includes the 4-byte sequence number. 2. Unlike Decode, which consumes all fdAT chunks for a frame in one shot via ReadScanlines + ReadNextFrameDataChunk, Identify processed them individually, calling InitializeFrameMetadata for each chunk and inflating the frame count. The fix removes the extra Skip(4) and adds SkipRemainingFrameDataChunks to consume all continuation fdAT chunks for a frame, mirroring how ReadNextFrameDataChunk works during decoding. --- src/ImageSharp/Formats/Png/PngDecoderCore.cs | 30 +++++++++++++++++-- .../Formats/Png/PngDecoderTests.cs | 12 ++++++++ tests/ImageSharp.Tests/TestImages.cs | 1 + .../animated/issue-animated-frame-count.png | 3 ++ 4 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 tests/Images/Input/Png/animated/issue-animated-frame-count.png diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs index 8962182679..52858ec129 100644 --- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs +++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs @@ -428,9 +428,10 @@ internal sealed class PngDecoderCore : ImageDecoderCore InitializeFrameMetadata(framesMetadata, currentFrameControl.Value); - // Skip sequence number - this.currentStream.Skip(4); + // Skip data for this and all remaining FrameData chunks belonging to the same frame + // (comparable to how Decode consumes them via ReadScanlines + ReadNextFrameDataChunk). 
this.SkipChunkDataAndCrc(chunk); + this.SkipRemainingFrameDataChunks(buffer); break; case PngChunkType.Data: @@ -2093,6 +2094,31 @@ internal sealed class PngDecoderCore : ImageDecoderCore return 0; } + /// + /// Skips any remaining chunks belonging to the current frame. + /// This mirrors how is used during decoding: + /// consecutive fdAT chunks are consumed until a non-fdAT chunk is encountered, + /// which is stored in for the next iteration. + /// + /// Temporary buffer. + private void SkipRemainingFrameDataChunks(Span buffer) + { + while (this.TryReadChunk(buffer, out PngChunk chunk)) + { + if (chunk.Type is PngChunkType.FrameData) + { + chunk.Data?.Dispose(); + this.SkipChunkDataAndCrc(chunk); + } + else + { + // Not a FrameData chunk; store it so the next TryReadChunk call returns it. + this.nextChunk = chunk; + return; + } + } + } + /// /// Reads a chunk from the stream. /// diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index a58101a6bd..69e656849b 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -411,6 +411,18 @@ public partial class PngDecoderTests Assert.Equal(expectedPixelSize, imageInfo.PixelType.BitsPerPixel); } + [Fact] + public void Identify_AnimatedPng_ReadsFrameCountCorrectly() + { + TestFile testFile = TestFile.Create(TestImages.Png.AnimatedFrameCount); + + using MemoryStream stream = new(testFile.Bytes, false); + ImageInfo imageInfo = Image.Identify(stream); + + Assert.NotNull(imageInfo); + Assert.Equal(50, imageInfo.FrameMetadataCollection.Count); + } + [Theory] [WithFile(TestImages.Png.Bad.MissingDataChunk, PixelTypes.Rgba32)] public void Decode_MissingDataChunk_ThrowsException(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index fab1b2891c..730e62d824 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ 
b/tests/ImageSharp.Tests/TestImages.cs @@ -76,6 +76,7 @@ public static class TestImages public const string BlendOverMultiple = "Png/animated/21-blend-over-multiple.png"; public const string FrameOffset = "Png/animated/frame-offset.png"; public const string DefaultNotAnimated = "Png/animated/default-not-animated.png"; + public const string AnimatedFrameCount = "Png/animated/issue-animated-frame-count.png"; public const string Issue2666 = "Png/issues/Issue_2666.png"; public const string Issue2882 = "Png/issues/Issue_2882.png"; diff --git a/tests/Images/Input/Png/animated/issue-animated-frame-count.png b/tests/Images/Input/Png/animated/issue-animated-frame-count.png new file mode 100644 index 0000000000..db8ff47b9b --- /dev/null +++ b/tests/Images/Input/Png/animated/issue-animated-frame-count.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d51679bcb096ae45ae0f5bf874916ad929014f68ae43b487253d5050c8b68b +size 13561079 From 7b13e1df1b909510ba1303e4b7b25d9d1d8e9df4 Mon Sep 17 00:00:00 2001 From: Andreas Eriksson <4438107+andreas-eriksson@users.noreply.github.com> Date: Wed, 1 Apr 2026 12:58:13 +0200 Subject: [PATCH 02/12] Add generated animated PNG tests for Identify and Decode frame counts --- .../Formats/Png/PngDecoderTests.cs | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 69e656849b..0ba8866127 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -423,6 +423,55 @@ public partial class PngDecoderTests Assert.Equal(50, imageInfo.FrameMetadataCollection.Count); } + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(5)] + [InlineData(10)] + [InlineData(100)] + public void Identify_AnimatedPng_FrameCount_MatchesDecode(int frameCount) + { + using Image image = new(10, 10, Color.Red.ToPixel()); + for (int i = 1; i < 
frameCount; i++) + { + using ImageFrame frame = new(Configuration.Default, 10, 10); + image.Frames.AddFrame(frame); + } + + using MemoryStream stream = new(); + image.Save(stream, new PngEncoder()); + stream.Position = 0; + + ImageInfo imageInfo = Image.Identify(stream); + + Assert.NotNull(imageInfo); + Assert.Equal(frameCount, imageInfo.FrameMetadataCollection.Count); + } + + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(5)] + [InlineData(10)] + [InlineData(100)] + public void Decode_AnimatedPng_FrameCount(int frameCount) + { + using Image image = new(10, 10, Color.Red.ToPixel()); + for (int i = 1; i < frameCount; i++) + { + using ImageFrame frame = new(Configuration.Default, 10, 10); + image.Frames.AddFrame(frame); + } + + using MemoryStream stream = new(); + image.Save(stream, new PngEncoder()); + stream.Position = 0; + + using Image decoded = Image.Load(stream); + + Assert.Equal(frameCount, decoded.Frames.Count); + } + [Theory] [WithFile(TestImages.Png.Bad.MissingDataChunk, PixelTypes.Rgba32)] public void Decode_MissingDataChunk_ThrowsException(TestImageProvider provider) From e06a015cf59148e24d1fc940aed3b9e4d8356103 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 7 Apr 2026 11:39:29 +1000 Subject: [PATCH 03/12] Fix SIMD slicing and padding length handling. 
Fix #3104 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 14 ++++++++------ .../Encoder/SpectralConverter{TPixel}.cs | 6 +++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index ff5ea5de33..076590605d 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -752,7 +752,7 @@ internal static partial class SimdUtils /// Implementation is based on MagicScaler code: /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182 /// - internal static unsafe void ByteToNormalizedFloat( + internal static void ByteToNormalizedFloat( ReadOnlySpan source, Span destination) { @@ -1172,8 +1172,10 @@ internal static partial class SimdUtils Vector256 rgb, rg, bx; Vector256 r, g, b; + // Each iteration consumes 8 Rgb24 pixels (24 bytes) but starts with a 32-byte load, + // so we need 3 extra pixels of addressable slack beyond the vectorized chunk. const int bytesPerRgbStride = 24; - nuint count = (uint)source.Length / 8; + nuint count = source.Length > 3 ? 
(uint)(source.Length - 3) / 8 : 0; for (nuint i = 0; i < count; i++) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (uint)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte(); @@ -1193,10 +1195,10 @@ internal static partial class SimdUtils } int sliceCount = (int)(count * 8); - redChannel = redChannel.Slice(sliceCount); - greenChannel = greenChannel.Slice(sliceCount); - blueChannel = blueChannel.Slice(sliceCount); - source = source.Slice(sliceCount); + redChannel = redChannel[sliceCount..]; + greenChannel = greenChannel[sliceCount..]; + blueChannel = blueChannel[sliceCount..]; + source = source[sliceCount..]; } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs index b60ef68f11..8662c5c49b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs @@ -114,9 +114,9 @@ internal class SpectralConverter : SpectralConverter, IDisposable Span sourceRow = this.pixelBuffer.DangerousGetRowSpan(srcIndex); PixelOperations.Instance.UnpackIntoRgbPlanes(rLane, gLane, bLane, sourceRow); - rLane.Slice(paddingStartIndex).Fill(rLane[paddingStartIndex - 1]); - gLane.Slice(paddingStartIndex).Fill(gLane[paddingStartIndex - 1]); - bLane.Slice(paddingStartIndex).Fill(bLane[paddingStartIndex - 1]); + rLane.Slice(paddingStartIndex, paddedPixelsCount).Fill(rLane[paddingStartIndex - 1]); + gLane.Slice(paddingStartIndex, paddedPixelsCount).Fill(gLane[paddingStartIndex - 1]); + bLane.Slice(paddingStartIndex, paddedPixelsCount).Fill(bLane[paddingStartIndex - 1]); // Convert from rgb24 to target pixel type JpegColorConverterBase.ComponentValues values = new(this.componentProcessors, y); From a76c02f7c095df9b578a8c0336712b148abd92d1 Mon Sep 17 00:00:00 2001 From: Andreas <4438107+andreas-eriksson@users.noreply.github.com> Date: Tue, 7 
Apr 2026 07:46:22 +0200 Subject: [PATCH 04/12] Replace test image with a smaller one. Adjusted Identify_AnimatedPng_ReadsFrameCountCorrectly to expect 48 frames instead of 50. --- tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs | 2 +- .../Images/Input/Png/animated/issue-animated-frame-count.png | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 0ba8866127..802f2aba39 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -420,7 +420,7 @@ public partial class PngDecoderTests ImageInfo imageInfo = Image.Identify(stream); Assert.NotNull(imageInfo); - Assert.Equal(50, imageInfo.FrameMetadataCollection.Count); + Assert.Equal(48, imageInfo.FrameMetadataCollection.Count); } [Theory] diff --git a/tests/Images/Input/Png/animated/issue-animated-frame-count.png b/tests/Images/Input/Png/animated/issue-animated-frame-count.png index db8ff47b9b..88427f4873 100644 --- a/tests/Images/Input/Png/animated/issue-animated-frame-count.png +++ b/tests/Images/Input/Png/animated/issue-animated-frame-count.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62d51679bcb096ae45ae0f5bf874916ad929014f68ae43b487253d5050c8b68b -size 13561079 +oid sha256:af4e320f586ab26c55612a7ccfc98a8c99cd6a0efe8a70d379503751d06fe8bd +size 51542 From 9569449ac46e844c33c7b4073c8773d9f3d87134 Mon Sep 17 00:00:00 2001 From: Andreas <4438107+andreas-eriksson@users.noreply.github.com> Date: Tue, 7 Apr 2026 08:19:40 +0200 Subject: [PATCH 05/12] Fix MaxFrames handling in PNG decoder - Change >= to > for correct MaxFrames boundary - Skip fdAT chunk data when hitting maxFrames in Identify to maintain stream alignment - Add tests for Identify and Load with MaxFrames --- src/ImageSharp/Formats/Png/PngDecoderCore.cs | 12 ++++++---- .../Formats/Png/PngDecoderTests.cs | 24 
+++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs index 52858ec129..d794c66e27 100644 --- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs +++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs @@ -214,7 +214,7 @@ internal sealed class PngDecoderCore : ImageDecoderCore break; case PngChunkType.FrameData: { - if (frameCount >= this.maxFrames) + if (frameCount > this.maxFrames) { goto EOF; } @@ -275,7 +275,7 @@ internal sealed class PngDecoderCore : ImageDecoderCore previousFrameControl = currentFrameControl; } - if (frameCount >= this.maxFrames) + if (frameCount > this.maxFrames) { goto EOF; } @@ -402,7 +402,7 @@ internal sealed class PngDecoderCore : ImageDecoderCore break; case PngChunkType.FrameControl: ++frameCount; - if (frameCount >= this.maxFrames) + if (frameCount > this.maxFrames) { break; } @@ -411,8 +411,12 @@ internal sealed class PngDecoderCore : ImageDecoderCore break; case PngChunkType.FrameData: - if (frameCount >= this.maxFrames) + if (frameCount > this.maxFrames) { + // Must skip the chunk data even when we've hit maxFrames, because TryReadChunk + // restores the stream position to the start of the fdAT data after CRC validation. 
+ this.SkipChunkDataAndCrc(chunk); + this.SkipRemainingFrameDataChunks(buffer); break; } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 802f2aba39..4712fc0dd5 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -423,6 +423,30 @@ public partial class PngDecoderTests Assert.Equal(48, imageInfo.FrameMetadataCollection.Count); } + [Fact] + public void Identify_AnimatedPngWithMaxFrames_ReadsFrameCountCorrectly() + { + TestFile testFile = TestFile.Create(TestImages.Png.AnimatedFrameCount); + + using MemoryStream stream = new(testFile.Bytes, false); + ImageInfo imageInfo = Image.Identify(new DecoderOptions { MaxFrames = 40 }, stream); + + Assert.NotNull(imageInfo); + Assert.Equal(40, imageInfo.FrameMetadataCollection.Count); + } + + [Fact] + public void Load_AnimatedPngWithMaxFrames_ReadsFrameCountCorrectly() + { + TestFile testFile = TestFile.Create(TestImages.Png.AnimatedFrameCount); + + using MemoryStream stream = new(testFile.Bytes, false); + using Image image = Image.Load(new DecoderOptions { MaxFrames = 40 }, stream); + + Assert.NotNull(image); + Assert.Equal(40, image.Frames.Count); + } + [Theory] [InlineData(1)] [InlineData(2)] From 90f0c0b5d47bc3d68cc4ad69222658f497e29516 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 7 Apr 2026 21:32:38 +1000 Subject: [PATCH 06/12] Update and simplify quantization color caches. 
--- src/ImageSharp/Advanced/AotCompilerTools.cs | 2 - .../Quantization/ColorMatchingMode.cs | 10 +- .../EuclideanPixelMap{TPixel,TCache}.cs | 116 +++++- .../Quantization/IColorIndexCache.cs | 387 +++++------------- .../Quantization/OctreeQuantizer{TPixel}.cs | 2 +- .../Processing/ColorMatchingCaches.cs | 302 ++++++++++++++ .../Formats/Png/PngEncoderTests.cs | 2 +- .../Quantization/PaletteQuantizerTests.cs | 158 +++++++ 8 files changed, 667 insertions(+), 312 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs diff --git a/src/ImageSharp/Advanced/AotCompilerTools.cs b/src/ImageSharp/Advanced/AotCompilerTools.cs index fef49bffd4..2944b58e5f 100644 --- a/src/ImageSharp/Advanced/AotCompilerTools.cs +++ b/src/ImageSharp/Advanced/AotCompilerTools.cs @@ -523,10 +523,8 @@ internal static class AotCompilerTools private static void AotCompilePixelMaps() where TPixel : unmanaged, IPixel { - default(EuclideanPixelMap).GetClosestColor(default, out _); default(EuclideanPixelMap).GetClosestColor(default, out _); default(EuclideanPixelMap).GetClosestColor(default, out _); - default(EuclideanPixelMap).GetClosestColor(default, out _); } /// diff --git a/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs b/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs index 26fd7d5d76..c520d7c54b 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs @@ -15,14 +15,8 @@ public enum ColorMatchingMode Coarse, /// - /// Enables an exact color match cache for the first 512 unique colors encountered, - /// falling back to coarse matching thereafter. - /// - Hybrid, - - /// - /// Performs exact color matching without any caching optimizations. - /// This is the slowest but most accurate matching strategy. + /// Performs exact color matching using a bounded exact-match cache with eviction. 
+ /// This preserves exact color matching while accelerating repeated colors. /// Exact } diff --git a/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs b/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs index 5b0c7252cb..e2e7206e09 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs @@ -3,6 +3,8 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Processing.Processors.Quantization; @@ -71,32 +73,107 @@ internal sealed class EuclideanPixelMap : PixelMap [MethodImpl(InliningOptions.ColdPath)] private int GetClosestColorSlow(Rgba32 rgba, ref TPixel paletteRef, out TPixel match) { - // Loop through the palette and find the nearest match. + ReadOnlySpan rgbaPalette = this.rgbaPalette; + ref Rgba32 rgbaPaletteRef = ref MemoryMarshal.GetReference(rgbaPalette); int index = 0; - float leastDistance = float.MaxValue; - for (int i = 0; i < this.rgbaPalette.Length; i++) + int leastDistance = int.MaxValue; + int i = 0; + + if (Vector128.IsHardwareAccelerated && rgbaPalette.Length >= 4) { - Rgba32 candidate = this.rgbaPalette[i]; - if (candidate.PackedValue == rgba.PackedValue) - { - index = i; - break; - } + // Duplicate the query color so one 128-bit register can be subtracted from + // two packed RGBA candidates at a time after widening. 
+ Vector128 pixel = Vector128.Create( + rgba.R, + rgba.G, + rgba.B, + rgba.A, + rgba.R, + rgba.G, + rgba.B, + rgba.A); - float distance = DistanceSquared(rgba, candidate); - if (distance == 0) + int vectorizedLength = rgbaPalette.Length & ~0x03; + + for (; i < vectorizedLength; i += 4) { - index = i; - break; + // Load four packed Rgba32 values (16 bytes) and widen them into two vectors: + // [c0.r, c0.g, c0.b, c0.a, c1.r, ...] and [c2.r, c2.g, c2.b, c2.a, c3.r, ...]. + Vector128 packed = Vector128.LoadUnsafe(ref Unsafe.As(ref Unsafe.Add(ref rgbaPaletteRef, i))); + Vector128 lowerDiff = Vector128.WidenLower(packed).AsInt16() - pixel; + Vector128 upperDiff = Vector128.WidenUpper(packed).AsInt16() - pixel; + + // MultiplyAddAdjacent collapses channel squares into RG + BA partial sums, + // so each pair of int lanes still corresponds to one candidate color. + Vector128 lowerPairs = Vector128_.MultiplyAddAdjacent(lowerDiff, lowerDiff); + Vector128 upperPairs = Vector128_.MultiplyAddAdjacent(upperDiff, upperDiff); + + // Sum the two partials for candidates i and i + 1. + ref int lowerRef = ref Unsafe.As, int>(ref lowerPairs); + int distance = lowerRef + Unsafe.Add(ref lowerRef, 1); + if (distance < leastDistance) + { + index = i; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } + + distance = Unsafe.Add(ref lowerRef, 2) + Unsafe.Add(ref lowerRef, 3); + if (distance < leastDistance) + { + index = i + 1; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } + + // Sum the two partials for candidates i + 2 and i + 3. 
+ ref int upperRef = ref Unsafe.As, int>(ref upperPairs); + distance = upperRef + Unsafe.Add(ref upperRef, 1); + if (distance < leastDistance) + { + index = i + 2; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } + + distance = Unsafe.Add(ref upperRef, 2) + Unsafe.Add(ref upperRef, 3); + if (distance < leastDistance) + { + index = i + 3; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } } + } + for (; i < rgbaPalette.Length; i++) + { + int distance = DistanceSquared(rgba, Unsafe.Add(ref rgbaPaletteRef, i)); if (distance < leastDistance) { index = i; leastDistance = distance; + if (distance == 0) + { + goto Found; + } } } + Found: + // Now I have the index, pop it into the cache for next time _ = this.cache.TryAdd(rgba, (short)index); match = Unsafe.Add(ref paletteRef, (uint)index); @@ -111,12 +188,12 @@ internal sealed class EuclideanPixelMap : PixelMap /// The second point. /// The distance squared. [MethodImpl(InliningOptions.ShortMethod)] - private static float DistanceSquared(Rgba32 a, Rgba32 b) + private static int DistanceSquared(Rgba32 a, Rgba32 b) { - float deltaR = a.R - b.R; - float deltaG = a.G - b.G; - float deltaB = a.B - b.B; - float deltaA = a.A - b.A; + int deltaR = a.R - b.R; + int deltaG = a.G - b.G; + int deltaB = a.B - b.B; + int deltaA = a.A - b.A; return (deltaR * deltaR) + (deltaG * deltaG) + (deltaB * deltaB) + (deltaA * deltaA); } @@ -177,8 +254,7 @@ internal static class PixelMapFactory ColorMatchingMode colorMatchingMode) where TPixel : unmanaged, IPixel => colorMatchingMode switch { - ColorMatchingMode.Hybrid => new EuclideanPixelMap(configuration, palette), - ColorMatchingMode.Exact => new EuclideanPixelMap(configuration, palette), + ColorMatchingMode.Exact => new EuclideanPixelMap(configuration, palette), _ => new EuclideanPixelMap(configuration, palette), }; } diff --git a/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs 
b/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs index 32d95137bc..76598e0046 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs @@ -56,147 +56,6 @@ internal interface IColorIndexCache : IColorIndexCache public static abstract T Create(MemoryAllocator allocator); } -/// -/// A hybrid color distance cache that combines a small, fixed-capacity exact-match dictionary -/// (ExactCache, ~4–5 KB for up to 512 entries) with a coarse lookup table (CoarseCache) for 5,5,5,6 precision. -/// -/// -/// ExactCache provides O(1) lookup for common cases using a simple 256-entry hash-based dictionary, while CoarseCache -/// quantizes RGB channels to 5 bits (yielding 32^3 buckets) and alpha to 6 bits, storing up to 4 alpha entries per bucket -/// (a design chosen based on probability theory to capture most real-world variations) for a total memory footprint of -/// roughly 576 KB. Lookups and insertions are performed in constant time, making the overall design both fast and memory-predictable. 
-/// -internal unsafe struct HybridCache : IColorIndexCache -{ - private CoarseCache coarseCache; - private AccurateCache accurateCache; - - public HybridCache(MemoryAllocator allocator) - { - this.accurateCache = AccurateCache.Create(allocator); - this.coarseCache = CoarseCache.Create(allocator); - } - - /// - public static HybridCache Create(MemoryAllocator allocator) => new(allocator); - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public bool TryAdd(Rgba32 color, short index) - { - if (this.accurateCache.TryAdd(color, index)) - { - return true; - } - - return this.coarseCache.TryAdd(color, index); - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public readonly bool TryGetValue(Rgba32 color, out short value) - { - if (this.accurateCache.TryGetValue(color, out value)) - { - return true; - } - - return this.coarseCache.TryGetValue(color, out value); - } - - /// - public readonly void Clear() - { - this.accurateCache.Clear(); - this.coarseCache.Clear(); - } - - /// - public void Dispose() - { - this.accurateCache.Dispose(); - this.coarseCache.Dispose(); - } -} - -/// -/// A coarse cache for color distance lookups that uses a fixed-size lookup table. -/// -/// -/// This cache uses a fixed lookup table with 2,097,152 bins, each storing a 2-byte value, -/// resulting in a memory usage of approximately 4 MB. Lookups and insertions are -/// performed in constant time (O(1)) via direct table indexing. This design is optimized for -/// speed while maintaining a predictable, fixed memory footprint. 
-/// -internal unsafe struct CoarseCache : IColorIndexCache -{ - private const int IndexRBits = 5; - private const int IndexGBits = 5; - private const int IndexBBits = 5; - private const int IndexABits = 6; - private const int IndexRCount = 1 << IndexRBits; // 32 bins for red - private const int IndexGCount = 1 << IndexGBits; // 32 bins for green - private const int IndexBCount = 1 << IndexBBits; // 32 bins for blue - private const int IndexACount = 1 << IndexABits; // 64 bins for alpha - private const int TotalBins = IndexRCount * IndexGCount * IndexBCount * IndexACount; // 2,097,152 bins - - private readonly IMemoryOwner binsOwner; - private readonly short* binsPointer; - private MemoryHandle binsHandle; - - private CoarseCache(MemoryAllocator allocator) - { - this.binsOwner = allocator.Allocate(TotalBins); - this.binsOwner.GetSpan().Fill(-1); - this.binsHandle = this.binsOwner.Memory.Pin(); - this.binsPointer = (short*)this.binsHandle.Pointer; - } - - /// - public static CoarseCache Create(MemoryAllocator allocator) => new(allocator); - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public readonly bool TryAdd(Rgba32 color, short value) - { - this.binsPointer[GetCoarseIndex(color)] = value; - return true; - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public readonly bool TryGetValue(Rgba32 color, out short value) - { - value = this.binsPointer[GetCoarseIndex(color)]; - return value > -1; // Coarse match found - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static int GetCoarseIndex(Rgba32 color) - { - int rIndex = color.R >> (8 - IndexRBits); - int gIndex = color.G >> (8 - IndexGBits); - int bIndex = color.B >> (8 - IndexBBits); - int aIndex = color.A >> (8 - IndexABits); - - return (aIndex * IndexRCount * IndexGCount * IndexBCount) + - (rIndex * IndexGCount * IndexBCount) + - (gIndex * IndexBCount) + - bIndex; - } - - /// - public readonly void Clear() - => this.binsOwner.GetSpan().Fill(-1); - - /// - public void Dispose() - { - 
this.binsHandle.Dispose(); - this.binsOwner.Dispose(); - } -} - /// /// /// CoarseCache is a fast, low-memory lookup structure for caching palette indices associated with RGBA values, @@ -225,7 +84,7 @@ internal unsafe struct CoarseCache : IColorIndexCache /// making it ideal for applications such as color distance caching in images with a limited palette (up to 256 entries). /// /// -internal unsafe struct CoarseCacheLite : IColorIndexCache +internal unsafe struct CoarseCache : IColorIndexCache { // Use 5 bits per channel for R, G, and B: 32 levels each. // Total buckets = 32^3 = 32768. @@ -236,7 +95,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache private readonly AlphaBucket* buckets; private MemoryHandle bucketHandle; - private CoarseCacheLite(MemoryAllocator allocator) + private CoarseCache(MemoryAllocator allocator) { this.bucketsOwner = allocator.Allocate(BucketCount, AllocationOptions.Clean); this.bucketHandle = this.bucketsOwner.Memory.Pin(); @@ -244,7 +103,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache } /// - public static CoarseCacheLite Create(MemoryAllocator allocator) => new(allocator); + public static CoarseCache Create(MemoryAllocator allocator) => new(allocator); /// public readonly bool TryAdd(Rgba32 color, short paletteIndex) @@ -289,14 +148,11 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache } [MethodImpl(InliningOptions.ShortMethod)] - private static byte QuantizeAlpha(byte a) - - // Quantize to 6 bits: shift right by (8 - 6) = 2 bits. - => (byte)(a >> 2); + private static byte QuantizeAlpha(byte a) => (byte)(a >> 2); public struct AlphaEntry { - // Store the alpha value quantized to 6 bits (0..63) + // Store the alpha value quantized to 6 bits (0..63). public byte QuantizedAlpha; public short PaletteIndex; } @@ -312,7 +168,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache // 2. 
However, in practice (based on probability theory and typical image data), // the number of unique alpha values that actually occur for a given quantized RGB // bucket is usually very small. If you randomly sample 8 values out of 64, - // the probability that these 4 samples are all unique is high if the distribution + // the probability that these samples are all unique is high if the distribution // of alpha values is skewed or if only a few alpha values are used. // // 3. Statistically, for many real-world images, most RGB buckets will have only a couple @@ -377,51 +233,49 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache } /// -/// A fixed-capacity dictionary with exactly 512 entries mapping a key -/// to a value. +/// A fixed-size exact-match cache that stores packed RGBA keys with 4-way set associativity. /// /// -/// The dictionary is implemented using a fixed array of 512 buckets and an entries array -/// of the same size. The bucket for a key is computed as (key & 0x1FF), and collisions are -/// resolved through a linked chain stored in the field. +/// The cache holds 512 total entries split across 128 sets. Entries are evicted within a set +/// using round-robin replacement, but cached values are returned only when the full packed RGBA +/// key matches, preserving exact quantization results with predictable memory usage. /// The overall memory usage is approximately 4–5 KB. Both lookup and insertion operations are, -/// on average, O(1) since the bucket is determined via a simple bitmask and collision chains are -/// typically very short; in the worst-case, the number of iterations is bounded by 256. +/// on average, O(1) since each lookup probes at most four candidate entries within the selected set. /// This guarantees highly efficient and predictable performance for small, fixed-size color palettes. 
/// internal unsafe struct AccurateCache : IColorIndexCache { - // Buckets array: each bucket holds the index (0-based) into the entries array - // of the first entry in the chain, or -1 if empty. - private readonly IMemoryOwner bucketsOwner; - private MemoryHandle bucketsHandle; - private short* buckets; + public const int Capacity = 512; + private const int Ways = 4; + private const int SetCount = Capacity / Ways; + private const int SetMask = SetCount - 1; - // Entries array: stores up to 256 entries. - private readonly IMemoryOwner entriesOwner; - private MemoryHandle entriesHandle; - private Entry* entries; + private readonly IMemoryOwner keysOwner; + private MemoryHandle keysHandle; + private uint* keys; - public const int Capacity = 512; + private readonly IMemoryOwner valuesOwner; + private MemoryHandle valuesHandle; + private ushort* values; + + private readonly IMemoryOwner nextVictimOwner; + private MemoryHandle nextVictimHandle; + private byte* nextVictim; private AccurateCache(MemoryAllocator allocator) { - this.Count = 0; - - // Allocate exactly 512 indexes for buckets. - this.bucketsOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); - Span bucketSpan = this.bucketsOwner.GetSpan(); - bucketSpan.Fill(-1); - this.bucketsHandle = this.bucketsOwner.Memory.Pin(); - this.buckets = (short*)this.bucketsHandle.Pointer; - - // Allocate exactly 512 entries. 
- this.entriesOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); - this.entriesHandle = this.entriesOwner.Memory.Pin(); - this.entries = (Entry*)this.entriesHandle.Pointer; - } + this.keysOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); + this.keysHandle = this.keysOwner.Memory.Pin(); + this.keys = (uint*)this.keysHandle.Pointer; - public int Count { get; private set; } + this.valuesOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); + this.valuesHandle = this.valuesOwner.Memory.Pin(); + this.values = (ushort*)this.valuesHandle.Pointer; + + this.nextVictimOwner = allocator.Allocate(SetCount, AllocationOptions.Clean); + this.nextVictimHandle = this.nextVictimOwner.Memory.Pin(); + this.nextVictim = (byte*)this.nextVictimHandle.Pointer; + } /// public static AccurateCache Create(MemoryAllocator allocator) => new(allocator); @@ -430,140 +284,113 @@ internal unsafe struct AccurateCache : IColorIndexCache [MethodImpl(InliningOptions.ShortMethod)] public bool TryAdd(Rgba32 color, short value) { - if (this.Count == Capacity) - { - return false; // Dictionary is full. - } - uint key = color.PackedValue; + int set = GetSetIndex(key); + int start = set * Ways; + int empty = -1; + + uint* keys = this.keys; + ushort* values = this.values; + ushort storedValue = (ushort)(value + 1); - // The key is a 32-bit unsigned integer representing an RGBA color, where the bytes are laid out as R|G|B|A - // (with R in the most significant byte and A in the least significant). - // To compute the bucket index: - // 1. (key >> 16) extracts the top 16 bits, effectively giving us the R and G channels. - // 2. (key >> 8) shifts the key right by 8 bits, bringing R, G, and B into the lower 24 bits (dropping A). - // 3. XORing these two values with the original key mixes bits from all four channels (R, G, B, and A), - // which helps to counteract situations where one or more channels have a limited range. - // 4. 
Finally, we apply a bitmask of 0x1FF to keep only the lowest 9 bits, ensuring the result is between 0 and 511, - // which corresponds to our fixed bucket count of 512. - int bucket = (int)(((key >> 16) ^ (key >> 8) ^ key) & 0x1FF); - int i = this.buckets[bucket]; - - // Traverse the collision chain. - Entry* entries = this.entries; - while (i != -1) + for (int i = start; i < start + Ways; i++) { - Entry e = entries[i]; - if (e.Key == key) + ushort candidate = values[i]; + if (candidate == 0) { - // Key already exists; do not overwrite. - return false; + empty = i; + continue; } - i = e.Next; + if (keys[i] == key) + { + values[i] = storedValue; + return true; + } } - short index = (short)this.Count; - this.Count++; + int slot = empty >= 0 ? empty : start + this.nextVictim[set]; + keys[slot] = key; + values[slot] = storedValue; - // Insert the new entry: - entries[index].Key = key; - entries[index].Value = value; + if (empty < 0) + { + this.nextVictim[set] = (byte)((this.nextVictim[set] + 1) & (Ways - 1)); + } - // Link this new entry into the bucket chain. - entries[index].Next = this.buckets[bucket]; - this.buckets[bucket] = index; return true; } /// [MethodImpl(InliningOptions.ShortMethod)] - public bool TryGetValue(Rgba32 color, out short value) + public readonly bool TryGetValue(Rgba32 color, out short value) { uint key = color.PackedValue; - int bucket = (int)(((key >> 16) ^ (key >> 8) ^ key) & 0x1FF); - int i = this.buckets[bucket]; + int start = GetSetIndex(key) * Ways; - // If the bucket is empty, return immediately. - if (i == -1) - { - value = -1; - return false; - } + uint* keys = this.keys; + ushort* values = this.values; - // Traverse the chain. 
- Entry* entries = this.entries; - do + for (int i = start; i < start + Ways; i++) { - Entry e = entries[i]; - if (e.Key == key) + ushort candidate = values[i]; + if (candidate != 0 && keys[i] == key) { - value = e.Value; + value = (short)(candidate - 1); return true; } - - i = e.Next; } - while (i != -1); value = -1; return false; } /// - /// Clears the dictionary. + /// Clears the cache. /// - public void Clear() + public readonly void Clear() { - Span bucketSpan = this.bucketsOwner.GetSpan(); - bucketSpan.Fill(-1); - this.Count = 0; + this.valuesOwner.GetSpan().Clear(); + this.nextVictimOwner.GetSpan().Clear(); } public void Dispose() { - this.bucketsHandle.Dispose(); - this.bucketsOwner.Dispose(); - this.entriesHandle.Dispose(); - this.entriesOwner.Dispose(); - this.buckets = null; - this.entries = null; + this.keysHandle.Dispose(); + this.keysOwner.Dispose(); + this.valuesHandle.Dispose(); + this.valuesOwner.Dispose(); + this.nextVictimHandle.Dispose(); + this.nextVictimOwner.Dispose(); + this.keys = null; + this.values = null; + this.nextVictim = null; } - private struct Entry - { - public uint Key; // The key (packed RGBA) - public short Value; // The value; -1 means unused. - public short Next; // Index of the next entry in the chain, or -1 if none. - } -} - -/// -/// Represents a cache that does not store any values. -/// It allows adding colors, but always returns false when trying to retrieve them. -/// -internal readonly struct NullCache : IColorIndexCache -{ - /// - public static NullCache Create(MemoryAllocator allocator) => default; - - /// - public bool TryAdd(Rgba32 color, short value) => true; - - /// - public bool TryGetValue(Rgba32 color, out short value) - { - value = -1; - return false; - } - - /// - public void Clear() - { - } - - /// - public void Dispose() - { - } + /// + /// Maps a packed RGBA key to one of the cache sets used by . + /// + /// The packed key. + /// The zero-based set index for the key. 
+ /// + /// + /// The cache is 4-way set-associative, so this hash only needs to choose one of + /// sets before probing up to four candidate entries. + /// + /// + /// is laid out as R | (G << 8) | (B << 16) | (A << 24). + /// The XOR-fold mixes neighboring bytes into the low bits, and the final mask selects the + /// set. With the current 128-set layout that makes the selected set effectively depend on + /// the low 7 bits of R ^ G ^ B. Alpha still participates in the later exact key + /// comparison, but not in set selection. + /// + /// + /// Collisions are expected and acceptable here. Correctness comes from the full packed-key + /// comparison during probing; this hash only aims to spread keys cheaply enough that each + /// access touches at most one 4-entry set. + /// + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static int GetSetIndex(uint key) + => (int)(((key >> 16) ^ (key >> 8) ^ key) & SetMask); } diff --git a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs index 07596b68a8..bdf2ba20a8 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs @@ -368,7 +368,7 @@ public struct OctreeQuantizer : IQuantizer public void Dispose() => this.nodesOwner.Dispose(); [StructLayout(LayoutKind.Sequential)] - internal unsafe struct OctreeNode + internal struct OctreeNode { public bool Leaf; public int PixelCount; diff --git a/tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs b/tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs new file mode 100644 index 0000000000..dbaf21a8ef --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs @@ -0,0 +1,302 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+ +using System.Buffers; +using System.Runtime.CompilerServices; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing.Processors.Quantization; + +namespace SixLabors.ImageSharp.Benchmarks.Processing; + +[Config(typeof(Config.Standard))] +public class ColorMatchingCaches +{ + // IterationSetup forces BenchmarkDotNet to use a single benchmark invocation per iteration. + // Repeated lookups can safely replay a smaller working set because that workload is explicitly + // meant to model steady-state cache hits after warmup. + private const int RepeatedLookupCount = 262_144; + + // DitherLike should avoid replaying the same stream across multiple passes because that warms + // the caches in a way real high-churn input usually does not. Make the single pass larger instead. + private const int DitherLikeLookupCount = 1_048_576; + private const int RepeatedPassCount = 16; + + private Rgba32[] palette; + private Rgba32[] repeatedSeedColors; + private Rgba32[] repeatedLookups; + private Rgba32[] ditherLookups; + + private PixelMap coarse; + private PixelMap legacyCoarse; + private PixelMap exact; + private PixelMap uncached; + + [Params(16, 256)] + public int PaletteSize { get; set; } + + [Params(CacheWorkload.Repeated, CacheWorkload.DitherLike)] + public CacheWorkload Workload { get; set; } + + [GlobalSetup] + public void Setup() + { + this.palette = CreatePalette(this.PaletteSize); + this.repeatedSeedColors = CreateRepeatedSeedColors(this.palette); + this.repeatedLookups = CreateRepeatedLookups(this.repeatedSeedColors); + this.ditherLookups = CreateDitherLikeLookups(); + + this.coarse = CreatePixelMap(this.palette); + this.legacyCoarse = CreatePixelMap(this.palette); + this.exact = CreatePixelMap(this.palette); + this.uncached = CreatePixelMap(this.palette); + } + + [IterationSetup] + public void ResetCaches() + { + // Each benchmark iteration should start from the same cache 
state so we measure + // the cache policy itself rather than warm state carried over from a previous iteration. + this.coarse.Clear(this.palette); + this.legacyCoarse.Clear(this.palette); + this.exact.Clear(this.palette); + this.uncached.Clear(this.palette); + + if (this.Workload == CacheWorkload.Repeated) + { + // Prime the repeated workload so the benchmark reflects steady-state hit behavior + // instead of mostly measuring the first-wave fill cost. + Prime(this.coarse, this.repeatedSeedColors); + Prime(this.legacyCoarse, this.repeatedSeedColors); + Prime(this.exact, this.repeatedSeedColors); + Prime(this.uncached, this.repeatedSeedColors); + } + } + + [GlobalCleanup] + public void Cleanup() + { + this.coarse.Dispose(); + this.legacyCoarse.Dispose(); + this.exact.Dispose(); + this.uncached.Dispose(); + } + + [Benchmark(Baseline = true, Description = "Coarse")] + public int Coarse() => this.Run(this.coarse); + + [Benchmark(Description = "Legacy Coarse")] + public int LegacyCoarse() => this.Run(this.legacyCoarse); + + [Benchmark(Description = "Exact Cached")] + public int Exact() => this.Run(this.exact); + + [Benchmark(Description = "Exact Uncached")] + public int Uncached() => this.Run(this.uncached); + + public enum CacheWorkload + { + // A small working set that is intentionally reused after priming to measure hit-heavy behavior. + Repeated, + + // A deterministic high-churn stream intended to resemble dithered lookups where exact repeats are rare. + DitherLike + } + + private int Run(PixelMap map) + { + Rgba32[] lookups = this.Workload == CacheWorkload.Repeated ? this.repeatedLookups : this.ditherLookups; + int passCount = this.Workload == CacheWorkload.Repeated ? RepeatedPassCount : 1; + int checksum = 0; + + // Repeated intentionally replays the same lookup stream to measure steady-state hit behavior. 
+ // DitherLike runs as a single larger pass so we do not turn a churn-heavy workload into an + // artificially warmed cache benchmark by replaying the exact same sequence. + for (int pass = 0; pass < passCount; pass++) + { + for (int i = 0; i < lookups.Length; i++) + { + checksum = unchecked((checksum * 31) + map.GetClosestColor(lookups[i], out _)); + } + } + + return checksum; + } + + private static PixelMap CreatePixelMap(Rgba32[] palette) + where TCache : struct, IColorIndexCache + => new EuclideanPixelMap(Configuration.Default, palette); + + private static void Prime(PixelMap map, Rgba32[] colors) + { + for (int i = 0; i < colors.Length; i++) + { + map.GetClosestColor(colors[i], out _); + } + } + + private static Rgba32[] CreatePalette(int count) + { + Rgba32[] result = new Rgba32[count]; + + for (int i = 0; i < result.Length; i++) + { + // Use the Knuth/golden-ratio multiplicative hash constant to spread colors across + // RGBA space without clustering into a gradient. That keeps the benchmark from + // accidentally favoring any cache because the palette itself is too regular. + uint value = unchecked((uint)(i + 1) * 2654435761U); + result[i] = new( + (byte)value, + (byte)(value >> 8), + (byte)(value >> 16), + (byte)((value >> 24) | 0x80)); + } + + return result; + } + + private static Rgba32[] CreateRepeatedSeedColors(Rgba32[] palette) + { + // Reuse colors derived from the palette but perturb them slightly so the workload still + // exercises nearest-color matching rather than only exact palette-entry hits. 
+ int count = Math.Min(64, palette.Length * 2); + Rgba32[] result = new Rgba32[count]; + + for (int i = 0; i < result.Length; i++) + { + Rgba32 source = palette[(i * 17) % palette.Length]; + result[i] = new( + (byte)(source.R + ((i * 3) & 0x07)), + (byte)(source.G + ((i * 5) & 0x07)), + (byte)(source.B + ((i * 7) & 0x07)), + source.A); + } + + return result; + } + + private static Rgba32[] CreateRepeatedLookups(Rgba32[] seedColors) + { + Rgba32[] result = new Rgba32[RepeatedLookupCount]; + + // Cycle a small seed set to produce a stable, hit-heavy stream after priming. + for (int i = 0; i < result.Length; i++) + { + result[i] = seedColors[i % seedColors.Length]; + } + + return result; + } + + private static Rgba32[] CreateDitherLikeLookups() + { + Rgba32[] result = new Rgba32[DitherLikeLookupCount]; + + // Generate a deterministic pseudo-image signal with independent channel slopes so nearby + // samples are correlated but exact repeats are uncommon, which is closer to dithered input. + for (int i = 0; i < result.Length; i++) + { + int x = i & 511; + int y = i >> 9; + + result[i] = new( + (byte)((x * 17) + (y * 13)), + (byte)((x * 29) + (y * 7)), + (byte)((x * 11) + (y * 23)), + (byte)(255 - ((x * 3) + (y * 5)))); + } + + return result; + } + + /// + /// Preserves the original direct-mapped coarse cache implementation for side-by-side benchmarks. 
+ /// + private unsafe struct LegacyCoarseCache : IColorIndexCache + { + private const int IndexRBits = 5; + private const int IndexGBits = 5; + private const int IndexBBits = 5; + private const int IndexABits = 6; + private const int IndexRCount = 1 << IndexRBits; + private const int IndexGCount = 1 << IndexGBits; + private const int IndexBCount = 1 << IndexBBits; + private const int IndexACount = 1 << IndexABits; + private const int TotalBins = IndexRCount * IndexGCount * IndexBCount * IndexACount; + + private readonly IMemoryOwner binsOwner; + private readonly short* binsPointer; + private MemoryHandle binsHandle; + + private LegacyCoarseCache(MemoryAllocator allocator) + { + this.binsOwner = allocator.Allocate(TotalBins); + this.binsOwner.GetSpan().Fill(-1); + this.binsHandle = this.binsOwner.Memory.Pin(); + this.binsPointer = (short*)this.binsHandle.Pointer; + } + + public static LegacyCoarseCache Create(MemoryAllocator allocator) => new(allocator); + + [MethodImpl(InliningOptions.ShortMethod)] + public readonly bool TryAdd(Rgba32 color, short value) + { + this.binsPointer[GetCoarseIndex(color)] = value; + return true; + } + + [MethodImpl(InliningOptions.ShortMethod)] + public readonly bool TryGetValue(Rgba32 color, out short value) + { + value = this.binsPointer[GetCoarseIndex(color)]; + return value > -1; + } + + public readonly void Clear() => this.binsOwner.GetSpan().Fill(-1); + + public void Dispose() + { + this.binsHandle.Dispose(); + this.binsOwner.Dispose(); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static int GetCoarseIndex(Rgba32 color) + { + int rIndex = color.R >> (8 - IndexRBits); + int gIndex = color.G >> (8 - IndexGBits); + int bIndex = color.B >> (8 - IndexBBits); + int aIndex = color.A >> (8 - IndexABits); + + return (aIndex * IndexRCount * IndexGCount * IndexBCount) + + (rIndex * IndexGCount * IndexBCount) + + (gIndex * IndexBCount) + + bIndex; + } + } + + /// + /// Preserves the uncached path for exact-cache comparison 
benchmarks. + /// + private readonly struct UncachedCache : IColorIndexCache + { + public static UncachedCache Create(MemoryAllocator allocator) => default; + + public bool TryAdd(Rgba32 color, short value) => true; + + public bool TryGetValue(Rgba32 color, out short value) + { + value = -1; + return false; + } + + public void Clear() + { + } + + public void Dispose() + { + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 4ebcbc13b6..eef8d5ba84 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -680,7 +680,7 @@ public partial class PngEncoderTests PaletteQuantizer quantizer = new( palette.Select(Color.FromPixel).ToArray(), - new QuantizerOptions { ColorMatchingMode = ColorMatchingMode.Hybrid }); + new QuantizerOptions { ColorMatchingMode = ColorMatchingMode.Exact }); using MemoryStream ms = new(); image.Save(ms, new PngEncoder diff --git a/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs index f2a4b079b5..07e9a4b0d6 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Quantization; @@ -75,4 +76,161 @@ public class PaletteQuantizerTests IQuantizer quantizer = KnownQuantizers.Werner; Assert.Equal(QuantizerConstants.DefaultDither, quantizer.Options.Dither); } + + [Fact] + public void ExactColorMatchingMatchesUncachedAfterCacheOverflow() + { + Rgba32[] palette = + [ + new Rgba32(0, 0, 0), + new Rgba32(7, 0, 0) + ]; + + using PixelMap exact = CreatePixelMap(palette); + using PixelMap cachedExact = CreatePixelMap(palette); + + for (int i = 0; i < AccurateCache.Capacity; i++) + { + cachedExact.GetClosestColor(CreateOverflowFillerColor(i), out _); + } + + Rgba32 first = new(1, 0, 0); + Rgba32 second = new(6, 0, 0); + + AssertMatchesUncached(exact, cachedExact, first); + AssertMatchesUncached(exact, cachedExact, second); + } + + [Fact] + public void ExactColorMatchingMatchesUncachedAcrossManyProbeBinsAfterRepeatedEviction() + { + Rgba32[] palette = CreateGrayscalePalette(256); + + using PixelMap exact = CreatePixelMap(palette); + using PixelMap cachedExact = CreatePixelMap(palette); + + for (int i = 0; i < AccurateCache.Capacity * 2; i++) + { + cachedExact.GetClosestColor(CreateEvictionFillerColor(i), out _); + } + + for (int i = 0; i < AccurateCache.Capacity; i++) + { + AssertMatchesUncached(exact, cachedExact, CreateEvictionProbeColor(i)); + } + } + + [Fact] + public void ExactColorMatchingMatchesUncachedForDitherStressColorSequence() + { + Rgba32[] palette = CreateGrayscalePalette(16); + + using Image source = CreateDitherStressImage(); + using PixelMap exact = CreatePixelMap(palette); + using PixelMap cachedExact = CreatePixelMap(palette); + + for (int y = 0; y < source.Height; y++) + { + for (int x = 0; x < source.Width; x++) + { + AssertMatchesUncached(exact, cachedExact, source[x, y]); + } + } + } + + // Split the first 512 integers across R and G so the warmup loop 
produces 512 distinct exact colors: + // the low 8 bits go into R, and the ninth bit spills into G once R wraps after 255. + // Keeping B fixed and G offset away from zero also avoids accidentally probing the red-axis test colors below. + private static Rgba32 CreateOverflowFillerColor(int i) + => new((byte)i, (byte)(16 + (i >> 8)), 32); + + // Treat i as three packed 5-bit coordinates and expand each coordinate back to an 8-bit channel by + // shifting left by 3. That lands on the lower edge of each 5-bit coarse bucket, giving the test a + // deterministic way to fill many distinct coarse buckets before probing nearby exact colors. + private static Rgba32 CreateEvictionFillerColor(int i) + { + byte r = (byte)((i & 31) << 3); + byte g = (byte)(((i >> 5) & 31) << 3); + byte b = (byte)(((i >> 10) & 31) << 3); + return new(r, g, b); + } + + // Reconstruct the same 5-bit RGB bucket coordinates used by CreateEvictionFillerColor, then set the + // low 3 bits in each channel to 0b111. That keeps the probe inside the same coarse bucket while making + // it a different exact color, which is the shape that used to expose coarse-fallback false hits. 
+ private static Rgba32 CreateEvictionProbeColor(int i) + { + byte r = (byte)(((i & 31) << 3) | 0x07); + byte g = (byte)((((i >> 5) & 31) << 3) | 0x07); + byte b = (byte)((((i >> 10) & 31) << 3) | 0x07); + return new(r, g, b); + } + + private static PixelMap CreatePixelMap(Rgba32[] palette) + where TCache : struct, IColorIndexCache + => new EuclideanPixelMap(Configuration.Default, palette); + + private static void AssertMatchesUncached(PixelMap exact, PixelMap cachedExact, Rgba32 color) + { + int exactIndex = exact.GetClosestColor(color, out Rgba32 exactMatch); + int cachedIndex = cachedExact.GetClosestColor(color, out Rgba32 cachedMatch); + + Assert.Equal(exactIndex, cachedIndex); + Assert.Equal(exactMatch, cachedMatch); + } + + private static Rgba32[] CreateGrayscalePalette(int count) + { + Rgba32[] palette = new Rgba32[count]; + for (int i = 0; i < count; i++) + { + byte value = count == 1 ? (byte)0 : (byte)((i * 255) / (count - 1)); + palette[i] = new Rgba32(value, value, value); + } + + return palette; + } + + // Generate a deterministic pseudo-image where each channel uses a different x/y slope. + // Neighboring pixels stay correlated, like real image content, but the combined RGB values + // churn heavily enough that exact repeats are rare. That makes this a useful stress input + // for verifying cached exact matching against an uncached baseline under dither-like access. 
+ private static Image CreateDitherStressImage() + { + Image image = new(192, 96); + + for (int y = 0; y < image.Height; y++) + { + for (int x = 0; x < image.Width; x++) + { + image[x, y] = new Rgba32( + (byte)((x * 17) + (y * 13)), + (byte)((x * 29) + (y * 7)), + (byte)((x * 11) + (y * 23))); + } + } + + return image; + } + + private readonly struct UncachedCache : IColorIndexCache + { + public static UncachedCache Create(MemoryAllocator allocator) => default; + + public bool TryAdd(Rgba32 color, short value) => true; + + public bool TryGetValue(Rgba32 color, out short value) + { + value = -1; + return false; + } + + public void Clear() + { + } + + public void Dispose() + { + } + } } From f0ce591a64381458cb276231a6c30c400fafe658 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 7 Apr 2026 22:14:58 +1000 Subject: [PATCH 07/12] Rename quantizer and update tests --- src/ImageSharp/Advanced/AotCompilerTools.cs | 8 +- src/ImageSharp/Formats/Bmp/BmpEncoder.cs | 2 +- src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs | 2 +- src/ImageSharp/Formats/Gif/GifEncoderCore.cs | 6 +- src/ImageSharp/Formats/Tiff/TiffEncoder.cs | 2 +- .../Formats/Tiff/TiffEncoderCore.cs | 2 +- .../Quantization/QuantizeExtensions.cs | 8 +- src/ImageSharp/Processing/KnownQuantizers.cs | 8 +- ...eQuantizer.cs => HexadecatreeQuantizer.cs} | 22 +- ...l}.cs => HexadecatreeQuantizer{TPixel}.cs} | 354 ++++++++++-------- .../Codecs/Png/EncodeIndexedPng.cs | 12 +- .../Formats/Bmp/BmpEncoderTests.cs | 8 +- .../Formats/GeneralFormatTests.cs | 2 +- .../Formats/Gif/GifEncoderTests.cs | 6 +- .../Formats/WebP/WebpEncoderTests.cs | 10 +- ...Tests.cs => HexadecatreeQuantizerTests.cs} | 20 +- .../Processors/Quantization/QuantizerTests.cs | 26 +- .../Quantization/QuantizedImageTests.cs | 14 +- ...tColor_WithHexadecatreeQuantizer_rgb32.bmp | 3 + ...zed_Encode_Artifacts_Rgba32_issue_2469.png | 4 +- ...Bike_HexadecatreeQuantizer_ErrorDither.png | 3 + ...ox_Bike_HexadecatreeQuantizer_NoDither.png | 3 + 
...e_HexadecatreeQuantizer_OrderedDither.png} | 0 ...InBox_Bike_OctreeQuantizer_ErrorDither.png | 3 - ...ionInBox_Bike_OctreeQuantizer_NoDither.png | 3 - ...ke_WebSafePaletteQuantizer_ErrorDither.png | 4 +- ..._Bike_WebSafePaletteQuantizer_NoDither.png | 4 +- ..._WebSafePaletteQuantizer_OrderedDither.png | 4 +- ...ike_WernerPaletteQuantizer_ErrorDither.png | 4 +- ...x_Bike_WernerPaletteQuantizer_NoDither.png | 4 +- ...e_WernerPaletteQuantizer_OrderedDither.png | 4 +- ...tionInBox_Bike_WuQuantizer_ErrorDither.png | 4 +- ...izationInBox_Bike_WuQuantizer_NoDither.png | 4 +- ...tial_HexadecatreeQuantizer_ErrorDither.png | 3 + ...Partial_HexadecatreeQuantizer_NoDither.png | 3 + ...al_HexadecatreeQuantizer_OrderedDither.png | 3 + ...oraPartial_OctreeQuantizer_ErrorDither.png | 3 - ...iphoraPartial_OctreeQuantizer_NoDither.png | 3 - ...aPartial_OctreeQuantizer_OrderedDither.png | 3 - ...al_WebSafePaletteQuantizer_ErrorDither.png | 4 +- ...rtial_WebSafePaletteQuantizer_NoDither.png | 4 +- ..._WebSafePaletteQuantizer_OrderedDither.png | 4 +- ...ial_WernerPaletteQuantizer_ErrorDither.png | 4 +- ...artial_WernerPaletteQuantizer_NoDither.png | 4 +- ...l_WernerPaletteQuantizer_OrderedDither.png | 4 +- ...liphoraPartial_WuQuantizer_ErrorDither.png | 4 +- ...CalliphoraPartial_WuQuantizer_NoDither.png | 4 +- ...phoraPartial_WuQuantizer_OrderedDither.png | 4 +- ...HexadecatreeQuantizer_ErrorDither_0.25.png | 3 + ..._HexadecatreeQuantizer_ErrorDither_0.5.png | 3 + ...HexadecatreeQuantizer_ErrorDither_0.75.png | 3 + ...id_HexadecatreeQuantizer_ErrorDither_0.png | 3 + ...id_HexadecatreeQuantizer_ErrorDither_1.png | 3 + ...adecatreeQuantizer_OrderedDither_0.25.png} | 0 ...xadecatreeQuantizer_OrderedDither_0.5.png} | 0 ...adecatreeQuantizer_OrderedDither_0.75.png} | 0 ..._HexadecatreeQuantizer_OrderedDither_0.png | 3 + ...HexadecatreeQuantizer_OrderedDither_1.png} | 0 ...david_OctreeQuantizer_ErrorDither_0.25.png | 3 - ..._david_OctreeQuantizer_ErrorDither_0.5.png | 3 - 
...david_OctreeQuantizer_ErrorDither_0.75.png | 3 - ...le_david_OctreeQuantizer_ErrorDither_0.png | 3 - ...le_david_OctreeQuantizer_ErrorDither_1.png | 3 - ..._david_OctreeQuantizer_OrderedDither_0.png | 3 - ...WernerPaletteQuantizer_OrderedDither_1.png | 4 +- ...ale_david_WuQuantizer_ErrorDither_0.25.png | 4 +- ...cale_david_WuQuantizer_ErrorDither_0.5.png | 4 +- ...ale_david_WuQuantizer_ErrorDither_0.75.png | 4 +- ...gScale_david_WuQuantizer_ErrorDither_0.png | 4 +- ...gScale_david_WuQuantizer_ErrorDither_1.png | 4 +- ...e_david_WuQuantizer_OrderedDither_0.25.png | 4 +- ...e_david_WuQuantizer_OrderedDither_0.75.png | 4 +- ...cale_david_WuQuantizer_OrderedDither_0.png | 4 +- ...cale_david_WuQuantizer_OrderedDither_1.png | 4 +- ...ike_HexadecatreeQuantizer_ErrorDither.png} | 0 ...n_Bike_HexadecatreeQuantizer_NoDither.png} | 0 ...e_HexadecatreeQuantizer_OrderedDither.png} | 0 ...e_WernerPaletteQuantizer_OrderedDither.png | 4 +- ...ntization_Bike_WuQuantizer_ErrorDither.png | 4 +- ...Quantization_Bike_WuQuantizer_NoDither.png | 4 +- ...ization_Bike_WuQuantizer_OrderedDither.png | 4 +- ...tial_HexadecatreeQuantizer_ErrorDither.png | 3 + ...Partial_HexadecatreeQuantizer_NoDither.png | 3 + ...l_HexadecatreeQuantizer_OrderedDither.png} | 0 ...oraPartial_OctreeQuantizer_ErrorDither.png | 3 - ...iphoraPartial_OctreeQuantizer_NoDither.png | 3 - ...l_WernerPaletteQuantizer_OrderedDither.png | 4 +- ...liphoraPartial_WuQuantizer_ErrorDither.png | 4 +- ...CalliphoraPartial_WuQuantizer_NoDither.png | 4 +- ...phoraPartial_WuQuantizer_OrderedDither.png | 4 +- 90 files changed, 383 insertions(+), 354 deletions(-) rename src/ImageSharp/Processing/Processors/Quantization/{OctreeQuantizer.cs => HexadecatreeQuantizer.cs} (51%) rename src/ImageSharp/Processing/Processors/Quantization/{OctreeQuantizer{TPixel}.cs => HexadecatreeQuantizer{TPixel}.cs} (54%) rename tests/ImageSharp.Tests/Processing/Processors/Quantization/{OctreeQuantizerTests.cs => HexadecatreeQuantizerTests.cs} (76%) 
create mode 100644 tests/Images/External/ReferenceOutput/BmpEncoderTests/Encode_8BitColor_WithHexadecatreeQuantizer_rgb32.bmp create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_ErrorDither.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_NoDither.png rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantizationInBox_Bike_OctreeQuantizer_OrderedDither.png => ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_OrderedDither.png} (100%) delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_ErrorDither.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_NoDither.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_ErrorDither.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_NoDither.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_OrderedDither.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.25.png create mode 100644 
tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.5.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.75.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_1.png rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.25.png => ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.25.png} (100%) rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.5.png => ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.5.png} (100%) rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.75.png => ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.75.png} (100%) create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.png rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_1.png => ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_1.png} (100%) delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.25.png delete mode 100644 
tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.5.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.75.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_1.png delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.png rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantization_Bike_OctreeQuantizer_ErrorDither.png => ApplyQuantization_Bike_HexadecatreeQuantizer_ErrorDither.png} (100%) rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantization_Bike_OctreeQuantizer_NoDither.png => ApplyQuantization_Bike_HexadecatreeQuantizer_NoDither.png} (100%) rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantization_Bike_OctreeQuantizer_OrderedDither.png => ApplyQuantization_Bike_HexadecatreeQuantizer_OrderedDither.png} (100%) create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png create mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png rename tests/Images/External/ReferenceOutput/QuantizerTests/{ApplyQuantization_CalliphoraPartial_OctreeQuantizer_OrderedDither.png => ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png} (100%) delete mode 100644 tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_ErrorDither.png delete mode 100644 
tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_NoDither.png diff --git a/src/ImageSharp/Advanced/AotCompilerTools.cs b/src/ImageSharp/Advanced/AotCompilerTools.cs index 2944b58e5f..0f28b28901 100644 --- a/src/ImageSharp/Advanced/AotCompilerTools.cs +++ b/src/ImageSharp/Advanced/AotCompilerTools.cs @@ -54,7 +54,7 @@ internal static class AotCompilerTools /// /// This method doesn't actually do anything but serves an important purpose... /// If you are running ImageSharp on iOS and try to call SaveAsGif, it will throw an exception: - /// "Attempting to JIT compile method... OctreeFrameQuantizer.ConstructPalette... while running in aot-only mode." + /// "Attempting to JIT compile method... HexadecatreeQuantizer.ConstructPalette... while running in aot-only mode." /// The reason this happens is the SaveAsGif method makes heavy use of generics, which are too confusing for the AoT /// compiler used on Xamarin.iOS. It spins up the JIT compiler to try and figure it out, but that is an illegal op on /// iOS so it bombs out. 
@@ -479,7 +479,7 @@ internal static class AotCompilerTools private static void AotCompileQuantizers() where TPixel : unmanaged, IPixel { - AotCompileQuantizer(); + AotCompileQuantizer(); AotCompileQuantizer(); AotCompileQuantizer(); AotCompileQuantizer(); @@ -549,8 +549,8 @@ internal static class AotCompilerTools where TPixel : unmanaged, IPixel where TDither : struct, IDither { - OctreeQuantizer octree = default; - default(TDither).ApplyQuantizationDither, TPixel>(ref octree, default, default, default); + HexadecatreeQuantizer hexadecatree = default; + default(TDither).ApplyQuantizationDither, TPixel>(ref hexadecatree, default, default, default); PaletteQuantizer palette = default; default(TDither).ApplyQuantizationDither, TPixel>(ref palette, default, default, default); diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoder.cs b/src/ImageSharp/Formats/Bmp/BmpEncoder.cs index e255568047..210c08464a 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoder.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoder.cs @@ -13,7 +13,7 @@ public sealed class BmpEncoder : QuantizingImageEncoder /// /// Initializes a new instance of the class. /// - public BmpEncoder() => this.Quantizer = KnownQuantizers.Octree; + public BmpEncoder() => this.Quantizer = KnownQuantizers.Hexadecatree; /// /// Gets the number of bits per pixel. diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index ccc620d6c4..0bf57c5612 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -116,7 +116,7 @@ internal sealed class BmpEncoderCore this.bitsPerPixel = encoder.BitsPerPixel; // TODO: Use a palette quantizer if supplied. - this.quantizer = encoder.Quantizer ?? KnownQuantizers.Octree; + this.quantizer = encoder.Quantizer ?? 
KnownQuantizers.Hexadecatree; this.pixelSamplingStrategy = encoder.PixelSamplingStrategy; this.transparentColorMode = encoder.TransparentColorMode; this.infoHeaderType = encoder.SupportTransparency ? BmpInfoHeaderType.WinVersion4 : BmpInfoHeaderType.WinVersion3; diff --git a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs index 07c73dcf22..d2883e2811 100644 --- a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs +++ b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs @@ -117,7 +117,7 @@ internal sealed class GifEncoderCore if (globalQuantizer is null) { - // Is this a gif with color information. If so use that, otherwise use octree. + // Is this a gif with color information. If so use that, otherwise use the adaptive hexadecatree quantizer. if (gifMetadata.ColorTableMode == FrameColorTableMode.Global && gifMetadata.GlobalColorTable?.Length > 0) { int ti = GetTransparentIndex(quantized, frameMetadata); @@ -132,12 +132,12 @@ internal sealed class GifEncoderCore } else { - globalQuantizer = new OctreeQuantizer(options); + globalQuantizer = new HexadecatreeQuantizer(options); } } else { - globalQuantizer = new OctreeQuantizer(options); + globalQuantizer = new HexadecatreeQuantizer(options); } } diff --git a/src/ImageSharp/Formats/Tiff/TiffEncoder.cs b/src/ImageSharp/Formats/Tiff/TiffEncoder.cs index a068613bf4..7859b2c902 100644 --- a/src/ImageSharp/Formats/Tiff/TiffEncoder.cs +++ b/src/ImageSharp/Formats/Tiff/TiffEncoder.cs @@ -15,7 +15,7 @@ public class TiffEncoder : QuantizingImageEncoder /// /// Initializes a new instance of the class. /// - public TiffEncoder() => this.Quantizer = KnownQuantizers.Octree; + public TiffEncoder() => this.Quantizer = KnownQuantizers.Hexadecatree; /// /// Gets the number of bits per pixel. 
diff --git a/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs b/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs index d7508b02e8..e5e47166e9 100644 --- a/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs +++ b/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs @@ -71,7 +71,7 @@ internal sealed class TiffEncoderCore this.configuration = configuration; this.memoryAllocator = configuration.MemoryAllocator; this.PhotometricInterpretation = encoder.PhotometricInterpretation; - this.quantizer = encoder.Quantizer ?? KnownQuantizers.Octree; + this.quantizer = encoder.Quantizer ?? KnownQuantizers.Hexadecatree; this.pixelSamplingStrategy = encoder.PixelSamplingStrategy; this.BitsPerPixel = encoder.BitsPerPixel; this.HorizontalPredictor = encoder.HorizontalPredictor; diff --git a/src/ImageSharp/Processing/Extensions/Quantization/QuantizeExtensions.cs b/src/ImageSharp/Processing/Extensions/Quantization/QuantizeExtensions.cs index bf6d2221f4..b0f5cb7d60 100644 --- a/src/ImageSharp/Processing/Extensions/Quantization/QuantizeExtensions.cs +++ b/src/ImageSharp/Processing/Extensions/Quantization/QuantizeExtensions.cs @@ -12,12 +12,12 @@ namespace SixLabors.ImageSharp.Processing; public static class QuantizeExtensions { /// - /// Applies quantization to the image using the . + /// Applies quantization to the image using the . /// /// The current image processing context. /// The . public static IImageProcessingContext Quantize(this IImageProcessingContext source) => - Quantize(source, KnownQuantizers.Octree); + Quantize(source, KnownQuantizers.Hexadecatree); /// /// Applies quantization to the image. @@ -29,7 +29,7 @@ public static class QuantizeExtensions source.ApplyProcessor(new QuantizeProcessor(quantizer)); /// - /// Applies quantization to the image using the . + /// Applies quantization to the image using the . /// /// The current image processing context. /// @@ -37,7 +37,7 @@ public static class QuantizeExtensions /// /// The . 
public static IImageProcessingContext Quantize(this IImageProcessingContext source, Rectangle rectangle) => - Quantize(source, KnownQuantizers.Octree, rectangle); + Quantize(source, KnownQuantizers.Hexadecatree, rectangle); /// /// Applies quantization to the image. diff --git a/src/ImageSharp/Processing/KnownQuantizers.cs b/src/ImageSharp/Processing/KnownQuantizers.cs index 6fb3c72e81..b63ba597d1 100644 --- a/src/ImageSharp/Processing/KnownQuantizers.cs +++ b/src/ImageSharp/Processing/KnownQuantizers.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. using SixLabors.ImageSharp.Processing.Processors.Quantization; @@ -6,14 +6,14 @@ using SixLabors.ImageSharp.Processing.Processors.Quantization; namespace SixLabors.ImageSharp.Processing; /// -/// Contains reusable static instances of known quantizing algorithms +/// Contains reusable static instances of known quantizing algorithms. /// public static class KnownQuantizers { /// - /// Gets the adaptive Octree quantizer. Fast with good quality. + /// Gets the adaptive hexadecatree quantizer. Fast with good quality. /// - public static IQuantizer Octree { get; } = new OctreeQuantizer(); + public static IQuantizer Hexadecatree { get; } = new HexadecatreeQuantizer(); /// /// Gets the Xiaolin Wu's Color Quantizer which generates high quality output. 
diff --git a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer.cs b/src/ImageSharp/Processing/Processors/Quantization/HexadecatreeQuantizer.cs similarity index 51% rename from src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer.cs rename to src/ImageSharp/Processing/Processors/Quantization/HexadecatreeQuantizer.cs index 0a1032bf0d..6b2f5a0131 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/HexadecatreeQuantizer.cs @@ -6,25 +6,29 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Processing.Processors.Quantization; /// -/// Allows the quantization of images pixels using Octrees. -/// +/// Quantizes images by grouping colors in an adaptive 16-way tree and reducing those groups into a palette. /// -public class OctreeQuantizer : IQuantizer +/// +/// Each level routes colors using one bit of RGB and, when useful, one bit of alpha. Fully opaque mid-tone colors +/// use RGB-only routing so more branch resolution is spent on visible color detail, while transparent, dark, and +/// light colors use alpha-aware routing so opacity changes can form their own palette buckets. +/// +public class HexadecatreeQuantizer : IQuantizer { /// - /// Initializes a new instance of the class + /// Initializes a new instance of the class /// using the default . /// - public OctreeQuantizer() + public HexadecatreeQuantizer() : this(new QuantizerOptions()) { } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// - /// The quantizer options defining quantization rules. - public OctreeQuantizer(QuantizerOptions options) + /// The quantizer options that control palette size, dithering, and transparency behavior. 
+ public HexadecatreeQuantizer(QuantizerOptions options) { Guard.NotNull(options, nameof(options)); this.Options = options; @@ -41,5 +45,5 @@ public class OctreeQuantizer : IQuantizer /// public IQuantizer CreatePixelSpecificQuantizer(Configuration configuration, QuantizerOptions options) where TPixel : unmanaged, IPixel - => new OctreeQuantizer(configuration, options); + => new HexadecatreeQuantizer(configuration, options); } diff --git a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/HexadecatreeQuantizer{TPixel}.cs similarity index 54% rename from src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs rename to src/ImageSharp/Processing/Processors/Quantization/HexadecatreeQuantizer{TPixel}.cs index bdf2ba20a8..b5d39d73ec 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/HexadecatreeQuantizer{TPixel}.cs @@ -12,19 +12,28 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Processing.Processors.Quantization; /// -/// Encapsulates methods to calculate the color palette if an image using an Octree pattern. -/// +/// Quantizes an image by building an adaptive 16-way color tree and reducing it to the requested palette size. /// +/// +/// +/// Each level routes colors using one bit of RGB and, when useful, one bit of alpha, giving the tree up to 16 children +/// per node and letting transparency participate directly in palette construction. +/// +/// +/// Fully opaque mid-tone colors use RGB-only routing so more branch resolution is spent on visible color detail. +/// Transparent, dark, and light colors use alpha-aware routing so opacity changes can form distinct palette buckets. +/// +/// /// The pixel format. 
#pragma warning disable CA1001 // Types that own disposable fields should be disposable // See https://github.com/dotnet/roslyn-analyzers/issues/6151 -public struct OctreeQuantizer : IQuantizer +public struct HexadecatreeQuantizer : IQuantizer #pragma warning restore CA1001 // Types that own disposable fields should be disposable where TPixel : unmanaged, IPixel { private readonly int maxColors; private readonly int bitDepth; - private readonly Octree octree; + private readonly Hexadecatree tree; private readonly IMemoryOwner paletteOwner; private ReadOnlyMemory palette; private PixelMap? pixelMap; @@ -32,19 +41,19 @@ public struct OctreeQuantizer : IQuantizer private bool isDisposed; /// - /// Initializes a new instance of the struct. + /// Initializes a new instance of the struct. /// - /// The configuration which allows altering default behavior or extending the library. - /// The quantizer options defining quantization rules. + /// The configuration that provides memory allocation and pixel conversion services. + /// The quantizer options that control palette size, dithering, and transparency behavior. 
[MethodImpl(InliningOptions.ShortMethod)] - public OctreeQuantizer(Configuration configuration, QuantizerOptions options) + public HexadecatreeQuantizer(Configuration configuration, QuantizerOptions options) { this.Configuration = configuration; this.Options = options; this.maxColors = this.Options.MaxColors; this.bitDepth = Numerics.Clamp(ColorNumerics.GetBitsNeededForColorDepth(this.maxColors), 1, 8); - this.octree = new Octree(configuration, this.bitDepth, this.maxColors, this.Options.TransparencyThreshold); + this.tree = new Hexadecatree(configuration, this.bitDepth, this.maxColors, this.Options.TransparencyThreshold); this.paletteOwner = configuration.MemoryAllocator.Allocate(this.maxColors, AllocationOptions.Clean); this.pixelMap = default; this.palette = default; @@ -76,23 +85,28 @@ public struct OctreeQuantizer : IQuantizer /// public readonly void AddPaletteColors(in Buffer2DRegion pixelRegion) { - PixelRowDelegate pixelRowDelegate = new(this.octree); - QuantizerUtilities.AddPaletteColors, TPixel, Rgba32, PixelRowDelegate>( + PixelRowDelegate pixelRowDelegate = new(this.tree); + QuantizerUtilities.AddPaletteColors, TPixel, Rgba32, PixelRowDelegate>( ref Unsafe.AsRef(in this), in pixelRegion, in pixelRowDelegate); } + /// + /// Materializes the final palette from the accumulated tree and prepares the dither lookup map when needed. + /// private void ResolvePalette() { short paletteIndex = 0; Span paletteSpan = this.paletteOwner.GetSpan(); - this.octree.Palettize(paletteSpan, ref paletteIndex); + this.tree.Palettize(paletteSpan, ref paletteIndex); ReadOnlyMemory result = this.paletteOwner.Memory[..paletteSpan.Length]; if (this.isDithering) { + // Dithered colors often no longer land on a color that was seen during palette construction, + // so the quantization pass switches to nearest-palette matching once the palette is finalized. 
this.pixelMap = PixelMapFactory.Create(this.Configuration, result, this.Options.ColorMatchingMode); } @@ -108,17 +122,15 @@ public struct OctreeQuantizer : IQuantizer [MethodImpl(InliningOptions.ShortMethod)] public readonly byte GetQuantizedColor(TPixel color, out TPixel match) { - // Due to the addition of new colors by dithering that are not part of the original histogram, - // the octree nodes might not match the correct color. - // In this case, we must use the pixel map to get the closest color. if (this.isDithering) { + // Dithering introduces adjusted colors that were never inserted into the tree, so tree lookup + // is only reliable for the non-dithered path. return (byte)this.pixelMap!.GetClosestColor(color, out match); } ref TPixel paletteRef = ref MemoryMarshal.GetReference(this.palette.Span); - - int index = this.octree.GetPaletteIndex(color); + int index = this.tree.GetPaletteIndex(color); match = Unsafe.Add(ref paletteRef, (nuint)index); return (byte)index; } @@ -132,34 +144,43 @@ public struct OctreeQuantizer : IQuantizer this.paletteOwner.Dispose(); this.pixelMap?.Dispose(); this.pixelMap = null; - this.octree.Dispose(); + this.tree.Dispose(); } } + /// + /// Forwards source rows into the tree without creating an intermediate buffer. + /// private readonly struct PixelRowDelegate : IQuantizingPixelRowDelegate { - private readonly Octree octree; + private readonly Hexadecatree tree; - public PixelRowDelegate(Octree octree) => this.octree = octree; + /// + /// Initializes a new instance of the struct. + /// + /// The destination tree that should accumulate each visited row. + public PixelRowDelegate(Hexadecatree tree) => this.tree = tree; - public void Invoke(ReadOnlySpan row, int rowIndex) => this.octree.AddColors(row); + /// + public void Invoke(ReadOnlySpan row, int rowIndex) => this.tree.AddColors(row); } /// - /// A hexadecatree-based color quantization structure used for fast color distance lookups and palette generation. 
- /// This tree maintains a fixed pool of nodes (capacity 4096) where each node can have up to 16 children, stores - /// color accumulation data, and supports dynamic node allocation and reduction. It offers near-constant-time insertions - /// and lookups while consuming roughly 240 KB for the node pool. + /// Stores the adaptive 16-way partition tree used to accumulate colors and emit palette entries. /// - internal sealed class Octree : IDisposable + /// + /// The tree uses a fixed node arena for predictable allocation behavior, keeps per-level reducible node lists so + /// deeper buckets can be merged until the palette fits, and caches the previously inserted leaf so repeated colors + /// can be accumulated cheaply. + /// + internal sealed class Hexadecatree : IDisposable { - // The memory allocator. - private readonly MemoryAllocator allocator; - // Pooled buffer for OctreeNodes. - private readonly IMemoryOwner nodesOwner; + private readonly IMemoryOwner nodesOwner; - // Reducible nodes: one per level; we use an integer index; -1 means “no node.” + // One reducible-node head per level. + // Each entry stores a node index, or -1 when that level currently + // has no reducible nodes. private readonly short[] reducibleNodes; // Maximum number of allowable colors. @@ -186,13 +207,13 @@ public struct OctreeQuantizer : IQuantizer private readonly Stack freeIndices = new(); /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// - /// The configuration which allows altering default behavior or extending the library. - /// The maximum number of significant bits in the image. - /// The maximum number of colors to allow in the palette. - /// The threshold for transparent colors. - public Octree( + /// The configuration that provides the backing memory allocator. + /// The number of levels to descend before forcing leaves. + /// The maximum number of palette entries the reduced tree may retain. 
+ /// The alpha threshold below which generated palette entries become fully transparent. + public Hexadecatree( Configuration configuration, int maxColorBits, int maxColors, @@ -207,8 +228,7 @@ public struct OctreeQuantizer : IQuantizer // Allocate a conservative buffer for nodes. const int capacity = 4096; - this.allocator = configuration.MemoryAllocator; - this.nodesOwner = this.allocator.Allocate(capacity, AllocationOptions.Clean); + this.nodesOwner = configuration.MemoryAllocator.Allocate(capacity, AllocationOptions.Clean); // Create the reducible nodes array (one per level 0 .. maxColorBits-1). this.reducibleNodes = new short[this.maxColorBits]; @@ -216,24 +236,24 @@ public struct OctreeQuantizer : IQuantizer // Reserve index 0 for the root. this.rootIndex = 0; - ref OctreeNode root = ref this.Nodes[this.rootIndex]; + ref Node root = ref this.Nodes[this.rootIndex]; root.Initialize(0, this.maxColorBits, this, this.rootIndex); } /// - /// Gets or sets the number of leaves in the tree. + /// Gets or sets the number of leaf nodes currently representing palette buckets. /// public int Leaves { get; set; } /// - /// Gets the full collection of nodes as a span. + /// Gets the underlying node arena. /// - internal Span Nodes => this.nodesOwner.Memory.Span; + internal Span Nodes => this.nodesOwner.Memory.Span; /// - /// Adds a span of colors to the octree. + /// Adds a row of colors to the tree. /// - /// A span of color values to be added. + /// The colors to accumulate. public void AddColors(ReadOnlySpan row) { for (int x = 0; x < row.Length; x++) @@ -243,12 +263,13 @@ public struct OctreeQuantizer : IQuantizer } /// - /// Add a color to the Octree. + /// Adds a single color sample to the tree. /// - /// The color to add. + /// The color to accumulate. private void AddColor(Rgba32 color) { - // Ensure that the tree is not already full. 
+ // Once the node arena is full and there are no recycled slots available, keep collapsing + // reducible leaves until the tree is small enough to make forward progress again. if (this.nextNode >= this.Nodes.Length && this.freeIndices.Count == 0) { while (this.Leaves > this.maxColors) @@ -257,32 +278,32 @@ public struct OctreeQuantizer : IQuantizer } } - // If the color is the same as the previous color, increment the node. - // Otherwise, add a new node. + // Scanlines often contain long runs of the same color. Caching the previous leaf lets those + // repeats skip the tree walk and just bump the accumulated sums in place. if (this.previousColor.Equals(color)) { if (this.previousNode == -1) { this.previousColor = color; - OctreeNode.AddColor(this.rootIndex, color, this.maxColorBits, 0, this); + Node.AddColor(this.rootIndex, color, this.maxColorBits, 0, this); } else { - OctreeNode.Increment(this.previousNode, color, this); + Node.Increment(this.previousNode, color, this); } } else { this.previousColor = color; - OctreeNode.AddColor(this.rootIndex, color, this.maxColorBits, 0, this); + Node.AddColor(this.rootIndex, color, this.maxColorBits, 0, this); } } /// - /// Construct the palette from the octree. + /// Reduces the tree to the requested palette size and emits the final palette entries. /// - /// The palette to construct. - /// The current palette index. + /// The destination palette span. + /// The running palette index. public void Palettize(Span palette, ref short paletteIndex) { while (this.Leaves > this.maxColors) @@ -294,48 +315,45 @@ public struct OctreeQuantizer : IQuantizer } /// - /// Get the palette index for the passed color. + /// Gets the palette index selected by the tree for the supplied color. /// - /// The color to get the palette index for. - /// The . + /// The color to resolve. + /// The palette index represented by the best matching leaf in the reduced tree. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public int GetPaletteIndex(TPixel color) => this.Nodes[this.rootIndex].GetPaletteIndex(color.ToRgba32(), 0, this); /// - /// Track the previous node and color. + /// Records the most recently touched leaf so repeated colors can bypass another descent. /// - /// The node index. + /// The leaf node index. [MethodImpl(MethodImplOptions.AggressiveInlining)] public void TrackPrevious(int nodeIndex) => this.previousNode = nodeIndex; /// - /// Reduce the depth of the tree. + /// Collapses the deepest currently reducible node into a single leaf. /// private void Reduce() { - // Find the deepest level containing at least one reducible node int index = this.maxColorBits - 1; while ((index > 0) && (this.reducibleNodes[index] == -1)) { index--; } - // Reduce the node most recently added to the list at level 'index' - ref OctreeNode node = ref this.Nodes[this.reducibleNodes[index]]; + ref Node node = ref this.Nodes[this.reducibleNodes[index]]; this.reducibleNodes[index] = node.NextReducibleIndex; - - // Decrement the leaf count after reducing the node node.Reduce(this); - // And just in case I've reduced the last color to be added, and the next color to - // be added is the same, invalidate the previousNode... + // If the last inserted leaf was merged away, the next repeated color must walk the tree again. this.previousNode = -1; } - // Allocate a new OctreeNode from the pooled buffer. - // First check the freeIndices stack. + /// + /// Allocates a node index from the free list or from the unused tail of the arena. + /// + /// The allocated node index, or -1 if no node can be allocated. internal short AllocateNode() { if (this.freeIndices.Count > 0) @@ -354,9 +372,9 @@ public struct OctreeQuantizer : IQuantizer } /// - /// Free a node index, making it available for re-allocation. + /// Returns a node index to the free list. /// - /// The index to free. + /// The node index to recycle. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] internal void FreeNode(short index) { @@ -367,8 +385,11 @@ public struct OctreeQuantizer : IQuantizer /// public void Dispose() => this.nodesOwner.Dispose(); + /// + /// Represents one node in the hexadecatree node arena. + /// [StructLayout(LayoutKind.Sequential)] - internal struct OctreeNode + internal struct Node { public bool Leaf; public int PixelCount; @@ -380,19 +401,21 @@ public struct OctreeQuantizer : IQuantizer public short NextReducibleIndex; private InlineArray16 children; + /// + /// Gets the 16 child slots for this node. + /// [UnscopedRef] public Span Children => this.children; /// - /// Initialize the . + /// Initializes a node either as a leaf or as a reducible interior node. /// - /// The level of the node. - /// The number of significant color bits in the image. - /// The parent octree. - /// The index of the node. - public void Initialize(int level, int colorBits, Octree octree, short index) + /// The depth of the node being initialized. + /// The maximum tree depth. + /// The owning tree. + /// The node index in the arena. + public void Initialize(int level, int colorBits, Hexadecatree tree, short index) { - // Construct the new node. this.Leaf = level == colorBits; this.Red = 0; this.Green = 0; @@ -401,76 +424,73 @@ public struct OctreeQuantizer : IQuantizer this.PixelCount = 0; this.PaletteIndex = 0; this.NextReducibleIndex = -1; - - // Always clear the Children array. this.Children.Fill(-1); if (this.Leaf) { - octree.Leaves++; + tree.Leaves++; } else { - // Add this node to the reducible nodes list for its level. - this.NextReducibleIndex = octree.reducibleNodes[level]; - octree.reducibleNodes[level] = index; + // Track reducible nodes per level so palette reduction can always collapse the deepest + // buckets first without scanning the entire arena. + this.NextReducibleIndex = tree.reducibleNodes[level]; + tree.reducibleNodes[level] = index; } } /// - /// Add a color to the Octree. 
+ /// Descends the tree for the supplied color, allocating nodes as needed until a leaf is reached. /// - /// The node index. - /// The color to add. - /// The number of significant color bits in the image. - /// The level of the node. - /// The parent octree. - public static void AddColor(int nodeIndex, Rgba32 color, int colorBits, int level, Octree octree) + /// The current node index. + /// The color being accumulated. + /// The maximum tree depth. + /// The current depth. + /// The owning tree. + public static void AddColor(int nodeIndex, Rgba32 color, int colorBits, int level, Hexadecatree tree) { - ref OctreeNode node = ref octree.Nodes[nodeIndex]; + ref Node node = ref tree.Nodes[nodeIndex]; if (node.Leaf) { - Increment(nodeIndex, color, octree); - octree.TrackPrevious(nodeIndex); + Increment(nodeIndex, color, tree); + tree.TrackPrevious(nodeIndex); + return; } - else - { - int index = GetColorIndex(color, level); - short childIndex; - Span children = node.Children; - childIndex = children[index]; + int index = GetColorIndex(color, level); + Span children = node.Children; + short childIndex = children[index]; + if (childIndex == -1) + { + childIndex = tree.AllocateNode(); if (childIndex == -1) { - childIndex = octree.AllocateNode(); - - if (childIndex == -1) - { - // No room in the tree, so increment the count and return. - Increment(nodeIndex, color, octree); - octree.TrackPrevious(nodeIndex); - return; - } - - ref OctreeNode child = ref octree.Nodes[childIndex]; - child.Initialize(level + 1, colorBits, octree, childIndex); - children[index] = childIndex; + // If the arena is exhausted and no node can be reclaimed yet, fall back to + // accumulating into the current node instead of failing the insert outright. 
+ Increment(nodeIndex, color, tree); + tree.TrackPrevious(nodeIndex); + return; } - AddColor(childIndex, color, colorBits, level + 1, octree); + ref Node child = ref tree.Nodes[childIndex]; + child.Initialize(level + 1, colorBits, tree, childIndex); + children[index] = childIndex; } + + // Keep descending until we reach the leaf bucket that should accumulate this sample. + AddColor(childIndex, color, colorBits, level + 1, tree); } /// - /// Increment the color components of this node. + /// Adds the supplied color sample to an existing node's running sums. /// - /// The node index. - /// The color to increment by. - /// The parent octree. - public static void Increment(int nodeIndex, Rgba32 color, Octree octree) + /// The node index to update. + /// The color sample being accumulated. + /// The owning tree. + public static void Increment(int nodeIndex, Rgba32 color, Hexadecatree tree) { - ref OctreeNode node = ref octree.Nodes[nodeIndex]; + ref Node node = ref tree.Nodes[nodeIndex]; node.PixelCount++; node.Red += color.R; node.Green += color.G; @@ -479,10 +499,10 @@ public struct OctreeQuantizer : IQuantizer } /// - /// Reduce this node by ensuring its children are all reduced (i.e. leaves) and then merging their data. + /// Merges all child nodes into this node and turns it into a leaf. /// - /// The parent octree. - public void Reduce(Octree octree) + /// The owning tree. + public void Reduce(Hexadecatree tree) { // If already a leaf, do nothing. if (this.Leaf) @@ -492,25 +512,27 @@ public struct OctreeQuantizer : IQuantizer // Now merge the (presumably reduced) children. 
int pixelCount = 0; - int sumRed = 0, sumGreen = 0, sumBlue = 0, sumAlpha = 0; + int sumRed = 0; + int sumGreen = 0; + int sumBlue = 0; + int sumAlpha = 0; Span children = this.Children; + for (int i = 0; i < children.Length; i++) { short childIndex = children[i]; if (childIndex != -1) { - ref OctreeNode child = ref octree.Nodes[childIndex]; + ref Node child = ref tree.Nodes[childIndex]; int pixels = child.PixelCount; - sumRed += child.Red; sumGreen += child.Green; sumBlue += child.Blue; sumAlpha += child.Alpha; pixelCount += pixels; - // Free the child immediately. children[i] = -1; - octree.FreeNode(childIndex); + tree.FreeNode(childIndex); } } @@ -529,16 +551,16 @@ public struct OctreeQuantizer : IQuantizer } this.Leaf = true; - octree.Leaves++; + tree.Leaves++; } /// - /// Traverse the tree to construct the palette. + /// Traverses the reduced tree and emits one palette color per leaf. /// - /// The parent octree. - /// The palette to construct. - /// The current palette index. - public void ConstructPalette(Octree octree, Span palette, ref short paletteIndex) + /// The owning tree. + /// The destination palette span. + /// The running palette index. 
+ public void ConstructPalette(Hexadecatree tree, Span palette, ref short paletteIndex) { if (this.Leaf) { @@ -549,13 +571,12 @@ public struct OctreeQuantizer : IQuantizer Vector4.Zero, new Vector4(255)); - if (vector.W < octree.transparencyThreshold255) + if (vector.W < tree.transparencyThreshold255) { vector = Vector4.Zero; } palette[paletteIndex] = TPixel.FromRgba32(new Rgba32((byte)vector.X, (byte)vector.Y, (byte)vector.Z, (byte)vector.W)); - this.PaletteIndex = paletteIndex++; } else @@ -566,19 +587,20 @@ public struct OctreeQuantizer : IQuantizer int childIndex = children[i]; if (childIndex != -1) { - octree.Nodes[childIndex].ConstructPalette(octree, palette, ref paletteIndex); + tree.Nodes[childIndex].ConstructPalette(tree, palette, ref paletteIndex); } } } } /// - /// Get the palette index for the passed color. + /// Resolves the palette index represented by this node for the supplied color. /// - /// The color to get the palette index for. - /// The level of the node. - /// The parent octree. - public int GetPaletteIndex(Rgba32 color, int level, Octree octree) + /// The color to resolve. + /// The current tree depth. + /// The owning tree. + /// The palette index for the best reachable leaf, or -1 if no leaf can be reached. + public int GetPaletteIndex(Rgba32 color, int level, Hexadecatree tree) { if (this.Leaf) { @@ -590,15 +612,16 @@ public struct OctreeQuantizer : IQuantizer int childIndex = children[colorIndex]; if (childIndex != -1) { - return octree.Nodes[childIndex].GetPaletteIndex(color, level + 1, octree); + return tree.Nodes[childIndex].GetPaletteIndex(color, level + 1, tree); } + // After reductions the exact branch can disappear, so fall back to the first reachable descendant leaf. 
for (int i = 0; i < children.Length; i++) { childIndex = children[i]; if (childIndex != -1) { - int childPaletteIndex = octree.Nodes[childIndex].GetPaletteIndex(color, level + 1, octree); + int childPaletteIndex = tree.Nodes[childIndex].GetPaletteIndex(color, level + 1, tree); if (childPaletteIndex != -1) { return childPaletteIndex; @@ -610,37 +633,35 @@ public struct OctreeQuantizer : IQuantizer } /// - /// Gets the color index at the given level. + /// Computes the child slot for a color at the supplied tree level. /// - /// The color to get the index for. - /// The level to get the index at. + /// The color being routed. + /// The tree depth whose bit plane should be sampled. + /// The child slot index for the color at the supplied level. + /// + /// For fully opaque mid-tone colors the tree ignores alpha and routes on RGB only, preserving more branch + /// resolution for visible color detail. For transparent, dark, and light colors it includes alpha as the + /// most significant routing bit so opacity changes can form their own branches. + /// public static int GetColorIndex(Rgba32 color, int level) { - // Determine how many bits to shift based on the current tree level. - // At level 0, shift = 7; as level increases, the shift decreases. + // Sample one bit plane per level, starting at the most significant bit and moving downward. int shift = 7 - level; byte mask = (byte)(1 << shift); - // Compute the luminance of the RGB components using the BT.709 standard. - // This gives a measure of brightness for the color. + // Use BT.709 luminance as a cheap brightness estimate for deciding whether alpha carries + // useful information at this level for fully opaque colors. int luminance = ColorNumerics.Get8BitBT709Luminance(color.R, color.G, color.B); - // Define thresholds for determining when to include the alpha bit in the index. - // The thresholds are scaled according to the current level. 
- // 128 is the midpoint of the 8-bit range (0–255), so shifting it right by 'level' - // produces a threshold that scales with the color cube subdivision. + // Scale the brightness thresholds with depth so deeper levels become stricter about when + // to spend a branch bit on alpha instead of RGB detail. int darkThreshold = 128 >> level; - - // The light threshold is set symmetrically: 255 minus the scaled midpoint. int lightThreshold = 255 - (128 >> level); - // If the pixel is fully opaque and its brightness falls between the dark and light thresholds, - // ignore the alpha channel to maximize RGB resolution. - // Otherwise (if the pixel is dark, light, or semi-transparent), include the alpha bit - // to preserve any gradient that may be present. if (color.A == 255 && luminance > darkThreshold && luminance < lightThreshold) { - // Extract one bit each from R, G, and B channels and combine them into a 3-bit index. + // Fully opaque mid-tone colors route on RGB only, which preserves more visible color + // resolution because alpha would contribute no extra separation here. int rBits = ((color.R & mask) >> shift) << 2; int gBits = ((color.G & mask) >> shift) << 1; int bBits = (color.B & mask) >> shift; @@ -648,7 +669,8 @@ public struct OctreeQuantizer : IQuantizer } else { - // Extract one bit from each channel including alpha (alpha becomes the most significant bit). + // Transparent, dark, and light colors include alpha as the high routing bit so opacity + // changes can form distinct buckets alongside RGB differences. 
int aBits = ((color.A & mask) >> shift) << 3; int rBits = ((color.R & mask) >> shift) << 2; int gBits = ((color.G & mask) >> shift) << 1; diff --git a/tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs b/tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs index 125b42680d..69779731be 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs @@ -40,19 +40,19 @@ public class EncodeIndexedPng this.bmpCore.Dispose(); } - [Benchmark(Baseline = true, Description = "ImageSharp Octree Png")] - public void PngCoreOctree() + [Benchmark(Baseline = true, Description = "ImageSharp Hexadecatree Png")] + public void PngCoreHexadecatree() { using MemoryStream memoryStream = new(); - PngEncoder options = new() { Quantizer = KnownQuantizers.Octree }; + PngEncoder options = new() { Quantizer = KnownQuantizers.Hexadecatree }; this.bmpCore.SaveAsPng(memoryStream, options); } - [Benchmark(Description = "ImageSharp Octree NoDither Png")] - public void PngCoreOctreeNoDither() + [Benchmark(Description = "ImageSharp Hexadecatree NoDither Png")] + public void PngCoreHexadecatreeNoDither() { using MemoryStream memoryStream = new(); - PngEncoder options = new() { Quantizer = new OctreeQuantizer(new QuantizerOptions { Dither = null }) }; + PngEncoder options = new() { Quantizer = new HexadecatreeQuantizer(new QuantizerOptions { Dither = null }) }; this.bmpCore.SaveAsPng(memoryStream, options); } diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index 5ebcc8bb96..6bd7e0103f 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -292,7 +292,7 @@ public class BmpEncoderTests [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] - public void Encode_8BitColor_WithOctreeQuantizer(TestImageProvider provider) + public void 
Encode_8BitColor_WithHexadecatreeQuantizer(TestImageProvider provider) where TPixel : unmanaged, IPixel { if (!TestEnvironment.Is64BitProcess) @@ -304,7 +304,7 @@ public class BmpEncoderTests BmpEncoder encoder = new() { BitsPerPixel = BmpBitsPerPixel.Bit8, - Quantizer = new OctreeQuantizer() + Quantizer = new HexadecatreeQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); @@ -385,7 +385,7 @@ public class BmpEncoderTests { BitsPerPixel = bitsPerPixel, SupportTransparency = false, - Quantizer = KnownQuantizers.Octree + Quantizer = KnownQuantizers.Hexadecatree }; image.SaveAsBmp(reencodedStream, encoder); reencodedStream.Seek(0, SeekOrigin.Begin); @@ -478,7 +478,7 @@ public class BmpEncoderTests { BitsPerPixel = bitsPerPixel, SupportTransparency = supportTransparency, - Quantizer = quantizer ?? KnownQuantizers.Octree + Quantizer = quantizer ?? KnownQuantizers.Hexadecatree }; // Does DebugSave & load reference CompareToReferenceInput(): diff --git a/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs b/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs index 072b04fa0d..2b91c4dbfa 100644 --- a/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs +++ b/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs @@ -125,7 +125,7 @@ public class GeneralFormatTests public static readonly TheoryData QuantizerNames = new() { - nameof(KnownQuantizers.Octree), + nameof(KnownQuantizers.Hexadecatree), nameof(KnownQuantizers.WebSafe), nameof(KnownQuantizers.Werner), nameof(KnownQuantizers.Wu) diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifEncoderTests.cs index 370106ca30..b7bbe4971a 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifEncoderTests.cs @@ -115,7 +115,7 @@ public class GifEncoderTests GifEncoder encoder = new() { ColorTableMode = FrameColorTableMode.Global, - Quantizer = 
new OctreeQuantizer(new QuantizerOptions { Dither = null }) + Quantizer = new HexadecatreeQuantizer(new QuantizerOptions { Dither = null }) }; // Always save as we need to compare the encoded output. @@ -124,7 +124,7 @@ public class GifEncoderTests encoder = new GifEncoder { ColorTableMode = FrameColorTableMode.Local, - Quantizer = new OctreeQuantizer(new QuantizerOptions { Dither = null }), + Quantizer = new HexadecatreeQuantizer(new QuantizerOptions { Dither = null }), }; provider.Utility.SaveTestOutputFile(image, "gif", encoder, "local"); @@ -191,7 +191,7 @@ public class GifEncoderTests GifEncoder encoder = new() { ColorTableMode = colorMode, - Quantizer = new OctreeQuantizer(new QuantizerOptions { MaxColors = maxColors }) + Quantizer = new HexadecatreeQuantizer(new QuantizerOptions { MaxColors = maxColors }) }; image.Save(outStream, encoder); diff --git a/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs index f9836ffb13..ee82687167 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs @@ -135,9 +135,9 @@ public class WebpEncoderTests // Alpha thresholding is 64/255F. 
GifEncoder gifEncoder = new() { - Quantizer = new OctreeQuantizer(options) + Quantizer = new HexadecatreeQuantizer(options) }; - provider.Utility.SaveTestOutputFile(image, "gif", gifEncoder, "octree"); + provider.Utility.SaveTestOutputFile(image, "gif", gifEncoder, "hexadecatree"); gifEncoder = new GifEncoder { @@ -152,8 +152,8 @@ public class WebpEncoderTests }; using Image cloned1 = image.Clone(); - cloned1.Mutate(c => c.Quantize(new OctreeQuantizer(options))); - provider.Utility.SaveTestOutputFile(cloned1, "webp", encoder, "octree"); + cloned1.Mutate(c => c.Quantize(new HexadecatreeQuantizer(options))); + provider.Utility.SaveTestOutputFile(cloned1, "webp", encoder, "hexadecatree"); using Image cloned2 = image.Clone(); cloned2.Mutate(c => c.Quantize(new WuQuantizer(options))); @@ -162,7 +162,7 @@ public class WebpEncoderTests // Now blend the images with a blue background and save as webp. using Image background1 = new(image.Width, image.Height, Color.White.ToPixel()); background1.Mutate(c => c.DrawImage(cloned1, 1)); - provider.Utility.SaveTestOutputFile(background1, "webp", encoder, "octree-blended"); + provider.Utility.SaveTestOutputFile(background1, "webp", encoder, "hexadecatree-blended"); using Image background2 = new(image.Width, image.Height, Color.White.ToPixel()); background2.Mutate(c => c.DrawImage(cloned2, 1)); diff --git a/tests/ImageSharp.Tests/Processing/Processors/Quantization/OctreeQuantizerTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Quantization/HexadecatreeQuantizerTests.cs similarity index 76% rename from tests/ImageSharp.Tests/Processing/Processors/Quantization/OctreeQuantizerTests.cs rename to tests/ImageSharp.Tests/Processing/Processors/Quantization/HexadecatreeQuantizerTests.cs index c9f3daf0f2..4ef2159305 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Quantization/OctreeQuantizerTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Quantization/HexadecatreeQuantizerTests.cs @@ -8,37 +8,37 @@ using 
SixLabors.ImageSharp.Processing.Processors.Quantization; namespace SixLabors.ImageSharp.Tests.Processing.Processors.Quantization; [Trait("Category", "Processors")] -public class OctreeQuantizerTests +public class HexadecatreeQuantizerTests { [Fact] - public void OctreeQuantizerConstructor() + public void HexadecatreeQuantizerConstructor() { QuantizerOptions expected = new() { MaxColors = 128 }; - OctreeQuantizer quantizer = new(expected); + HexadecatreeQuantizer quantizer = new(expected); Assert.Equal(expected.MaxColors, quantizer.Options.MaxColors); Assert.Equal(QuantizerConstants.DefaultDither, quantizer.Options.Dither); expected = new QuantizerOptions { Dither = null }; - quantizer = new OctreeQuantizer(expected); + quantizer = new HexadecatreeQuantizer(expected); Assert.Equal(QuantizerConstants.MaxColors, quantizer.Options.MaxColors); Assert.Null(quantizer.Options.Dither); expected = new QuantizerOptions { Dither = KnownDitherings.Atkinson }; - quantizer = new OctreeQuantizer(expected); + quantizer = new HexadecatreeQuantizer(expected); Assert.Equal(QuantizerConstants.MaxColors, quantizer.Options.MaxColors); Assert.Equal(KnownDitherings.Atkinson, quantizer.Options.Dither); expected = new QuantizerOptions { Dither = KnownDitherings.Atkinson, MaxColors = 0 }; - quantizer = new OctreeQuantizer(expected); + quantizer = new HexadecatreeQuantizer(expected); Assert.Equal(QuantizerConstants.MinColors, quantizer.Options.MaxColors); Assert.Equal(KnownDitherings.Atkinson, quantizer.Options.Dither); } [Fact] - public void OctreeQuantizerCanCreateFrameQuantizer() + public void HexadecatreeQuantizerCanCreateFrameQuantizer() { - OctreeQuantizer quantizer = new(); + HexadecatreeQuantizer quantizer = new(); IQuantizer frameQuantizer = quantizer.CreatePixelSpecificQuantizer(Configuration.Default); Assert.NotNull(frameQuantizer); @@ -46,14 +46,14 @@ public class OctreeQuantizerTests Assert.Equal(QuantizerConstants.DefaultDither, frameQuantizer.Options.Dither); 
frameQuantizer.Dispose(); - quantizer = new OctreeQuantizer(new QuantizerOptions { Dither = null }); + quantizer = new HexadecatreeQuantizer(new QuantizerOptions { Dither = null }); frameQuantizer = quantizer.CreatePixelSpecificQuantizer(Configuration.Default); Assert.NotNull(frameQuantizer); Assert.Null(frameQuantizer.Options.Dither); frameQuantizer.Dispose(); - quantizer = new OctreeQuantizer(new QuantizerOptions { Dither = KnownDitherings.Atkinson }); + quantizer = new HexadecatreeQuantizer(new QuantizerOptions { Dither = KnownDitherings.Atkinson }); frameQuantizer = quantizer.CreatePixelSpecificQuantizer(Configuration.Default); Assert.NotNull(frameQuantizer); Assert.Equal(KnownDitherings.Atkinson, frameQuantizer.Options.Dither); diff --git a/tests/ImageSharp.Tests/Processing/Processors/Quantization/QuantizerTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Quantization/QuantizerTests.cs index 2ba757c117..00e09d83b0 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Quantization/QuantizerTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Quantization/QuantizerTests.cs @@ -74,15 +74,15 @@ public class QuantizerTests = new() { // Known uses error diffusion by default. 
- KnownQuantizers.Octree, + KnownQuantizers.Hexadecatree, KnownQuantizers.WebSafe, KnownQuantizers.Werner, KnownQuantizers.Wu, - new OctreeQuantizer(NoDitherOptions), + new HexadecatreeQuantizer(NoDitherOptions), new WebSafePaletteQuantizer(NoDitherOptions), new WernerPaletteQuantizer(NoDitherOptions), new WuQuantizer(NoDitherOptions), - new OctreeQuantizer(OrderedDitherOptions), + new HexadecatreeQuantizer(OrderedDitherOptions), new WebSafePaletteQuantizer(OrderedDitherOptions), new WernerPaletteQuantizer(OrderedDitherOptions), new WuQuantizer(OrderedDitherOptions) @@ -91,52 +91,52 @@ public class QuantizerTests public static readonly TheoryData DitherScaleQuantizers = new() { - new OctreeQuantizer(Diffuser0_ScaleDitherOptions), + new HexadecatreeQuantizer(Diffuser0_ScaleDitherOptions), new WebSafePaletteQuantizer(Diffuser0_ScaleDitherOptions), new WernerPaletteQuantizer(Diffuser0_ScaleDitherOptions), new WuQuantizer(Diffuser0_ScaleDitherOptions), - new OctreeQuantizer(Diffuser0_25_ScaleDitherOptions), + new HexadecatreeQuantizer(Diffuser0_25_ScaleDitherOptions), new WebSafePaletteQuantizer(Diffuser0_25_ScaleDitherOptions), new WernerPaletteQuantizer(Diffuser0_25_ScaleDitherOptions), new WuQuantizer(Diffuser0_25_ScaleDitherOptions), - new OctreeQuantizer(Diffuser0_5_ScaleDitherOptions), + new HexadecatreeQuantizer(Diffuser0_5_ScaleDitherOptions), new WebSafePaletteQuantizer(Diffuser0_5_ScaleDitherOptions), new WernerPaletteQuantizer(Diffuser0_5_ScaleDitherOptions), new WuQuantizer(Diffuser0_5_ScaleDitherOptions), - new OctreeQuantizer(Diffuser0_75_ScaleDitherOptions), + new HexadecatreeQuantizer(Diffuser0_75_ScaleDitherOptions), new WebSafePaletteQuantizer(Diffuser0_75_ScaleDitherOptions), new WernerPaletteQuantizer(Diffuser0_75_ScaleDitherOptions), new WuQuantizer(Diffuser0_75_ScaleDitherOptions), - new OctreeQuantizer(DiffuserDitherOptions), + new HexadecatreeQuantizer(DiffuserDitherOptions), new WebSafePaletteQuantizer(DiffuserDitherOptions), new 
WernerPaletteQuantizer(DiffuserDitherOptions), new WuQuantizer(DiffuserDitherOptions), - new OctreeQuantizer(Ordered0_ScaleDitherOptions), + new HexadecatreeQuantizer(Ordered0_ScaleDitherOptions), new WebSafePaletteQuantizer(Ordered0_ScaleDitherOptions), new WernerPaletteQuantizer(Ordered0_ScaleDitherOptions), new WuQuantizer(Ordered0_ScaleDitherOptions), - new OctreeQuantizer(Ordered0_25_ScaleDitherOptions), + new HexadecatreeQuantizer(Ordered0_25_ScaleDitherOptions), new WebSafePaletteQuantizer(Ordered0_25_ScaleDitherOptions), new WernerPaletteQuantizer(Ordered0_25_ScaleDitherOptions), new WuQuantizer(Ordered0_25_ScaleDitherOptions), - new OctreeQuantizer(Ordered0_5_ScaleDitherOptions), + new HexadecatreeQuantizer(Ordered0_5_ScaleDitherOptions), new WebSafePaletteQuantizer(Ordered0_5_ScaleDitherOptions), new WernerPaletteQuantizer(Ordered0_5_ScaleDitherOptions), new WuQuantizer(Ordered0_5_ScaleDitherOptions), - new OctreeQuantizer(Ordered0_75_ScaleDitherOptions), + new HexadecatreeQuantizer(Ordered0_75_ScaleDitherOptions), new WebSafePaletteQuantizer(Ordered0_75_ScaleDitherOptions), new WernerPaletteQuantizer(Ordered0_75_ScaleDitherOptions), new WuQuantizer(Ordered0_75_ScaleDitherOptions), - new OctreeQuantizer(OrderedDitherOptions), + new HexadecatreeQuantizer(OrderedDitherOptions), new WebSafePaletteQuantizer(OrderedDitherOptions), new WernerPaletteQuantizer(OrderedDitherOptions), new WuQuantizer(OrderedDitherOptions), diff --git a/tests/ImageSharp.Tests/Quantization/QuantizedImageTests.cs b/tests/ImageSharp.Tests/Quantization/QuantizedImageTests.cs index d832136a98..e940886520 100644 --- a/tests/ImageSharp.Tests/Quantization/QuantizedImageTests.cs +++ b/tests/ImageSharp.Tests/Quantization/QuantizedImageTests.cs @@ -15,12 +15,12 @@ public class QuantizedImageTests { WernerPaletteQuantizer werner = new(); WebSafePaletteQuantizer webSafe = new(); - OctreeQuantizer octree = new(); + HexadecatreeQuantizer hexadecatree = new(); WuQuantizer wu = new(); 
Assert.NotNull(werner.Options.Dither); Assert.NotNull(webSafe.Options.Dither); - Assert.NotNull(octree.Options.Dither); + Assert.NotNull(hexadecatree.Options.Dither); Assert.NotNull(wu.Options.Dither); using (IQuantizer quantizer = werner.CreatePixelSpecificQuantizer(this.Configuration)) @@ -33,7 +33,7 @@ public class QuantizedImageTests Assert.NotNull(quantizer.Options.Dither); } - using (IQuantizer quantizer = octree.CreatePixelSpecificQuantizer(this.Configuration)) + using (IQuantizer quantizer = hexadecatree.CreatePixelSpecificQuantizer(this.Configuration)) { Assert.NotNull(quantizer.Options.Dither); } @@ -47,7 +47,7 @@ public class QuantizedImageTests [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32, true)] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32, false)] - public void OctreeQuantizerYieldsCorrectTransparentPixel( + public void HexadecatreeQuantizerYieldsCorrectTransparentPixel( TestImageProvider provider, bool dither) where TPixel : unmanaged, IPixel @@ -60,7 +60,7 @@ public class QuantizedImageTests options.Dither = null; } - OctreeQuantizer quantizer = new(options); + HexadecatreeQuantizer quantizer = new(options); foreach (ImageFrame frame in image.Frames) { @@ -103,8 +103,8 @@ public class QuantizedImageTests where TPixel : unmanaged, IPixel { using Image image = provider.GetImage(); - OctreeQuantizer octreeQuantizer = new(); - IQuantizer quantizer = octreeQuantizer.CreatePixelSpecificQuantizer(Configuration.Default, new QuantizerOptions { MaxColors = 128 }); + HexadecatreeQuantizer hexadecatreeQuantizer = new(); + IQuantizer quantizer = hexadecatreeQuantizer.CreatePixelSpecificQuantizer(Configuration.Default, new QuantizerOptions { MaxColors = 128 }); ImageFrame frame = image.Frames[0]; quantizer.BuildPaletteAndQuantizeFrame(frame, frame.Bounds); } diff --git a/tests/Images/External/ReferenceOutput/BmpEncoderTests/Encode_8BitColor_WithHexadecatreeQuantizer_rgb32.bmp 
b/tests/Images/External/ReferenceOutput/BmpEncoderTests/Encode_8BitColor_WithHexadecatreeQuantizer_rgb32.bmp new file mode 100644 index 0000000000..f4ae3b9b68 --- /dev/null +++ b/tests/Images/External/ReferenceOutput/BmpEncoderTests/Encode_8BitColor_WithHexadecatreeQuantizer_rgb32.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98b1ec707af066f77fad7d1a64b858d460986beb6d27682717dd5e221310fd4 +size 9270 diff --git a/tests/Images/External/ReferenceOutput/PngEncoderTests/Issue2469_Quantized_Encode_Artifacts_Rgba32_issue_2469.png b/tests/Images/External/ReferenceOutput/PngEncoderTests/Issue2469_Quantized_Encode_Artifacts_Rgba32_issue_2469.png index 4c78303750..ecf0691cd5 100644 --- a/tests/Images/External/ReferenceOutput/PngEncoderTests/Issue2469_Quantized_Encode_Artifacts_Rgba32_issue_2469.png +++ b/tests/Images/External/ReferenceOutput/PngEncoderTests/Issue2469_Quantized_Encode_Artifacts_Rgba32_issue_2469.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1af50619f835b4470afac4553445176c121c3c9fa838dff937dcc56ae37941c3 -size 945821 +oid sha256:770061fbb29cd20bc700ce3fc57e38a758c632c3e89de51f5fbee3d5d522539e +size 912635 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_ErrorDither.png new file mode 100644 index 0000000000..d2b62e63ac --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_ErrorDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f6e8e195c4431dc7354a379152d3a8664582bc2bb1c8960ebf4088aa6505e2 +size 248709 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_NoDither.png 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_NoDither.png new file mode 100644 index 0000000000..ecbf328d36 --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_NoDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b5751dc43e9ad5541913cf851ef1b061aa474a95283c712511531202d7015e +size 239326 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_OrderedDither.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_OrderedDither.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_HexadecatreeQuantizer_OrderedDither.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_ErrorDither.png deleted file mode 100644 index 327366f5b6..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_ErrorDither.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0086044f12a7c58e49733f203af29a8aff2826ea654730274720eada15669254 -size 249163 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_NoDither.png deleted file mode 100644 index 3e0be536e3..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_OctreeQuantizer_NoDither.png +++ /dev/null @@ -1,3 +0,0 @@ 
-version https://git-lfs.github.com/spec/v1 -oid sha256:85ee8479984aa52f837badbc49085c5448597fbfd987438fe25b58bad475e85f -size 239498 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_ErrorDither.png index 922c2bf9b2..28db1b73ac 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f1462733e02d499b0d8c61ab835a27c7fee560fdc7fc521d20ec09bb4ccc80f -size 216030 +oid sha256:af40e835e2f3cf0f406e15248169d058dc1ae69219f2bc5c3413ecea4eb4985f +size 215873 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_NoDither.png index 922c2bf9b2..28db1b73ac 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_NoDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f1462733e02d499b0d8c61ab835a27c7fee560fdc7fc521d20ec09bb4ccc80f -size 216030 +oid sha256:af40e835e2f3cf0f406e15248169d058dc1ae69219f2bc5c3413ecea4eb4985f +size 215873 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_OrderedDither.png index 29c93d14e2..078c75c45a 100644 --- 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WebSafePaletteQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e6d91a3ec4f974af675dc360fd5fd623ec8773cdbc88c0a3a6506880838718a -size 226727 +oid sha256:5eb87f02c7924b764bbd2c951047b7204c56a0a1a0d6853a0fb3d30a56ed0184 +size 226633 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_ErrorDither.png index dbfab2b508..e80b9b8b12 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c68eba122814b5470e5f2e03e34190ff79e84e4b431ad8227355ce7ffcd4a6a7 -size 220192 +oid sha256:84b55eefd699cd74a1a7de958762b095f196275d2bbde2750936aed9a47f68f3 +size 220099 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_NoDither.png index dbfab2b508..e80b9b8b12 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_NoDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c68eba122814b5470e5f2e03e34190ff79e84e4b431ad8227355ce7ffcd4a6a7 -size 220192 +oid 
sha256:84b55eefd699cd74a1a7de958762b095f196275d2bbde2750936aed9a47f68f3 +size 220099 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_OrderedDither.png index 86655af42b..ad899553d7 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WernerPaletteQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6dbd3189b559941f91dd6e0aa15b34a3e5081477400678c2396c6a66d398876f -size 230883 +oid sha256:c4548abed72e4f833b33eed14392206d7232112fc651becb2351fdee27da5bc1 +size 230687 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_ErrorDither.png index 82d5e5d592..a30d69d177 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4df5b1bc2c291ec1cf599580d198b447278412576ab998e099cc21110e82b3d -size 263152 +oid sha256:832173c8ca6bd7a8bf417d83b459ccddb541daed1c31539bf596cacea455441d +size 263018 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_NoDither.png index d8a1178adc..e5591852bf 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_NoDither.png +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_Bike_WuQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df63a3d12e2998d5242b64169ac86e3df7ab4be585a80daddc3e3888dfcb7095 -size 262298 +oid sha256:15a6dc485f0c3fd4c9fbbdb6b50437d58d68210790e37f8aab32e66a864e2746 +size 261872 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png new file mode 100644 index 0000000000..2e815d4d1c --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6eeed563b407940e2a05f068c42b52738e6e1217a1500c9230f7068ca4e9f1e +size 304162 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png new file mode 100644 index 0000000000..a8f30e5f5b --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc7dc55af4ef0741a66c569876ad8a2df27164a653baa5bae536e6d121b2c11 +size 300528 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png new file mode 100644 index 0000000000..3ece7ee289 --- /dev/null +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b65e7903fbfa1ed0682221fdd86c6f0448b3f6a886cae5379720cce881a1f1e +size 305962 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_ErrorDither.png deleted file mode 100644 index f29db004f5..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_ErrorDither.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce381c2d261b9b1ca61d8f6e2ff07b992283c327dc6b7cf53c7e5c9317abb7d3 -size 316443 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_NoDither.png deleted file mode 100644 index 284c3a2702..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_NoDither.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2bfc23a95df8a88ac6e2777d67f381e800d23647c162a9a97131a101bbb97143 -size 306703 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_OrderedDither.png deleted file mode 100644 index 5911faa723..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_OctreeQuantizer_OrderedDither.png +++ /dev/null @@ -1,3 +0,0 @@ 
-version https://git-lfs.github.com/spec/v1 -oid sha256:9d3f58a108d933ec9ac0a5271af5b65d0a8ab9d521d54e48312b280cc42d71ac -size 322049 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_ErrorDither.png index 0205626738..03b9a37f73 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a2aae04edebcaca9b95f30963201794887fa0eac954b64c68bfe529b14fa9be -size 269397 +oid sha256:97c277005703b029a9e791e4c9dc3adcbe06054885fdd31e361e8a0a0222a291 +size 268504 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_NoDither.png index 0205626738..03b9a37f73 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_NoDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a2aae04edebcaca9b95f30963201794887fa0eac954b64c68bfe529b14fa9be -size 269397 +oid sha256:97c277005703b029a9e791e4c9dc3adcbe06054885fdd31e361e8a0a0222a291 +size 268504 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_OrderedDither.png 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_OrderedDither.png index 68d91fc437..a1d28a1697 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WebSafePaletteQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f3e9a338a5ae37c88ce0c348e0b655429220da051db3352779c277bb2dcb441 -size 270622 +oid sha256:b5fa657236e12cbb2a8d2cd747029723a6b3829b475f28626d7647d7b2150918 +size 271579 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_ErrorDither.png index 324bd92539..eba58870f4 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:752760327cc1416c171a920f1e0e95e34eae6d78bd0c7393a3be427bf3c8e55c -size 284481 +oid sha256:532fa8044bb424b451343f89bf7cb954311641056bdbd5685cd7c4fa4ad8f3c8 +size 284056 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_NoDither.png index 324bd92539..eba58870f4 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_NoDither.png +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:752760327cc1416c171a920f1e0e95e34eae6d78bd0c7393a3be427bf3c8e55c -size 284481 +oid sha256:532fa8044bb424b451343f89bf7cb954311641056bdbd5685cd7c4fa4ad8f3c8 +size 284056 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png index 52bf2a163f..de30e7574e 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:293459538454e07bc9ea1e9df1fa5b0eb986fde7de42f6c25b43e4c8859bd28a -size 285370 +oid sha256:61ed5f4d77428be46357609d80a66e884dedbb8c255fdcc71d49eeba0eed2bf2 +size 285037 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_ErrorDither.png index 05be1395ab..c56a90ad27 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90a2b7b3872c6eb1f1f039558d9f6ace92891c86951c801da01ad55b055fd670 -size 316544 +oid sha256:1cc2ef3cb819b5a82e0af32c3ab44aff0206530e291b00bdade58da2ebe4494a +size 
308246 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_NoDither.png index d94d57759f..c3ab7996db 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_NoDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff094e6bafe81e818bcbac69018dcfe29366389dfca0d63d8e05ef42896ffe1d -size 317309 +oid sha256:575c8d81152642fa0eec0ea9901d1941fea58b7686cfaac1d01e0bf59f393c4b +size 308330 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_OrderedDither.png index e016e3de69..47616cd31a 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationInBox_CalliphoraPartial_WuQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee0778aac671365dd0afae06cdcf8f36243bd9815f684b975f83e297bb694e63 -size 323979 +oid sha256:ba295a5ddb79bc61f0be9a28a636fdcc63055c26c46872d407fe20ff785f11ed +size 310415 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.25.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.25.png new file mode 100644 index 0000000000..2f939d957d --- /dev/null +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.25.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8a5da54da08f7450ffb5b49c412e654215e2c2e72c32919abc78b77dc828f5 +size 13160 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.5.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.5.png new file mode 100644 index 0000000000..9e8002ad19 --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a87ef109c08411ca61d91ddcf010c272303a17abd90b6ba2204eac021055e5 +size 13665 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.75.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.75.png new file mode 100644 index 0000000000..45e770bd96 --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.75.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81496d88b42edf4b39ab723d0b5414b56140892f45d30fc2435904b630fa9af5 +size 13886 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.png new file mode 100644 index 0000000000..2f939d957d --- /dev/null +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8a5da54da08f7450ffb5b49c412e654215e2c2e72c32919abc78b77dc828f5 +size 13160 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_1.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_1.png new file mode 100644 index 0000000000..c84edd138f --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_ErrorDither_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997e5281abd8cf3a587984ec1b7e31487ec5ddf16326d025124833d536e4ac27 +size 13910 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.25.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.25.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.25.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.25.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.5.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.5.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.5.png rename to 
tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.5.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.75.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.75.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.75.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.75.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.png new file mode 100644 index 0000000000..2f939d957d --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8a5da54da08f7450ffb5b49c412e654215e2c2e72c32919abc78b77dc828f5 +size 13160 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_1.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_1.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_1.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_HexadecatreeQuantizer_OrderedDither_1.png diff --git 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.25.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.25.png deleted file mode 100644 index a2fb2a6760..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.25.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18a47a6fa0f7949daef6969a847d8bc04deeb16bb482211ec3a958bc63f23f89 -size 13158 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.5.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.5.png deleted file mode 100644 index 8d99eb49b2..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.5.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abfdd1e40c2c1d7fde419bda1da6e534ed989598e790b8ae4de35152a83f77a0 -size 13686 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.75.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.75.png deleted file mode 100644 index bf93c39ff8..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.75.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60c28eb1dc3c0416b20cec230917c0e4a70dd2929467bbab796ecbb04fe5a178 -size 13886 diff --git 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.png deleted file mode 100644 index a2fb2a6760..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18a47a6fa0f7949daef6969a847d8bc04deeb16bb482211ec3a958bc63f23f89 -size 13158 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_1.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_1.png deleted file mode 100644 index 457298b544..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_ErrorDither_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a523f097bf3b155f3823c5e400190b5d5e0d4470db7136576472c3257db76600 -size 13909 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.png deleted file mode 100644 index a2fb2a6760..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_OctreeQuantizer_OrderedDither_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18a47a6fa0f7949daef6969a847d8bc04deeb16bb482211ec3a958bc63f23f89 -size 13158 diff --git 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WernerPaletteQuantizer_OrderedDither_1.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WernerPaletteQuantizer_OrderedDither_1.png index 878a36a477..f288b3c8b7 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WernerPaletteQuantizer_OrderedDither_1.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WernerPaletteQuantizer_OrderedDither_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2bd11fa19fab712b5cd6c2b36d673c7dce904b5032b860d257b00e095e4aadf -size 13432 +oid sha256:dd31b6fc59e1f9f88230d57b39362b76cedd0bd94e15904f69071ba3f465e48d +size 13656 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.25.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.25.png index eaf7e8241d..62b1fb0558 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.25.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.25.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4baf0e7bc4ae8b8a911d87f3a7af2bf3ef0235f77f3f509251f2d2f26cfb639d -size 13158 +oid sha256:0e88f74acac9cfa1a47a4402aa032975ec4bf698d51e6eb1ae103480e2e10489 +size 13160 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.5.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.5.png index 02879b7a38..d2d2e3e4b9 100644 --- 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.5.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.5.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4ac8b88b317281738d833fc71f52348d9f4f45ea5a1303dd91fdb8b42be4267 -size 13186 +oid sha256:dd738ee2a397bb1ee305f03c70e185dea6f67827dc15b9df1966cfe8c0f28040 +size 13177 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.75.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.75.png index ba05094800..c444923a29 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.75.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.75.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1305d54f2139d4577490317051d6ce94a7fc8dd45b902d87a30fb04098dd4594 -size 13407 +oid sha256:2a2df64f89df17428415932c2ef0028d8ad408b5276264d99e6038b70473ebde +size 13417 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.png index eaf7e8241d..62b1fb0558 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:4baf0e7bc4ae8b8a911d87f3a7af2bf3ef0235f77f3f509251f2d2f26cfb639d -size 13158 +oid sha256:0e88f74acac9cfa1a47a4402aa032975ec4bf698d51e6eb1ae103480e2e10489 +size 13160 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_1.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_1.png index b16a5a5c7b..74cbbd7581 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_1.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_ErrorDither_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3fc3a7ace123c330ea06072eb36dd5d65ed9154d4d0f55a828fc542c8a422c1 -size 13472 +oid sha256:234854be2a3f774a58baf79f20e68c7331b6caff486ab4b1e509a96e2a3d70b9 +size 13455 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.25.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.25.png index 6adac16cf5..a9bb2c3163 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.25.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.25.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35757f2e0831cae2fbd3cc11ffaaae855e853ebaa9a1a5564b6568a5e1c442e9 -size 16031 +oid sha256:ef65ce360293ca5659730747087c15735c15df1143204acb60120a5b68cd7cd4 +size 15905 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.75.png 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.75.png index 5d1030e6b8..ceb9188005 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.75.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.75.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6679d6d6f7c8b44461956b54654cea71180a2b0d43712d3775e60cbedd90cc82 -size 17520 +oid sha256:6618f169cf4b585979f8e9261af88fe4a61c3c40b453a159cb643cc062a6a9dc +size 17517 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.png index eaf7e8241d..62b1fb0558 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4baf0e7bc4ae8b8a911d87f3a7af2bf3ef0235f77f3f509251f2d2f26cfb639d -size 13158 +oid sha256:0e88f74acac9cfa1a47a4402aa032975ec4bf698d51e6eb1ae103480e2e10489 +size 13160 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_1.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_1.png index 567e5d6a3b..f6cb173678 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_1.png +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantizationWithDitheringScale_david_WuQuantizer_OrderedDither_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5af5d16f875172d73f8426928fc8edaa4a6cab321a968b6c29fca32d0fba0df5 -size 18182 +oid sha256:4acf21f23978c83c9872bb2575ab45e4f0bbc86c8610c99479b1469fc12df5f2 +size 18112 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_OctreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_HexadecatreeQuantizer_ErrorDither.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_OctreeQuantizer_ErrorDither.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_HexadecatreeQuantizer_ErrorDither.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_OctreeQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_HexadecatreeQuantizer_NoDither.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_OctreeQuantizer_NoDither.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_HexadecatreeQuantizer_NoDither.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_OctreeQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_HexadecatreeQuantizer_OrderedDither.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_OctreeQuantizer_OrderedDither.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_HexadecatreeQuantizer_OrderedDither.png diff --git 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WernerPaletteQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WernerPaletteQuantizer_OrderedDither.png index 10daff76b2..fd565c0a12 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WernerPaletteQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WernerPaletteQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8ba00e2948337f77d935d98349958c6a520958671e9ec714ff1bfadfb130e72 -size 44622 +oid sha256:4ded8db323023a7c7620bba3b2259a549571442fe0a37883c7755ac69ae9d6d5 +size 44646 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_ErrorDither.png index 37e5035d86..c342e3a230 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3802cfe67638a24869d6cc9ace1d94460b4c0c26f2c91b12b95fa8f979de64bb -size 101579 +oid sha256:83c8403f5d0e5457721d992c1e6980134e8a65a1f646163a4f091cf34583ca02 +size 101417 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_NoDither.png index e72ea4b246..d07231c185 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_NoDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_NoDither.png @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:bf2021eba9edbb2295924f8394472ac0bb237f0c462c39aa32a2074ef15f9acc -size 81771 +oid sha256:e5412b892143bb433804c662750a64a1660b2072520db53d76ec6897c636ec50 +size 81742 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_OrderedDither.png index 0997945e52..7d2070820d 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_Bike_WuQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d11b18946d373b995ecbb449c8c4cfcc7078aad1c8705997bcbf83131acde03 -size 102439 +oid sha256:a88a48586502de786aca0b36341cf6033fb3ec3ce7924ce1e2819fd14791ffe4 +size 102235 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png new file mode 100644 index 0000000000..79711e2ebc --- /dev/null +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_ErrorDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22920fb2379dee7d12fee52f6a39b8e46e1e99f77b91f879c51bb33a981dfdcb +size 98851 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png new file mode 100644 index 0000000000..fa6d4cb432 --- /dev/null +++ 
b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_NoDither.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7137e1b87d317d7e139cde8499deafa89f27bddba146cc5736f9c0566778c5 +size 81609 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png similarity index 100% rename from tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_OrderedDither.png rename to tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_HexadecatreeQuantizer_OrderedDither.png diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_ErrorDither.png deleted file mode 100644 index 314a056060..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_ErrorDither.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2236e81d33fcfb50afb9d5fd1a38c5ddf5d33fbb52de1c3204a4a9892fd334ce -size 99084 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_NoDither.png deleted file mode 100644 index 5293046724..0000000000 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_OctreeQuantizer_NoDither.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:c4b59097d1507236af2556ae5f2638360b223b7752cd4c8f760bc14673d811d0 -size 81709 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png index b51076bd17..0fe3d30bfb 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WernerPaletteQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a8d9c0d81525d9f37d2f36946939040aea30edfc2b7ec0bf329fb49f6c7d73f -size 69896 +oid sha256:aee197677c3276d4abb8fc027358b38be26462374e364841781626f0aa67e1a4 +size 69769 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_ErrorDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_ErrorDither.png index 7204abff47..ef86e7c48d 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_ErrorDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_ErrorDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4474b94e2d563938e10ec0526e7d94ba06b440db51b910604e752f7f9e814d66 -size 110757 +oid sha256:0b3c8dc7e653ef1846c7359e9a0f719bee91549846f160abb547cd0aab6a8a59 +size 110711 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_NoDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_NoDither.png index 691623fc88..c65381c052 100644 --- 
a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_NoDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_NoDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58a61c1d9a1d05acd484948c3e5c0496dbc74c0060f5de71741de39eae04ffa8 -size 103875 +oid sha256:4b95721a963def9e82dd32e277ed4594213920d7808ad26696d01e4f8fda842e +size 103855 diff --git a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_OrderedDither.png b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_OrderedDither.png index e80e6c6e81..eb2c2a0c98 100644 --- a/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_OrderedDither.png +++ b/tests/Images/External/ReferenceOutput/QuantizerTests/ApplyQuantization_CalliphoraPartial_WuQuantizer_OrderedDither.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6649918c0394ead13c016a57b6a08561290651bccac88f7f15ba0e29dc5faa4 -size 110422 +oid sha256:cb174c104cdcf35433c98522a1d9d52ccf42e8927e0b59fec3556aeee8b15a47 +size 110505 From c9f7e6ea60c516b887256a32f47aee42a448fc21 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 7 Apr 2026 23:19:38 +1000 Subject: [PATCH 08/12] Add ImageInfo.GetPixelMemorySize; docs & tests --- src/ImageSharp/Formats/Gif/GifDecoderCore.cs | 2 +- src/ImageSharp/ImageInfo.cs | 32 ++++++++++++++++++-- tests/ImageSharp.Tests/ImageInfoTests.cs | 30 ++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs index 78ceb0b233..3d32c7cdac 100644 --- a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs +++ b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs @@ -468,7 +468,7 @@ internal sealed class GifDecoderCore : 
ImageDecoderCore int length = this.currentLocalColorTableSize = this.imageDescriptor.LocalColorTableSize * 3; this.currentLocalColorTable ??= this.configuration.MemoryAllocator.Allocate(768, AllocationOptions.Clean); stream.Read(this.currentLocalColorTable.GetSpan()[..length]); - rawColorTable = this.currentLocalColorTable!.GetSpan()[..length]; + rawColorTable = this.currentLocalColorTable.GetSpan()[..length]; } else if (this.globalColorTable != null) { diff --git a/src/ImageSharp/ImageInfo.cs b/src/ImageSharp/ImageInfo.cs index 0bbd73b63a..d27c4b9330 100644 --- a/src/ImageSharp/ImageInfo.cs +++ b/src/ImageSharp/ImageInfo.cs @@ -63,8 +63,12 @@ public class ImageInfo public int Height => this.Size.Height; /// - /// Gets the number of frames in the image. + /// Gets the number of frame metadata entries available for the image. /// + /// + /// This value is the same as count and may be 0 when frame + /// metadata was not populated by the decoder. + /// public int FrameCount => this.FrameMetadataCollection.Count; /// @@ -73,8 +77,12 @@ public class ImageInfo public ImageMetadata Metadata { get; } /// - /// Gets the collection of metadata associated with individual image frames. + /// Gets the metadata associated with the decoded image frames, if available. /// + /// + /// For multi-frame formats, decoders populate one entry per decoded frame. For single-frame formats, this + /// collection is typically empty. + /// public IReadOnlyList FrameMetadataCollection { get; } /// @@ -86,4 +94,24 @@ public class ImageInfo /// Gets the bounds of the image. /// public Rectangle Bounds => new(Point.Empty, this.Size); + + /// + /// Gets the total number of bytes required to store the image pixels in memory. + /// + /// + /// This reports the in-memory size of the pixel data represented by this , not the + /// encoded size of the image file. The value is computed from the image dimensions and + /// . 
When contains decoded frame metadata, the + /// per-frame size is multiplied by that count. Otherwise, the value is the in-memory size of the single + /// image frame represented by this . Bit depths that are not a whole number of bytes are rounded up to the next whole byte per pixel. + /// + /// The total number of bytes required to store the image pixels in memory. + public long GetPixelMemorySize() + { + int count = this.FrameMetadataCollection.Count > 0 + ? this.FrameMetadataCollection.Count + : 1; + + return (long)this.Size.Width * this.Size.Height * ((this.PixelType.BitsPerPixel + 7) / 8) * count; + } } diff --git a/tests/ImageSharp.Tests/ImageInfoTests.cs b/tests/ImageSharp.Tests/ImageInfoTests.cs index 322b0af196..748c8a4f6d 100644 --- a/tests/ImageSharp.Tests/ImageInfoTests.cs +++ b/tests/ImageSharp.Tests/ImageInfoTests.cs @@ -54,4 +54,34 @@ public class ImageInfoTests Assert.Equal(meta, info.Metadata); Assert.Equal(frameMetadata.Count, info.FrameMetadataCollection.Count); } + + [Fact] + public void GetPixelMemorySize_UsesSingleFrameWhenFrameMetadataIsEmpty() + { + const int width = 10; + const int height = 20; + + ImageMetadata meta = new() { DecodedImageFormat = PngFormat.Instance }; + meta.GetPngMetadata(); + + ImageInfo info = new(new Size(width, height), meta); + + Assert.Equal(width * height * 4, info.GetPixelMemorySize()); + } + + [Fact] + public void GetPixelMemorySize_UsesFrameMetadataCountWhenAvailable() + { + const int width = 10; + const int height = 20; + IReadOnlyList frameMetadata = [new(), new(), new()]; + + ImageMetadata meta = new() { DecodedImageFormat = PngFormat.Instance }; + meta.GetPngMetadata(); + + ImageInfo info = new(new Size(width, height), meta, frameMetadata); + + Assert.Equal(width * height * 4 * frameMetadata.Count, info.GetPixelMemorySize()); + } + } From 1c5e3e1a69d89d0202185100166c6a4c1339bd3a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 8 Apr 2026 14:39:56 +1000 Subject: [PATCH 09/12] Modernize base PorterDuffFunctions --- src/ImageSharp/Common/Helpers/Numerics.cs | 2 +-
.../Common/Helpers/SimdUtils.HwIntrinsics.cs | 37 +++--------- .../Common/Helpers/Vector256Utilities.cs | 22 +++++++ .../PixelBlenders/PorterDuffFunctions.cs | 57 ++++++++++--------- 4 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index efe68977bb..513eb7ab19 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -690,7 +690,7 @@ internal static class Numerics /// /// The span of vectors [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CubePowOnXYZ(Span vectors) + public static void CubePowOnXYZ(Span vectors) { ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 076590605d..154f0b5e22 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -602,48 +602,25 @@ internal static partial class SimdUtils } /// - /// Performs a multiplication and an addition of the . - /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. + /// Performs a multiplication and a negated addition of the . /// - /// ret = (vm0 * vm1) + va - /// The vector to add to the intermediate result. + /// ret = va - (vm0 * vm1) + /// The vector to add to the negated intermediate result. /// The first vector to multiply. /// The second vector to multiply. /// The . 
- [MethodImpl(InliningOptions.AlwaysInline)] - public static Vector256 MultiplyAdd( + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( Vector256 va, Vector256 vm0, Vector256 vm1) { if (Fma.IsSupported) { - return Fma.MultiplyAdd(vm1, vm0, va); - } - - return va + (vm0 * vm1); - } - - /// - /// Performs a multiplication and a negated addition of the . - /// - /// ret = c - (a * b) - /// The first vector to multiply. - /// The second vector to multiply. - /// The vector to add negated to the intermediate result. - /// The . - [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplyAddNegated( - Vector256 a, - Vector256 b, - Vector256 c) - { - if (Fma.IsSupported) - { - return Fma.MultiplyAddNegated(a, b, c); + return Fma.MultiplyAddNegated(vm0, vm1, va); } - return Avx.Subtract(c, Avx.Multiply(a, b)); + return Avx.Subtract(va, Avx.Multiply(vm0, vm1)); } /// diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs index 14ac13dd8d..90e3169b37 100644 --- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs @@ -115,6 +115,28 @@ internal static class Vector256_ return va + (vm0 * vm1); } + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = va - (vm0 * vm1) + /// The vector to add to the negated intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + Vector256 va, + Vector256 vm0, + Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAddNegated(vm0, vm1, va); + } + + return va - (vm0 * vm1); + } + /// /// Performs a multiplication and a subtraction of the . 
/// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index ca358be31c..45c4aade7b 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -5,6 +5,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.Common.Helpers; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -62,7 +63,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Multiply(Vector256 backdrop, Vector256 source) - => Avx.Multiply(backdrop, source); + => backdrop * source; /// /// Returns the result of the "Add" compositing equation. @@ -82,7 +83,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Add(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); + => Vector256.Min(Vector256.Create(1F), backdrop + source); /// /// Returns the result of the "Subtract" compositing equation. @@ -102,7 +103,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Max(Vector256.Zero, Avx.Subtract(backdrop, source)); + => Vector256.Max(Vector256.Zero, backdrop - source); /// /// Returns the result of the "Screen" compositing equation. 
@@ -124,7 +125,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Screen(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); + return Vector256_.MultiplyAddNegated(vOne, vOne - backdrop, vOne - source); } /// @@ -145,7 +146,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Darken(Vector256 backdrop, Vector256 source) - => Avx.Min(backdrop, source); + => Vector256.Min(backdrop, source); /// /// Returns the result of the "Lighten" compositing equation. @@ -164,7 +165,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Lighten(Vector256 backdrop, Vector256 source) - => Avx.Max(backdrop, source); + => Vector256.Max(backdrop, source); /// /// Returns the result of the "Overlay" compositing equation. 
@@ -192,7 +193,7 @@ internal static partial class PorterDuffFunctions public static Vector256 Overlay(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(backdrop, source); - return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -221,7 +222,7 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLight(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(source, backdrop); - return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -244,10 +245,10 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); + Vector256 left = (backdrop + backdrop) * source; Vector256 vOneMinusSource = Avx.Subtract(vOne, source); - Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); + Vector256 right = Vector256_.MultiplyAddNegated(vOne, vOneMinusSource + vOneMinusSource, vOne - backdrop); Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -295,17 +296,17 @@ internal static partial class PorterDuffFunctions Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); - Vector256 blendW = Avx.Multiply(sW, dW); - Vector256 dstW = Avx.Subtract(dW, blendW); - Vector256 srcW = Avx.Subtract(sW, blendW); + Vector256 blendW = sW * dW; + Vector256 dstW = dW - blendW; + Vector256 srcW = sW - blendW; // calculate final alpha - Vector256 
alpha = Avx.Add(dstW, sW); + Vector256 alpha = dstW + sW; // calculate final color - Vector256 color = Avx.Multiply(destination, dstW); - color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW); - color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW); + Vector256 color = destination * dstW; + color = Vector256_.MultiplyAdd(color, source, srcW); + color = Vector256_.MultiplyAdd(color, blend, blendW); // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -354,11 +355,11 @@ internal static partial class PorterDuffFunctions // calculate weights Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); - Vector256 blendW = Avx.Multiply(sW, alpha); - Vector256 dstW = Avx.Subtract(alpha, blendW); + Vector256 blendW = sW * alpha; + Vector256 dstW = alpha - blendW; // calculate final color - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); + Vector256 color = Vector256_.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -392,10 +393,10 @@ internal static partial class PorterDuffFunctions public static Vector256 In(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl); + Vector256 alpha = Avx.Permute(source * destination, ShuffleAlphaControl); // premultiply - Vector256 color = Avx.Multiply(source, alpha); + Vector256 color = source * alpha; // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -429,10 +430,10 @@ internal static partial class PorterDuffFunctions public static Vector256 Out(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); + Vector256 alpha = Avx.Permute(source * (Vector256.Create(1F) - destination), ShuffleAlphaControl); // premultiply - Vector256 color = 
Avx.Multiply(source, alpha); + Vector256 color = source * alpha; // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -475,12 +476,12 @@ internal static partial class PorterDuffFunctions Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 vOne = Vector256.Create(1F); - Vector256 srcW = Avx.Subtract(vOne, dW); - Vector256 dstW = Avx.Subtract(vOne, sW); + Vector256 srcW = vOne - dW; + Vector256 dstW = vOne - sW; // calculate alpha - Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); + Vector256 alpha = Vector256_.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); + Vector256 color = Vector256_.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); // unpremultiply return Numerics.UnPremultiply(color, alpha); From 5e4f3ef1ffb738240ccffda755d5d1be3ab80385 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 8 Apr 2026 14:48:45 +1000 Subject: [PATCH 10/12] Use operators in generated functions --- .../PorterDuffFunctions.Generated.cs | 198 +++++++++--------- .../PorterDuffFunctions.Generated.tt | 22 +- .../FeatureTesting/FeatureTestRunner.cs | 2 + 3 files changed, 112 insertions(+), 110 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index 255bafc798..f0635230ca 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -37,7 +37,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "NormalSrcAtop" compositing equation. @@ -64,7 +64,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Normal(backdrop, source)); } @@ -94,7 +94,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Normal(backdrop, source)); } @@ -123,7 +123,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "NormalSrcOut" compositing equation. @@ -149,7 +149,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "NormalDest" compositing equation. @@ -202,7 +202,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Normal(source, backdrop)); } @@ -232,7 +232,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Normal(source, backdrop)); } @@ -261,7 +261,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "NormalDestOut" compositing equation. @@ -287,7 +287,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "NormalXor" compositing equation. @@ -313,7 +313,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "NormalClear" compositing equation. @@ -339,7 +339,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -558,7 +558,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "MultiplySrcAtop" compositing equation. 
@@ -585,7 +585,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Multiply(backdrop, source)); } @@ -615,7 +615,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Multiply(backdrop, source)); } @@ -644,7 +644,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "MultiplySrcOut" compositing equation. @@ -670,7 +670,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "MultiplyDest" compositing equation. 
@@ -723,7 +723,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Multiply(source, backdrop)); } @@ -753,7 +753,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Multiply(source, backdrop)); } @@ -782,7 +782,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "MultiplyDestOut" compositing equation. @@ -808,7 +808,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "MultiplyXor" compositing equation. @@ -834,7 +834,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "MultiplyClear" compositing equation. @@ -860,7 +860,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -1079,7 +1079,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "AddSrcAtop" compositing equation. 
@@ -1106,7 +1106,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Add(backdrop, source)); } @@ -1136,7 +1136,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Add(backdrop, source)); } @@ -1165,7 +1165,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "AddSrcOut" compositing equation. @@ -1191,7 +1191,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "AddDest" compositing equation. 
@@ -1244,7 +1244,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Add(source, backdrop)); } @@ -1274,7 +1274,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Add(source, backdrop)); } @@ -1303,7 +1303,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "AddDestOut" compositing equation. @@ -1329,7 +1329,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "AddXor" compositing equation. @@ -1355,7 +1355,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "AddClear" compositing equation. @@ -1381,7 +1381,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -1600,7 +1600,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "SubtractSrcAtop" compositing equation. 
@@ -1627,7 +1627,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Subtract(backdrop, source)); } @@ -1657,7 +1657,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Subtract(backdrop, source)); } @@ -1686,7 +1686,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "SubtractSrcOut" compositing equation. @@ -1712,7 +1712,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "SubtractDest" compositing equation. 
@@ -1765,7 +1765,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Subtract(source, backdrop)); } @@ -1795,7 +1795,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Subtract(source, backdrop)); } @@ -1824,7 +1824,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "SubtractDestOut" compositing equation. @@ -1850,7 +1850,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "SubtractXor" compositing equation. @@ -1876,7 +1876,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "SubtractClear" compositing equation. @@ -1902,7 +1902,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -2121,7 +2121,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "ScreenSrcAtop" compositing equation. 
@@ -2148,7 +2148,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Screen(backdrop, source)); } @@ -2178,7 +2178,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Screen(backdrop, source)); } @@ -2207,7 +2207,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "ScreenSrcOut" compositing equation. @@ -2233,7 +2233,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "ScreenDest" compositing equation. 
@@ -2286,7 +2286,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Screen(source, backdrop)); } @@ -2316,7 +2316,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Screen(source, backdrop)); } @@ -2345,7 +2345,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "ScreenDestOut" compositing equation. @@ -2371,7 +2371,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "ScreenXor" compositing equation. @@ -2397,7 +2397,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "ScreenClear" compositing equation. @@ -2423,7 +2423,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -2642,7 +2642,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "DarkenSrcAtop" compositing equation. 
@@ -2669,7 +2669,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Darken(backdrop, source)); } @@ -2699,7 +2699,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Darken(backdrop, source)); } @@ -2728,7 +2728,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "DarkenSrcOut" compositing equation. @@ -2754,7 +2754,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "DarkenDest" compositing equation. 
@@ -2807,7 +2807,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Darken(source, backdrop)); } @@ -2837,7 +2837,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Darken(source, backdrop)); } @@ -2866,7 +2866,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "DarkenDestOut" compositing equation. @@ -2892,7 +2892,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "DarkenXor" compositing equation. @@ -2918,7 +2918,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "DarkenClear" compositing equation. @@ -2944,7 +2944,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -3163,7 +3163,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "LightenSrcAtop" compositing equation. 
@@ -3190,7 +3190,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Lighten(backdrop, source)); } @@ -3220,7 +3220,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Lighten(backdrop, source)); } @@ -3249,7 +3249,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "LightenSrcOut" compositing equation. @@ -3275,7 +3275,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "LightenDest" compositing equation. 
@@ -3328,7 +3328,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Lighten(source, backdrop)); } @@ -3358,7 +3358,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Lighten(source, backdrop)); } @@ -3387,7 +3387,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenDestOut" compositing equation. @@ -3413,7 +3413,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenXor" compositing equation. @@ -3439,7 +3439,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "LightenClear" compositing equation. @@ -3465,7 +3465,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -3684,7 +3684,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "OverlaySrcAtop" compositing equation. 
@@ -3711,7 +3711,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Overlay(backdrop, source)); } @@ -3741,7 +3741,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Overlay(backdrop, source)); } @@ -3770,7 +3770,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "OverlaySrcOut" compositing equation. @@ -3796,7 +3796,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "OverlayDest" compositing equation. 
@@ -3849,7 +3849,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Overlay(source, backdrop)); } @@ -3879,7 +3879,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Overlay(source, backdrop)); } @@ -3908,7 +3908,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "OverlayDestOut" compositing equation. @@ -3934,7 +3934,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "OverlayXor" compositing equation. @@ -3960,7 +3960,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "OverlayClear" compositing equation. @@ -3986,7 +3986,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -4205,7 +4205,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "HardLightSrcAtop" compositing equation. 
@@ -4232,7 +4232,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, HardLight(backdrop, source)); } @@ -4262,7 +4262,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, HardLight(backdrop, source)); } @@ -4291,7 +4291,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "HardLightSrcOut" compositing equation. @@ -4317,7 +4317,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "HardLightDest" compositing equation. 
@@ -4370,7 +4370,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, HardLight(source, backdrop)); } @@ -4400,7 +4400,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, HardLight(source, backdrop)); } @@ -4429,7 +4429,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "HardLightDestOut" compositing equation. @@ -4455,7 +4455,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "HardLightXor" compositing equation. @@ -4481,7 +4481,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "HardLightClear" compositing equation. @@ -4507,7 +4507,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 150adb33a8..83bc055eff 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -47,7 +47,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. 
@@ -74,7 +74,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, <#=blender#>(backdrop, source)); } @@ -104,7 +104,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, <#=blender#>(backdrop, source)); } @@ -133,7 +133,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. @@ -159,7 +159,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "<#=blender#>Dest" compositing equation. 
@@ -212,7 +212,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, <#=blender#>(source, backdrop)); } @@ -242,7 +242,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, <#=blender#>(source, backdrop)); } @@ -271,7 +271,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. @@ -297,7 +297,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "<#=blender#>Xor" compositing equation. @@ -323,7 +323,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "<#=blender#>Clear" compositing equation. @@ -349,7 +349,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); <#} #> diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index d3671abd47..be3e9ccd5d 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -455,6 +455,7 @@ public enum HwIntrinsics : long DisableVAES = 1L << 17, DisableWAITPKG = 1L << 18, DisableX86Serialize = 1 << 19, + // Arm64 DisableArm64Aes = 1L << 20, DisableArm64Atomics = 1L << 21, @@ -466,6 +467,7 @@ public enum HwIntrinsics : long DisableArm64Sha256 = 1L << 27, DisableArm64Sve = 1L << 28, DisableArm64Sve2 = 1L << 29, + // RISC-V64 DisableRiscV64Zba = 1L << 30, DisableRiscV64Zbb = 1L << 31, From fd688db0eb1c011a2b41e879c6b83ba843b9136c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 8 Apr 2026 15:32:55 +1000 Subject: [PATCH 11/12] Complete implementation and add tests/benchmark --- src/ImageSharp/Common/Helpers/Numerics.cs | 14 + .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 22 - .../Common/Helpers/Vector512Utilities.cs | 15 + .../DefaultPixelBlenders.Generated.cs | 17056 +++++++++++++++- 
.../DefaultPixelBlenders.Generated.tt | 156 +- .../PorterDuffFunctions.Generated.cs | 1370 +- .../PorterDuffFunctions.Generated.tt | 150 + .../PixelBlenders/PorterDuffFunctions.cs | 242 + .../PorterDuffBulkVsSingleVector.cs | 23 +- .../PorterDuffCompositorTests.cs | 2 +- .../PixelBlenders/PorterDuffFunctionsTests.cs | 152 +- .../PorterDuffFunctionsTestsTPixel.cs | 108 +- .../TestUtilities/ApproximateFloatComparer.cs | 18 +- 13 files changed, 18723 insertions(+), 605 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 513eb7ab19..04ed48e210 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -643,6 +643,20 @@ internal static class Numerics return Avx.Blend(result, alpha, BlendAlphaControl); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 UnPremultiply(Vector512 source, Vector512 alpha) + { + // Check if alpha is zero to avoid division by zero + Vector512 zeroMask = Vector512.Equals(alpha, Vector512.Zero); + + // Divide source by alpha if alpha is nonzero, otherwise set all components to match the source value + Vector512 result = Vector512.ConditionalSelect(zeroMask, source, source / alpha); + + // Blend the result with the alpha vector to ensure that the alpha component is unchanged + Vector512 alphaMask = Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle(); + return Vector512.ConditionalSelect(alphaMask, alpha, result); + } + /// /// Permutes the given vector return a new instance with all the values set to . 
/// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 154f0b5e22..022056deb0 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -601,28 +601,6 @@ internal static partial class SimdUtils } } - /// - /// Performs a multiplication and a negated addition of the . - /// - /// ret = va - (vm0 * vm1) - /// The vector to add to the negated intermediate result. - /// The first vector to multiply. - /// The second vector to multiply. - /// The . - [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplyAddNegated( - Vector256 va, - Vector256 vm0, - Vector256 vm1) - { - if (Fma.IsSupported) - { - return Fma.MultiplyAddNegated(vm0, vm1, va); - } - - return Avx.Subtract(va, Avx.Multiply(vm0, vm1)); - } - /// /// Blend packed 8-bit integers from and using . /// The high bit of each corresponding byte determines the selection. diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs index 03ee4626cd..82a20158ae 100644 --- a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs @@ -87,6 +87,21 @@ internal static class Vector512_ Vector512 vm1) => Avx512F.FusedMultiplyAdd(vm0, vm1, va); + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = va - (vm0 * vm1) + /// The vector to add to the negated intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyAddNegated( + Vector512 va, + Vector512 vm0, + Vector512 vm1) + => Avx512F.FusedMultiplyAddNegated(vm0, vm1, va); + /// /// Restricts a vector between a minimum and a maximum value. 
/// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index 7cd9cc57ad..883693031e 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -46,7 +46,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -85,7 +112,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if 
(Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -121,7 +178,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -168,7 +269,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan 
amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -233,7 +381,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref 
backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -272,7 +447,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide 
by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -308,7 +513,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -355,7 +604,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -420,7 +716,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -459,7 +782,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, 
sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -495,7 +848,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -542,7 +939,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -607,7 +1051,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -646,7 +1117,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, 
sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -682,7 +1183,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -729,7 +1274,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -794,15 +1386,15 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) { - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 - ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 2u); + // Divide by 4 as 4 elements 
per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); - ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); - ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - Vector256 opacity = Vector256.Create(amount); + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { @@ -812,9 +1404,36 @@ internal static class DefaultPixelBlenders sourceBase = ref Unsafe.Add(ref sourceBase, 1); } - if (Numerics.Modulo2(destination.Length) != 0) + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 2u); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
int i = destination.Length - 1; destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); } @@ -833,7 +1452,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -869,7 +1518,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && 
destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = 
ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -916,7 +1609,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -981,7 +1721,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref 
backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1020,7 +1787,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 
2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1056,7 +1853,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1103,7 +1944,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1168,7 +2056,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1207,7 +2122,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, 
opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1243,7 +2188,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1290,7 +2279,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1355,7 +2391,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1394,7 +2457,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1430,7 +2523,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1477,7 +2614,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1542,7 +2726,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1581,7 +2792,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1617,7 +2858,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1664,7 +2949,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1729,7 +3061,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1768,7 +3127,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1804,7 +3193,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1851,7 +3284,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1916,7 +3396,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1955,7 +3462,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1991,7 +3528,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2038,7 +3619,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2103,7 +3731,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2142,7 +3797,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2178,7 +3863,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2225,7 +3954,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2290,7 +4066,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2329,7 +4132,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2365,7 +4198,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2412,7 +4289,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2477,7 +4401,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2516,7 +4467,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2552,7 +4533,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2599,7 +4624,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2664,7 +4736,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2703,7 +4802,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2739,7 +4868,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2786,7 +4959,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2851,7 +5071,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2890,7 +5137,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2926,7 +5203,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2960,20 +5281,67 @@ internal static class DefaultPixelBlenders int i = destination.Length - 1; destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); } - } - else - { - for (int i = 0; i < destination.Length; i++) + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length 
/ 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) 
{ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3038,7 +5406,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3077,7 +5472,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = 
ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3113,7 +5538,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3160,7 +5629,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3225,7 +5741,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref 
Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3264,7 +5807,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && 
destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3300,7 +5873,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3347,7 +5964,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3412,7 +6076,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3451,7 +6142,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3487,7 +6208,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3534,7 +6299,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3599,7 +6411,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3638,7 +6477,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3674,7 +6543,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3721,7 +6634,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3786,7 +6746,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3825,7 +6812,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3861,7 +6878,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3908,7 +6969,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3973,7 +7081,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4012,7 +7147,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4048,7 +7213,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4095,7 +7304,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4160,7 +7416,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4199,7 +7482,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4235,7 +7548,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4282,7 +7639,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4347,7 +7751,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4386,7 +7817,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4422,7 +7883,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4461,15 +7966,62 @@ internal static class DefaultPixelBlenders { for (int i = 0; i < destination.Length; i++) { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float 
amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4534,7 +8086,34 @@ internal static class DefaultPixelBlenders { 
amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4573,7 +8152,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4609,7 +8218,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4656,7 +8309,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4721,7 +8421,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 
2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4760,7 +8487,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + 
source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4796,7 +8553,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 
1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4843,7 +8644,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float 
amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4908,7 +8756,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per 
Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4947,7 +8822,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4983,7 +8888,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref 
amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5030,7 +8979,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = 
Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5095,7 +9091,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5134,7 +9157,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5170,7 +9223,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount 
values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5217,7 +9314,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5282,7 +9426,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5321,7 +9492,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5357,7 +9558,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5404,7 +9649,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5469,7 +9761,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5508,7 +9827,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5544,7 +9893,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5591,7 +9984,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5656,7 +10096,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5695,7 +10162,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, 
sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5731,7 +10228,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5778,7 +10319,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5843,7 +10431,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5882,7 +10497,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5918,7 +10563,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5952,20 +10641,67 @@ internal static class DefaultPixelBlenders int i = destination.Length - 1; destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); } - } - else - { - for (int i = 0; i < destination.Length; i++) + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + 
ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 
4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6030,7 +10766,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6069,7 +10832,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6105,7 +10898,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6152,7 +10989,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref 
Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6217,7 +11101,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref 
Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6256,7 +11167,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && 
destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6292,7 +11233,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6339,7 +11324,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6404,7 +11436,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6443,7 +11502,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6479,7 +11568,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6526,7 +11659,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6591,7 +11771,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6630,7 +11837,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6666,7 +11903,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6713,7 +11994,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6778,7 +12106,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6817,7 +12172,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6853,7 +12238,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6900,7 +12329,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6965,7 +12441,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7004,7 +12507,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7040,7 +12573,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7087,7 +12664,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7152,7 +12776,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7191,7 +12842,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7227,7 +12908,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7274,7 +12999,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7339,7 +13111,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7378,7 +13177,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7414,7 +13243,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7453,15 +13326,62 @@ internal static class DefaultPixelBlenders { for (int i = 0; i < destination.Length; i++) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref 
float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7526,7 +13446,34 @@ internal static class 
DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7565,7 +13512,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7601,7 +13578,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7648,7 +13669,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7713,7 +13781,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) 
+ if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7752,7 +13847,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, 
source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7788,7 +13913,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float 
amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7835,7 +14004,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = 
ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7900,7 +14116,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per 
Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7939,7 +14182,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7975,7 +14248,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create 
a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8022,7 +14339,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, 
source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8087,7 +14451,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); 
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8126,7 +14517,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8162,7 +14583,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 
and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8209,7 +14674,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8274,7 +14786,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + 
ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8313,7 +14852,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8349,7 +14918,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8396,7 +15009,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8461,7 +15121,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8500,7 +15187,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8536,7 +15253,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8583,7 +15344,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8648,7 +15456,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8687,7 +15522,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8723,7 +15588,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8770,7 +15679,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8835,7 +15791,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8874,7 +15857,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8910,7 +15923,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8944,20 +16001,67 @@ internal static class DefaultPixelBlenders int i = destination.Length - 1; destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); } - } - else - { - for (int i = 0; i < destination.Length; i++) + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 
and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9022,7 +16126,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9061,7 +16192,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9097,7 +16258,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9144,7 +16349,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + 
ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9209,7 +16461,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref 
Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9248,7 +16527,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 
2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9284,7 +16593,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9331,7 +16684,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9396,7 +16796,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9435,7 +16862,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9471,7 +16928,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9518,7 +17019,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9583,7 +17131,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9622,7 +17197,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9658,7 +17263,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9705,7 +17354,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9770,7 +17466,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9809,7 +17532,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9845,7 +17598,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9892,7 +17689,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9957,7 +17801,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9996,7 +17867,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10032,7 +17933,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10079,7 +18024,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10144,7 +18136,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10183,7 +18202,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10219,7 +18268,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10266,7 +18359,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10331,7 +18471,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10370,7 +18537,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10406,7 +18603,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10445,15 +18686,62 @@ internal static class DefaultPixelBlenders { for (int i = 0; i < destination.Length; i++) { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); 
+ ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10518,7 +18806,34 @@ internal static class 
DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10557,7 +18872,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10593,7 +18938,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10640,7 +19029,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10705,7 +19141,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) 
+ if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10744,7 +19207,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + 
source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10780,7 +19273,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref 
amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10827,7 +19364,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10892,7 +19476,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && 
destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10931,7 +19542,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10967,7 +19608,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = 
Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11014,7 +19699,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref 
MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11079,7 +19811,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 
per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11118,7 +19877,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11154,7 +19943,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We 
need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11201,7 +20034,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, 
source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11266,7 +20146,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11305,7 +20212,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 
opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11341,7 +20278,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount 
values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11388,7 +20369,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11453,7 +20481,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = 
ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11492,7 +20547,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref 
destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11528,7 +20613,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11575,7 +20704,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11640,7 +20816,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11679,7 +20882,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11715,7 +20948,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11762,7 +21039,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11827,7 +21151,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + 
ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11866,7 +21217,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11902,7 +21283,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11936,20 +21361,67 @@ internal static class DefaultPixelBlenders int i = destination.Length - 1; destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); } - } - else - { - for (int i = 0; i < destination.Length; i++) + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, 
(uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && 
destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12014,7 +21486,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12053,7 +21552,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12089,7 +21618,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref 
Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12136,7 +21709,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 
as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12201,7 +21821,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref 
Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12240,7 +21887,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && 
destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12276,7 +21953,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12323,7 +22044,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12388,7 +22156,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12427,7 +22222,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12463,7 +22288,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12510,7 +22379,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12575,7 +22491,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + 
ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12614,7 +22557,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12650,7 +22623,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12697,7 +22714,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12762,7 +22826,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12801,7 +22892,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12837,7 +22958,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12884,7 +23049,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12949,7 +23161,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12988,7 +23227,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13024,7 +23293,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13071,7 +23384,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13136,7 +23496,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13175,7 +23562,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13211,7 +23628,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13258,7 +23719,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13323,7 +23831,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13362,7 +23897,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13398,7 +23963,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13437,15 +24046,62 @@ internal static class DefaultPixelBlenders { for (int i = 0; i < destination.Length; i++) { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13510,7 
+24166,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13549,7 +24232,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + 
ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13585,7 +24298,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13632,7 +24389,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13697,7 +24501,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 
0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13736,7 +24567,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13772,7 +24633,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13819,7 +24724,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13884,7 +24836,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length 
>= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13923,7 +24902,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, 
source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13959,7 +24968,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 
= Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14006,7 +25059,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref 
MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14071,7 +25171,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per 
Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14110,7 +25237,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14146,7 +25303,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We 
need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14193,7 +25394,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, 
source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14258,7 +25506,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14297,7 +25572,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity 
= Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14333,7 +25638,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // 
taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14380,7 +25729,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14445,7 +25841,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14484,7 +25907,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14520,7 +25973,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14567,7 +26064,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14632,7 +26176,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14671,7 +26242,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14707,7 +26308,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14754,7 +26399,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14819,7 +26511,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14858,7 +26577,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14894,7 +26643,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14928,20 +26721,67 @@ internal static class DefaultPixelBlenders int i = destination.Length - 1; destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); } - } - else - { - for (int i = 0; i < destination.Length; i++) + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 
4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // 
Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15006,7 +26846,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15045,7 +26912,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase 
= ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15081,7 +26978,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15128,7 +27069,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 
and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15193,7 +27181,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref 
Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15232,7 +27247,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && 
destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15268,7 +27313,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15315,7 +27404,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15380,7 +27516,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15419,7 +27582,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15455,7 +27648,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15502,7 +27739,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15567,7 +27851,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15606,7 +27917,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15642,7 +27983,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15689,7 +28074,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15754,7 +28186,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15793,7 +28252,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15829,7 +28318,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15876,7 +28409,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15941,7 +28521,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15980,7 +28587,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16016,7 +28653,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16063,7 +28744,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16128,7 +28856,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16167,7 +28922,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16203,7 +28988,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16250,7 +29079,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16315,7 +29191,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16354,7 +29257,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16390,7 +29323,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16429,15 +29406,62 @@ internal static class DefaultPixelBlenders { for (int i = 0; i < destination.Length; i++) { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref 
float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16502,7 +29526,34 @@ internal static class 
DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16541,7 +29592,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = 
ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16577,7 +29658,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref 
destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16624,7 +29749,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, 
(uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16689,7 +29861,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if 
(Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16728,7 +29927,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + 
Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16764,7 +29993,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = 
amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16811,7 +30084,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16876,7 +30196,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if 
(Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16915,7 +30262,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, 
source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16951,7 +30328,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = 
Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16998,7 +30419,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref 
MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17063,7 +30531,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per 
Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17102,7 +30597,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17138,7 +30663,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We 
need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17185,7 +30754,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, 
source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17250,7 +30866,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17289,7 +30932,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = 
Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17325,7 +30998,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each 
quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17372,7 +31089,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17437,7 +31201,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, 
(uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17476,7 +31267,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17512,7 +31333,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17559,7 +31424,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17624,7 +31536,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17663,7 +31602,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17699,7 +31668,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17746,7 +31759,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17811,7 +31871,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17850,7 +31937,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17886,7 +32003,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17920,20 +32081,67 @@ internal static class DefaultPixelBlenders int i = destination.Length - 1; destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); } - } - else - { - for (int i = 0; i < destination.Length; i++) + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + 
ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 
4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17998,7 +32206,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18037,7 +32272,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18073,7 +32338,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18120,7 +32429,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + 
ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18185,7 +32541,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref 
Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18224,7 +32607,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && 
destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18260,7 +32673,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18307,7 +32764,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18372,7 +32876,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18411,7 +32942,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18447,7 +33008,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18494,7 +33099,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18559,7 +33211,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18598,7 +33277,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18634,7 +33343,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18681,7 +33434,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18746,7 +33546,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18785,7 +33612,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18821,7 +33678,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18868,7 +33769,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18933,7 +33881,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18972,7 +33947,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, 
sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19008,7 +34013,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19055,7 +34104,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19120,7 +34216,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19159,7 +34282,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, 
sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19195,7 +34348,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19242,7 +34439,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19307,7 +34551,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19346,7 +34617,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19382,7 +34683,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19421,15 +34766,62 @@ internal static class DefaultPixelBlenders { for (int i = 0; i < destination.Length; i++) { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + + /// + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = 
ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19494,7 +34886,34 @@ internal static class DefaultPixelBlenders { amount = 
Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19533,7 +34952,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19569,7 +35018,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + 
{ + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19616,7 +35109,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = 
ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19681,7 +35221,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && 
destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19720,7 +35287,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19756,7 +35353,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 
2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19803,7 +35444,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + 
+ Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19868,7 +35556,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = 
ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19907,7 +35622,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19943,7 +35688,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount 
values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19990,7 +35779,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20055,7 +35891,34 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20094,7 +35957,37 @@ internal static class DefaultPixelBlenders { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref 
destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20130,7 +36023,51 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20177,7 +36114,54 @@ internal static class DefaultPixelBlenders /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, 
source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 3b885826b8..c2439c24cc 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -89,7 +89,34 @@ var blenders = new []{ { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + 
if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -128,7 +155,37 @@ var blenders = new []{ { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, 
source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -164,7 +221,51 @@ var blenders = new []{ /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = 
Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -211,7 +312,54 @@ var blenders = new []{ /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref 
MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index f0635230ca..d32966c24e 100644 --- 
a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -39,6 +39,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "NormalSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "NormalSrcAtop" compositing equation. /// @@ -69,6 +80,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcOver" compositing equation. /// @@ -99,6 +125,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcIn" compositing equation. /// @@ -125,6 +166,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalSrcOut" compositing equation. /// @@ -151,6 +203,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalDest" compositing equation. /// @@ -177,6 +240,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "NormalDest" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "NormalDestAtop" compositing equation. /// @@ -207,6 +283,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestOver" compositing equation. /// @@ -237,6 +328,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestIn" compositing equation. 
/// @@ -263,6 +369,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "NormalDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "NormalDestOut" compositing equation. /// @@ -289,6 +406,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "NormalDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "NormalXor" compositing equation. /// @@ -315,6 +443,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalClear" compositing equation. /// @@ -341,6 +480,17 @@ internal static partial class PorterDuffFunctions public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalSrc" compositing equation. @@ -560,6 +710,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "MultiplySrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "MultiplySrcAtop" compositing equation. /// @@ -590,6 +751,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcAtop" compositing equation. + /// + /// The backdrop vector.
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcOver" compositing equation. /// @@ -620,6 +796,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcIn" compositing equation. /// @@ -646,6 +837,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplySrcOut" compositing equation. 
/// @@ -672,6 +874,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplyDest" compositing equation. /// @@ -698,6 +911,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "MultiplyDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "MultiplyDestAtop" compositing equation. /// @@ -728,6 +954,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestOver" compositing equation. 
/// @@ -758,6 +999,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestIn" compositing equation. /// @@ -784,6 +1040,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "MultiplyDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "MultiplyDestOut" compositing equation. /// @@ -810,6 +1077,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "MultiplyDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "MultiplyXor" compositing equation. /// @@ -836,6 +1114,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplyXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplyClear" compositing equation. /// @@ -862,6 +1151,17 @@ internal static partial class PorterDuffFunctions public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplyClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplySrc" compositing equation. 
@@ -1081,6 +1381,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "AddSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "AddSrcAtop" compositing equation. /// @@ -1111,6 +1422,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Add(backdrop, source)); } + /// + /// Returns the result of the "AddSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Add(backdrop, source)); + } + /// /// Returns the result of the "AddSrcOver" compositing equation. /// @@ -1142,18 +1468,33 @@ internal static partial class PorterDuffFunctions } /// - /// Returns the result of the "AddSrcIn" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector512 AddSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) { - source = Numerics.WithW(source, source * opacity); + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); - return In(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); + } + + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + { + source = Numerics.WithW(source, source * opacity); + + return In(backdrop, source); } /// @@ -1167,6 +1508,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddSrcOut" compositing equation. /// @@ -1193,6 +1545,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddDest" compositing equation. /// @@ -1219,6 +1582,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "AddDestAtop" compositing equation. /// @@ -1249,6 +1625,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestOver" compositing equation. /// @@ -1279,6 +1670,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestIn" compositing equation. /// @@ -1305,6 +1711,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "AddDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "AddDestOut" compositing equation. /// @@ -1331,6 +1748,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "AddDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "AddXor" compositing equation. 
/// @@ -1357,6 +1785,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddClear" compositing equation. /// @@ -1383,6 +1822,17 @@ internal static partial class PorterDuffFunctions public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddSrc" compositing equation. @@ -1602,6 +2052,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "SubtractSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "SubtractSrcAtop" compositing equation. /// @@ -1632,6 +2093,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcOver" compositing equation. /// @@ -1662,6 +2138,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcIn" compositing equation. 
/// @@ -1688,6 +2179,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractSrcOut" compositing equation. /// @@ -1714,6 +2216,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractDest" compositing equation. /// @@ -1740,6 +2253,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "SubtractDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "SubtractDestAtop" compositing equation. /// @@ -1770,6 +2296,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestOver" compositing equation. /// @@ -1800,6 +2341,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestIn" compositing equation. /// @@ -1826,6 +2382,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "SubtractDestIn" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "SubtractDestOut" compositing equation. /// @@ -1852,6 +2419,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "SubtractDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "SubtractXor" compositing equation. /// @@ -1878,6 +2456,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractClear" compositing equation. 
/// @@ -1904,6 +2493,17 @@ internal static partial class PorterDuffFunctions public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractSrc" compositing equation. @@ -2123,6 +2723,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "ScreenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "ScreenSrcAtop" compositing equation. /// @@ -2153,6 +2764,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcOver" compositing equation. /// @@ -2183,6 +2809,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcIn" compositing equation. /// @@ -2209,6 +2850,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenSrcOut" compositing equation. 
/// @@ -2235,6 +2887,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenDest" compositing equation. /// @@ -2261,6 +2924,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "ScreenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "ScreenDestAtop" compositing equation. /// @@ -2282,11 +2958,26 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, source * opacity, BlendAlphaControl); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector512 ScreenDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) { - source = Avx.Blend(source, source * opacity, BlendAlphaControl); + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); return Atop(source, backdrop, Screen(source, backdrop)); } @@ -2321,6 +3012,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Screen(source, backdrop)); } + /// + /// Returns the result of the "ScreenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Screen(source, backdrop)); + } + /// /// Returns the result of the "ScreenDestIn" compositing equation. /// @@ -2347,6 +3053,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "ScreenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "ScreenDestOut" compositing equation. 
/// @@ -2373,6 +3090,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "ScreenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "ScreenXor" compositing equation. /// @@ -2399,6 +3127,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenClear" compositing equation. /// @@ -2425,6 +3164,17 @@ internal static partial class PorterDuffFunctions public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenSrc" compositing equation. @@ -2644,6 +3394,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "DarkenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "DarkenSrcAtop" compositing equation. /// @@ -2674,6 +3435,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcOver" compositing equation. /// @@ -2704,6 +3480,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. 
Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcIn" compositing equation. /// @@ -2730,6 +3521,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenSrcOut" compositing equation. /// @@ -2756,6 +3558,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenDest" compositing equation. 
/// @@ -2782,6 +3595,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "DarkenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "DarkenDestAtop" compositing equation. /// @@ -2812,6 +3638,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestOver" compositing equation. /// @@ -2842,6 +3683,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestIn" compositing equation. 
/// @@ -2868,6 +3724,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "DarkenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "DarkenDestOut" compositing equation. /// @@ -2894,6 +3761,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "DarkenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "DarkenXor" compositing equation. /// @@ -2920,6 +3798,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenClear" compositing equation. /// @@ -2946,6 +3835,17 @@ internal static partial class PorterDuffFunctions public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenSrc" compositing equation. @@ -3165,6 +4065,17 @@ internal static partial class PorterDuffFunctions public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "LightenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "LightenSrcAtop" compositing equation. /// @@ -3195,6 +4106,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcAtop" compositing equation. + /// + /// The backdrop vector.
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcOver" compositing equation. /// @@ -3225,6 +4151,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcIn" compositing equation. /// @@ -3251,6 +4192,17 @@ internal static partial class PorterDuffFunctions public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenSrcOut" compositing equation. 
/// @@ -3277,6 +4229,17 @@ internal static partial class PorterDuffFunctions public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenDest" compositing equation. /// @@ -3303,6 +4266,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "LightenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "LightenDestAtop" compositing equation. /// @@ -3333,6 +4309,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Lighten(source, backdrop)); + } + /// /// Returns the result of the "LightenDestOver" compositing equation. 
/// @@ -3363,6 +4354,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Lighten(source, backdrop)); + } + /// /// Returns the result of the "LightenDestIn" compositing equation. /// @@ -3386,8 +4392,19 @@ internal static partial class PorterDuffFunctions /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + + /// + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); /// /// Returns the result of the "LightenDestOut" compositing equation. 
@@ -3415,6 +4432,17 @@ internal static partial class PorterDuffFunctions public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "LightenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "LightenXor" compositing equation. /// @@ -3441,6 +4469,17 @@ internal static partial class PorterDuffFunctions public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenClear" compositing equation. /// @@ -3467,6 +4506,17 @@ internal static partial class PorterDuffFunctions public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenSrc" compositing equation. @@ -3686,6 +4736,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "OverlaySrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "OverlaySrcAtop" compositing equation. /// @@ -3716,6 +4777,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcOver" compositing equation. /// @@ -3746,6 +4822,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity.
Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcIn" compositing equation. /// @@ -3772,6 +4863,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlaySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlaySrcOut" compositing equation. /// @@ -3798,6 +4900,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlaySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlayDest" compositing equation. 
/// @@ -3824,6 +4937,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "OverlayDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "OverlayDestAtop" compositing equation. /// @@ -3854,6 +4980,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestOver" compositing equation. /// @@ -3884,6 +5025,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestIn" compositing equation. 
/// @@ -3910,6 +5066,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "OverlayDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "OverlayDestOut" compositing equation. /// @@ -3936,6 +5103,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "OverlayDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "OverlayXor" compositing equation. /// @@ -3962,6 +5140,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlayXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlayClear" compositing equation. /// @@ -3988,6 +5177,17 @@ internal static partial class PorterDuffFunctions public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlayClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlaySrc" compositing equation. @@ -4207,6 +5407,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "HardLightSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "HardLightSrcAtop" compositing equation. /// @@ -4237,6 +5448,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcAtop" compositing equation. + /// + /// The backdrop vector.
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcOver" compositing equation. /// @@ -4267,6 +5493,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcIn" compositing equation. /// @@ -4293,6 +5534,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightSrcOut" compositing equation. 
/// @@ -4319,6 +5571,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightDest" compositing equation. /// @@ -4345,6 +5608,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "HardLightDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "HardLightDestAtop" compositing equation. /// @@ -4375,6 +5651,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestOver" compositing equation. 
/// @@ -4405,6 +5696,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestIn" compositing equation. /// @@ -4431,6 +5737,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "HardLightDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "HardLightDestOut" compositing equation. /// @@ -4457,6 +5774,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "HardLightDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "HardLightXor" compositing equation. /// @@ -4483,6 +5811,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightClear" compositing equation. /// @@ -4509,6 +5848,17 @@ internal static partial class PorterDuffFunctions public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightSrc" compositing equation. 
diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 83bc055eff..7cb007bcae 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -49,6 +49,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "<#=blender#>Src" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Src(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. /// @@ -79,6 +90,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcOver" compositing equation.
/// @@ -109,6 +135,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcIn" compositing equation. /// @@ -135,6 +176,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>SrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. /// @@ -161,6 +213,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>SrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "<#=blender#>Dest" compositing equation. /// @@ -187,6 +250,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "<#=blender#>Dest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Dest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "<#=blender#>DestAtop" compositing equation. /// @@ -217,6 +293,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestOver" compositing equation. /// @@ -247,6 +338,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestIn" compositing equation. /// @@ -273,6 +379,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "<#=blender#>DestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. /// @@ -299,6 +416,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "<#=blender#>DestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "<#=blender#>Xor" compositing equation. 
/// @@ -325,6 +453,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>Xor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Xor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "<#=blender#>Clear" compositing equation. /// @@ -351,6 +490,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>Clear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Clear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + <#} #> <# void GenerateGenericPixelBlender(string blender, string composer) { #> diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 45c4aade7b..948076fa32 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -45,6 +45,16 @@ internal static partial class PorterDuffFunctions public static Vector256 Normal(Vector256 backdrop, Vector256 source) => source; + /// + /// Returns the result of the "Normal" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Normal(Vector512 backdrop, Vector512 source) + => source; + /// /// Returns the result of the "Multiply" compositing equation. /// @@ -65,6 +75,16 @@ internal static partial class PorterDuffFunctions public static Vector256 Multiply(Vector256 backdrop, Vector256 source) => backdrop * source; + /// + /// Returns the result of the "Multiply" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Multiply(Vector512 backdrop, Vector512 source) + => backdrop * source; + /// /// Returns the result of the "Add" compositing equation. /// @@ -85,6 +105,16 @@ internal static partial class PorterDuffFunctions public static Vector256 Add(Vector256 backdrop, Vector256 source) => Vector256.Min(Vector256.Create(1F), backdrop + source); + /// + /// Returns the result of the "Add" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Add(Vector512 backdrop, Vector512 source) + => Vector512.Min(Vector512.Create(1F), backdrop + source); + /// /// Returns the result of the "Subtract" compositing equation. /// @@ -105,6 +135,16 @@ internal static partial class PorterDuffFunctions public static Vector256 Subtract(Vector256 backdrop, Vector256 source) => Vector256.Max(Vector256.Zero, backdrop - source); + /// + /// Returns the result of the "Subtract" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Subtract(Vector512 backdrop, Vector512 source) + => Vector512.Max(Vector512.Zero, backdrop - source); + /// /// Returns the result of the "Screen" compositing equation. /// @@ -128,6 +168,19 @@ internal static partial class PorterDuffFunctions return Vector256_.MultiplyAddNegated(vOne, vOne - backdrop, vOne - source); } + /// + /// Returns the result of the "Screen" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Screen(Vector512 backdrop, Vector512 source) + { + Vector512 vOne = Vector512.Create(1F); + return Vector512_.MultiplyAddNegated(vOne, vOne - backdrop, vOne - source); + } + /// /// Returns the result of the "Darken" compositing equation. /// @@ -148,6 +201,16 @@ internal static partial class PorterDuffFunctions public static Vector256 Darken(Vector256 backdrop, Vector256 source) => Vector256.Min(backdrop, source); + /// + /// Returns the result of the "Darken" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Darken(Vector512 backdrop, Vector512 source) + => Vector512.Min(backdrop, source); + /// /// Returns the result of the "Lighten" compositing equation. 
/// @@ -167,6 +230,16 @@ internal static partial class PorterDuffFunctions public static Vector256 Lighten(Vector256 backdrop, Vector256 source) => Vector256.Max(backdrop, source); + /// + /// Returns the result of the "Lighten" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Lighten(Vector512 backdrop, Vector512 source) + => Vector512.Max(backdrop, source); + /// /// Returns the result of the "Overlay" compositing equation. /// @@ -196,6 +269,19 @@ internal static partial class PorterDuffFunctions return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } + /// + /// Returns the result of the "Overlay" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Overlay(Vector512 backdrop, Vector512 source) + { + Vector512 color = OverlayValueFunction(backdrop, source); + return Vector512.Min(Vector512.Create(1F), Vector512.ConditionalSelect(AlphaMask512(), Vector512.Zero, color)); + } + /// /// Returns the result of the "HardLight" compositing equation. /// @@ -225,6 +311,19 @@ internal static partial class PorterDuffFunctions return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } + /// + /// Returns the result of the "HardLight" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLight(Vector512 backdrop, Vector512 source) + { + Vector512 color = OverlayValueFunction(source, backdrop); + return Vector512.Min(Vector512.Create(1F), Vector512.ConditionalSelect(AlphaMask512(), Vector512.Zero, color)); + } + /// /// Helper function for Overlay and HardLight modes /// @@ -253,6 +352,24 @@ internal static partial class PorterDuffFunctions return Avx.BlendVariable(left, right, cmp); } + /// + /// Helper function for Overlay and HardLight modes + /// + /// Backdrop color element + /// Source color element + /// Overlay value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayValueFunction(Vector512 backdrop, Vector512 source) + { + Vector512 vOne = Vector512.Create(1F); + Vector512 left = (backdrop + backdrop) * source; + + Vector512 vOneMinusSource = vOne - source; + Vector512 right = Vector512_.MultiplyAddNegated(vOne, vOneMinusSource + vOneMinusSource, vOne - backdrop); + Vector512 cmp = Avx512F.CompareGreaterThan(backdrop, Vector512.Create(.5F)); + return Vector512.ConditionalSelect(cmp, right, left); + } + /// /// Returns the result of the "Over" compositing equation. /// @@ -312,6 +429,36 @@ internal static partial class PorterDuffFunctions return Numerics.UnPremultiply(color, alpha); } + /// + /// Returns the result of the "Over" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Over(Vector512 destination, Vector512 source, Vector512 blend) + { + // calculate weights + Vector512 sW = Vector512_.ShuffleNative(source, ShuffleAlphaControl); + Vector512 dW = Vector512_.ShuffleNative(destination, ShuffleAlphaControl); + + Vector512 blendW = sW * dW; + Vector512 dstW = dW - blendW; + Vector512 srcW = sW - blendW; + + // calculate final alpha + Vector512 alpha = dstW + sW; + + // calculate final color + Vector512 color = destination * dstW; + color = Vector512_.MultiplyAdd(color, source, srcW); + color = Vector512_.MultiplyAdd(color, blend, blendW); + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "Atop" compositing equation. /// @@ -365,6 +512,31 @@ internal static partial class PorterDuffFunctions return Numerics.UnPremultiply(color, alpha); } + /// + /// Returns the result of the "Atop" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Atop(Vector512 destination, Vector512 source, Vector512 blend) + { + // calculate final alpha + Vector512 alpha = Vector512_.ShuffleNative(destination, ShuffleAlphaControl); + + // calculate weights + Vector512 sW = Vector512_.ShuffleNative(source, ShuffleAlphaControl); + Vector512 blendW = sW * alpha; + Vector512 dstW = alpha - blendW; + + // calculate final color + Vector512 color = Vector512_.MultiplyAdd(blend * blendW, destination, dstW); + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "In" compositing equation. /// @@ -402,6 +574,25 @@ internal static partial class PorterDuffFunctions return Numerics.UnPremultiply(color, alpha); } + /// + /// Returns the result of the "In" compositing equation. + /// + /// The destination vector. + /// The source vector. 
+ /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 In(Vector512 destination, Vector512 source) + { + // calculate alpha + Vector512 alpha = Vector512_.ShuffleNative(source * destination, ShuffleAlphaControl); + + // premultiply + Vector512 color = source * alpha; + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "Out" compositing equation. /// @@ -439,6 +630,25 @@ internal static partial class PorterDuffFunctions return Numerics.UnPremultiply(color, alpha); } + /// + /// Returns the result of the "Out" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Out(Vector512 destination, Vector512 source) + { + // calculate alpha + Vector512 alpha = Vector512_.ShuffleNative(source * (Vector512.Create(1F) - destination), ShuffleAlphaControl); + + // premultiply + Vector512 color = source * alpha; + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "XOr" compositing equation. /// @@ -487,9 +697,41 @@ internal static partial class PorterDuffFunctions return Numerics.UnPremultiply(color, alpha); } + /// + /// Returns the result of the "XOr" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Xor(Vector512 destination, Vector512 source) + { + // calculate weights + Vector512 sW = Vector512_.ShuffleNative(source, ShuffleAlphaControl); + Vector512 dW = Vector512_.ShuffleNative(destination, ShuffleAlphaControl); + + Vector512 vOne = Vector512.Create(1F); + Vector512 srcW = vOne - dW; + Vector512 dstW = vOne - sW; + + // calculate alpha + Vector512 alpha = Vector512_.MultiplyAdd(dW * dstW, sW, srcW); + Vector512 color = Vector512_.MultiplyAdd((dW * destination) * dstW, sW * source, srcW); + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 Clear(Vector4 backdrop, Vector4 source) => Vector4.Zero; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector512 Clear(Vector512 backdrop, Vector512 source) => Vector512.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector512 AlphaMask512() + => Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle(); } diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs index ecf8b125f7..fe43ce5e79 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -18,8 +18,8 @@ public class PorterDuffBulkVsSingleVector [GlobalSetup] public void Setup() { - this.backdrop = new Vector4[8 * 20]; - this.source = new Vector4[8 * 20]; + this.backdrop = new Vector4[8 * 40]; + this.source = new Vector4[8 * 40]; FillRandom(this.backdrop); FillRandom(this.source); @@ -49,7 +49,7 @@ public class PorterDuffBulkVsSingleVector return result; } - 
[Benchmark(Description = "Avx")] + [Benchmark(Description = "Avx2")] public Vector256 OverlayValueFunction_Avx() { ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); @@ -65,4 +65,21 @@ public class PorterDuffBulkVsSingleVector return result; } + + [Benchmark(Description = "Avx512")] + public Vector512 OverlayValueFunction_Avx512() + { + ref Vector512 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector512 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.source)); + + Vector512 result = default; + Vector512 opacity = Vector512.Create(.5F); + int count = this.backdrop.Length / 4; + for (nuint i = 0; i < (uint)count; i++) + { + result = PorterDuffFunctions.NormalSrcOver(Unsafe.Add(ref backdrop, i), Unsafe.Add(ref source, i), opacity); + } + + return result; + } } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs index 1086afe76d..994b7d02ee 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs @@ -59,7 +59,7 @@ public class PorterDuffCompositorTests FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX512 | HwIntrinsics.DisableAVX, provider, mode.ToString()); } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 976a272ebf..0def097881 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -4,7 +4,6 @@ using System.Numerics; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -using 
Castle.Components.DictionaryAdapter; using SixLabors.ImageSharp.PixelFormats.PixelBlenders; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -45,6 +44,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(NormalBlendFunctionData))] + public void NormalBlendFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.NormalSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData MultiplyFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -77,6 +92,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(MultiplyFunctionData))] + public void MultiplyFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.MultiplySrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -109,6 +140,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(AddFunctionData))] + public void AddFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if 
(!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.AddSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, @@ -141,6 +188,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(SubtractFunctionData))] + public void SubtractFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.SubtractSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -173,6 +236,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(ScreenFunctionData))] + public void ScreenFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.ScreenSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData DarkenFunctionData { get; } = new() { { new 
TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -205,6 +284,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(DarkenFunctionData))] + public void DarkenFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.DarkenSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -237,6 +332,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(LightenFunctionData))] + public void LightenFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.LightenSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -269,6 +380,22 @@ public class PorterDuffFunctionsTests Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(OverlayFunctionData))] + public void OverlayFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + 
Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.OverlaySrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -300,4 +427,27 @@ public class PorterDuffFunctionsTests Vector256 actual = PorterDuffFunctions.HardLightSrcOver(back256, source256, Vector256.Create(amount)); Assert.Equal(expected256, actual, FloatComparer); } + + [Theory] + [MemberData(nameof(HardLightFunctionData))] + public void HardLightFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.HardLightSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + + private static Vector512 CreateVector512(TestVector4 vector) + => Vector512.Create( + vector.X, vector.Y, vector.Z, vector.W, + vector.X, vector.Y, vector.Z, vector.W, + vector.X, vector.Y, vector.Z, vector.W, + vector.X, vector.Y, vector.Z, vector.W); } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs index 2c97cbde07..153a9ac487 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs @@ -9,12 +9,21 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTestsTPixel { + 
private const int BulkBlendCount = 4; + private static Span AsSpan(T value) where T : struct { return new Span(new[] { value }); } + private static T[] CreateFilledArray(T value) + { + T[] values = new T[BulkBlendCount]; + values.AsSpan().Fill(value); + return values; + } + public static TheoryData NormalBlendFunctionData = new() { { new TestPixel(1, 1, 1, 1), new TestPixel(1, 1, 1, 1), 1, new TestPixel(1, 1, 1, 1) }, @@ -46,9 +55,14 @@ public class PorterDuffFunctionsTestsTPixel public void NormalBlendFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.NormalSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.NormalSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData MultiplyFunctionData = new() @@ -86,9 +100,14 @@ public class PorterDuffFunctionsTestsTPixel public void MultiplyFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.MultiplySrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.MultiplySrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + 
VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData AddFunctionData = new() @@ -136,9 +155,14 @@ public class PorterDuffFunctionsTestsTPixel public void AddFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.AddSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.AddSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData SubtractFunctionData = new() @@ -176,9 +200,14 @@ public class PorterDuffFunctionsTestsTPixel public void SubtractFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.SubtractSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.SubtractSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData ScreenFunctionData = new() @@ -216,9 +245,14 @@ public class PorterDuffFunctionsTestsTPixel public void ScreenFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - 
new DefaultPixelBlenders.ScreenSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.ScreenSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData DarkenFunctionData = new() @@ -256,9 +290,14 @@ public class PorterDuffFunctionsTestsTPixel public void DarkenFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.DarkenSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.DarkenSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData LightenFunctionData = new() @@ -296,9 +335,14 @@ public class PorterDuffFunctionsTestsTPixel public void LightenFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.LightenSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.LightenSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), 
CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData OverlayFunctionData = new() @@ -336,9 +380,14 @@ public class PorterDuffFunctionsTestsTPixel public void OverlayFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.OverlaySrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.OverlaySrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData HardLightFunctionData = new() @@ -376,8 +425,13 @@ public class PorterDuffFunctionsTestsTPixel public void HardLightFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.HardLightSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.HardLightSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs 
b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs index 21ac6966b8..7c45dd047c 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs @@ -15,7 +15,8 @@ internal readonly struct ApproximateFloatComparer : IEqualityComparer, IEqualityComparer, IEqualityComparer, - IEqualityComparer> + IEqualityComparer>, + IEqualityComparer> { private readonly float epsilon; @@ -78,4 +79,19 @@ internal readonly struct ApproximateFloatComparer : && this.Equals(x.GetElement(7), y.GetElement(7)); public int GetHashCode([DisallowNull] Vector256 obj) => obj.GetHashCode(); + + public bool Equals(Vector512 x, Vector512 y) + { + for (int i = 0; i < Vector512.Count; i++) + { + if (!this.Equals(x.GetElement(i), y.GetElement(i))) + { + return false; + } + } + + return true; + } + + public int GetHashCode([DisallowNull] Vector512 obj) => obj.GetHashCode(); } From ad58e74505414da37e3f961ab1b03e79ac983abb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 9 Apr 2026 11:58:22 +1000 Subject: [PATCH 12/12] Allow -1 (unbounded) parallelism; validate settings --- .../Advanced/ParallelExecutionSettings.cs | 11 +- .../Advanced/ParallelRowIterator.cs | 86 +++++++++++-- src/ImageSharp/Configuration.cs | 1 + .../Helpers/ParallelRowIteratorTests.cs | 115 ++++++++++++++++++ 4 files changed, 199 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs index fd9692f9ae..ad0318297a 100644 --- a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs +++ b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs @@ -18,7 +18,10 @@ public readonly struct ParallelExecutionSettings /// /// Initializes a new instance of the struct. /// - /// The value used for initializing when using TPL. + /// + /// The value used for initializing when using TPL. + /// Set to -1 to leave the degree of parallelism unbounded. 
+ /// /// The value for . /// The . public ParallelExecutionSettings( @@ -44,7 +47,10 @@ public readonly struct ParallelExecutionSettings /// /// Initializes a new instance of the struct. /// - /// The value used for initializing when using TPL. + /// + /// The value used for initializing when using TPL. + /// Set to -1 to leave the degree of parallelism unbounded. + /// /// The . public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator) : this(maxDegreeOfParallelism, DefaultMinimumPixelsProcessedPerTask, memoryAllocator) @@ -58,6 +64,7 @@ public readonly struct ParallelExecutionSettings /// /// Gets the value used for initializing when using TPL. + /// A value of -1 leaves the degree of parallelism unbounded. /// public int MaxDegreeOfParallelism { get; } diff --git a/src/ImageSharp/Advanced/ParallelRowIterator.cs b/src/ImageSharp/Advanced/ParallelRowIterator.cs index d170631a29..98c2656d11 100644 --- a/src/ImageSharp/Advanced/ParallelRowIterator.cs +++ b/src/ImageSharp/Advanced/ParallelRowIterator.cs @@ -44,14 +44,14 @@ public static partial class ParallelRowIterator where T : struct, IRowOperation { ValidateRectangle(rectangle); + ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; int width = rectangle.Width; int height = rectangle.Height; - int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask); - int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps); + int numOfSteps = GetNumberOfSteps(width, height, parallelSettings); // Avoid TPL overhead in this trivial case: if (numOfSteps == 1) @@ -65,7 +65,7 @@ public static partial class ParallelRowIterator } int verticalStep = DivideCeil(rectangle.Height, numOfSteps); - ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps }; + ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps); RowOperationWrapper wrappingOperation = 
new(top, bottom, verticalStep, in operation); _ = Parallel.For( @@ -109,14 +109,14 @@ public static partial class ParallelRowIterator where TBuffer : unmanaged { ValidateRectangle(rectangle); + ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; int width = rectangle.Width; int height = rectangle.Height; - int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask); - int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps); + int numOfSteps = GetNumberOfSteps(width, height, parallelSettings); MemoryAllocator allocator = parallelSettings.MemoryAllocator; int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle); @@ -135,7 +135,7 @@ public static partial class ParallelRowIterator } int verticalStep = DivideCeil(height, numOfSteps); - ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps }; + ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps); RowOperationWrapper wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation); _ = Parallel.For( @@ -174,14 +174,14 @@ public static partial class ParallelRowIterator where T : struct, IRowIntervalOperation { ValidateRectangle(rectangle); + ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; int width = rectangle.Width; int height = rectangle.Height; - int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask); - int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps); + int numOfSteps = GetNumberOfSteps(width, height, parallelSettings); // Avoid TPL overhead in this trivial case: if (numOfSteps == 1) @@ -192,7 +192,7 @@ public static partial class ParallelRowIterator } int verticalStep = DivideCeil(rectangle.Height, numOfSteps); - ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps }; + 
ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps); RowIntervalOperationWrapper wrappingOperation = new(top, bottom, verticalStep, in operation); _ = Parallel.For( @@ -236,14 +236,14 @@ public static partial class ParallelRowIterator where TBuffer : unmanaged { ValidateRectangle(rectangle); + ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; int width = rectangle.Width; int height = rectangle.Height; - int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask); - int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps); + int numOfSteps = GetNumberOfSteps(width, height, parallelSettings); MemoryAllocator allocator = parallelSettings.MemoryAllocator; int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle); @@ -259,7 +259,7 @@ public static partial class ParallelRowIterator } int verticalStep = DivideCeil(height, numOfSteps); - ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps }; + ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps); RowIntervalOperationWrapper wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation); _ = Parallel.For( @@ -272,6 +272,37 @@ public static partial class ParallelRowIterator [MethodImpl(InliningOptions.ShortMethod)] private static int DivideCeil(long dividend, int divisor) => (int)Math.Min(1 + ((dividend - 1) / divisor), int.MaxValue); + /// + /// Creates the for the current iteration. + /// + /// The execution settings. + /// The number of row partitions to execute. + /// The instance. + [MethodImpl(InliningOptions.ShortMethod)] + private static ParallelOptions CreateParallelOptions(in ParallelExecutionSettings parallelSettings, int numOfSteps) + => new() { MaxDegreeOfParallelism = parallelSettings.MaxDegreeOfParallelism == -1 ? 
-1 : numOfSteps }; + + /// + /// Calculates the number of row partitions to execute for the given region. + /// + /// The width of the region. + /// The height of the region. + /// The execution settings. + /// The number of row partitions to execute. + [MethodImpl(InliningOptions.ShortMethod)] + private static int GetNumberOfSteps(int width, int height, in ParallelExecutionSettings parallelSettings) + { + int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask); + + if (parallelSettings.MaxDegreeOfParallelism == -1) + { + // Row batching cannot produce more useful partitions than the number of rows available. + return Math.Min(height, maxSteps); + } + + return Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps); + } + private static void ValidateRectangle(Rectangle rectangle) { Guard.MustBeGreaterThan( @@ -284,4 +315,35 @@ public static partial class ParallelRowIterator 0, $"{nameof(rectangle)}.{nameof(rectangle.Height)}"); } + + /// + /// Validates the supplied . + /// + /// The execution settings. + /// + /// Thrown when or + /// is invalid. + /// + /// + /// Thrown when is null. + /// This also guards the public default value, which bypasses constructor validation. + /// + private static void ValidateSettings(in ParallelExecutionSettings parallelSettings) + { + // ParallelExecutionSettings is a public struct, so callers can pass default and bypass constructor validation. 
+ if (parallelSettings.MaxDegreeOfParallelism is 0 or < -1) + { + throw new ArgumentOutOfRangeException( + $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}"); + } + + Guard.MustBeGreaterThan( + parallelSettings.MinimumPixelsProcessedPerTask, + 0, + $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}"); + + Guard.NotNull( + parallelSettings.MemoryAllocator, + $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}"); + } } diff --git a/src/ImageSharp/Configuration.cs b/src/ImageSharp/Configuration.cs index c2b02dedd9..2673927231 100644 --- a/src/ImageSharp/Configuration.cs +++ b/src/ImageSharp/Configuration.cs @@ -64,6 +64,7 @@ public sealed class Configuration /// /// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms /// configured with this instance. + /// Set to -1 to leave the degree of parallelism unbounded. /// Initialized with by default. /// public int MaxDegreeOfParallelism diff --git a/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs b/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs index 4b06f877fc..cf68f702ac 100644 --- a/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs @@ -13,6 +13,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers; public class ParallelRowIteratorTests { + public delegate void BufferedRowAction(int y, Span span); public delegate void RowIntervalAction(RowInterval rows, Span span); private readonly ITestOutputHelper output; @@ -200,6 +201,47 @@ public class ParallelRowIteratorTests Assert.Equal(expectedData, actualData); } + [Fact] + public void IterateRows_MaxDegreeOfParallelismMinusOne_ShouldVisitAllRows() + { + ParallelExecutionSettings parallelSettings = new( + -1, + 10, + Configuration.Default.MemoryAllocator); + + Rectangle rectangle = new(0, 0, 10, 10); + int[] actualData = new 
int[rectangle.Height]; + + void RowAction(int y) => actualData[y]++; + + TestRowActionOperation operation = new(RowAction); + + ParallelRowIterator.IterateRows( + rectangle, + in parallelSettings, + in operation); + + Assert.Equal(Enumerable.Repeat(1, rectangle.Height), actualData); + } + + [Fact] + public void IterateRowsWithTempBuffer_DefaultSettingsRequireInitialization() + { + ParallelExecutionSettings parallelSettings = default; + Rectangle rect = new(0, 0, 10, 10); + + void RowAction(int y, Span memory) + { + } + + TestRowOperation operation = new(RowAction); + + ArgumentOutOfRangeException ex = Assert.Throws( + () => ParallelRowIterator.IterateRows, Rgba32>(rect, in parallelSettings, in operation)); + + Assert.Contains(nameof(ParallelExecutionSettings.MaxDegreeOfParallelism), ex.Message); + } + public static TheoryData IterateRows_WithEffectiveMinimumPixelsLimit_Data = new() { @@ -296,6 +338,53 @@ public class ParallelRowIteratorTests Assert.Equal(expectedNumberOfSteps, actualNumberOfSteps); } + [Fact] + public void IterateRowIntervalsWithTempBuffer_MaxDegreeOfParallelismMinusOne_ShouldVisitAllRows() + { + ParallelExecutionSettings parallelSettings = new( + -1, + 10, + Configuration.Default.MemoryAllocator); + + Rectangle rectangle = new(0, 0, 10, 10); + int[] actualData = new int[rectangle.Height]; + + void RowAction(RowInterval rows, Span buffer) + { + for (int y = rows.Min; y < rows.Max; y++) + { + actualData[y]++; + } + } + + TestRowIntervalOperation operation = new(RowAction); + + ParallelRowIterator.IterateRowIntervals, Vector4>( + rectangle, + in parallelSettings, + in operation); + + Assert.Equal(Enumerable.Repeat(1, rectangle.Height), actualData); + } + + [Fact] + public void IterateRows_DefaultSettingsRequireInitialization() + { + ParallelExecutionSettings parallelSettings = default; + Rectangle rect = new(0, 0, 10, 10); + + void RowAction(int y) + { + } + + TestRowActionOperation operation = new(RowAction); + + ArgumentOutOfRangeException ex = 
Assert.Throws( + () => ParallelRowIterator.IterateRows(rect, in parallelSettings, in operation)); + + Assert.Contains(nameof(ParallelExecutionSettings.MaxDegreeOfParallelism), ex.Message); + } + public static readonly TheoryData IterateRectangularBuffer_Data = new() { @@ -445,6 +534,32 @@ public class ParallelRowIteratorTests } } + private readonly struct TestRowActionOperation : IRowOperation + { + private readonly Action action; + + public TestRowActionOperation(Action action) + => this.action = action; + + public void Invoke(int y) + => this.action(y); + } + + private readonly struct TestRowOperation : IRowOperation + where TBuffer : unmanaged + { + private readonly BufferedRowAction action; + + public TestRowOperation(BufferedRowAction action) + => this.action = action; + + public int GetRequiredBufferLength(Rectangle bounds) + => bounds.Width; + + public void Invoke(int y, Span span) + => this.action(y, span); + } + private readonly struct TestRowIntervalOperation : IRowIntervalOperation { private readonly Action action;