diff --git a/.gitattributes b/.gitattributes index 2fdea90e17..ff4ec94087 100644 --- a/.gitattributes +++ b/.gitattributes @@ -64,18 +64,19 @@ # Set explicit file behavior to: # treat as text # normalize to Unix-style line endings and -# use a union merge when resoling conflicts +# use a union merge when resolving conflicts ############################################################################### *.csproj text eol=lf merge=union *.dbproj text eol=lf merge=union *.fsproj text eol=lf merge=union *.ncrunchproject text eol=lf merge=union *.vbproj text eol=lf merge=union +*.shproj text eol=lf merge=union ############################################################################### # Set explicit file behavior to: # treat as text # normalize to Windows-style line endings and -# use a union merge when resoling conflicts +# use a union merge when resolving conflicts ############################################################################### *.sln text eol=crlf merge=union ############################################################################### diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 7c7504371d..379be36107 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -196,9 +196,9 @@ jobs: shell: pwsh run: ./ci-pack.ps1 - - name: MyGet Publish + - name: Feedz Publish shell: pwsh run: | - dotnet nuget push .\artifacts\*.nupkg -k ${{secrets.MYGET_TOKEN}} -s https://www.myget.org/F/sixlabors/api/v2/package - dotnet nuget push .\artifacts\*.snupkg -k ${{secrets.MYGET_TOKEN}} -s https://www.myget.org/F/sixlabors/api/v3/index.json + dotnet nuget push .\artifacts\*.nupkg -k ${{secrets.FEEDZ_TOKEN}} -s https://f.feedz.io/sixlabors/sixlabors/nuget/index.json + dotnet nuget push .\artifacts\*.snupkg -k ${{secrets.FEEDZ_TOKEN}} -s https://f.feedz.io/sixlabors/sixlabors/symbols # TODO: If github.ref starts with 'refs/tags' then it was tag push and we can optionally push out package to nuget.org diff --git a/README.md b/README.md index 4d0eb1e3a6..4962faf5b3 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ For more information, see the [.NET Foundation Code of Conduct](https://dotnetfo Install stable releases via Nuget; development releases are available via MyGet. -| Package Name | Release (NuGet) | Nightly (MyGet) | +| Package Name | Release (NuGet) | Nightly (Feedz.io) | |--------------------------------|-----------------|-----------------| -| `SixLabors.ImageSharp` | [![NuGet](https://img.shields.io/nuget/v/SixLabors.ImageSharp.svg)](https://www.nuget.org/packages/SixLabors.ImageSharp/) | [![MyGet](https://img.shields.io/myget/sixlabors/vpre/SixLabors.ImageSharp.svg)](https://www.myget.org/feed/sixlabors/package/nuget/SixLabors.ImageSharp) | +| `SixLabors.ImageSharp` | [![NuGet](https://img.shields.io/nuget/v/SixLabors.ImageSharp.svg)](https://www.nuget.org/packages/SixLabors.ImageSharp/) | [![feedz.io](https://img.shields.io/badge/endpoint.svg?url=https%3A%2F%2Ff.feedz.io%2Fsixlabors%2Fsixlabors%2Fshield%2FSixLabors.ImageSharp%2Flatest)](https://f.feedz.io/sixlabors/sixlabors/nuget/index.json) | ## Manual build diff --git a/shared-infrastructure b/shared-infrastructure index 6c52486c51..9a6cf00d9a 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 6c52486c512357475cbb92bbb7c4c0af4d85b1db +Subproject commit 9a6cf00d9a3d482bb08211dd8309f4724a2735cb diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs index fa2f72c74a..d4640f133f 100644 --- a/src/ImageSharp/Common/Constants.cs +++ b/src/ImageSharp/Common/Constants.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. namespace SixLabors.ImageSharp; diff --git a/src/ImageSharp/Common/Helpers/Guard.cs b/src/ImageSharp/Common/Helpers/Guard.cs index 95211ccda8..96cd094cd8 100644 --- a/src/ImageSharp/Common/Helpers/Guard.cs +++ b/src/ImageSharp/Common/Helpers/Guard.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Runtime.CompilerServices; using SixLabors.ImageSharp; @@ -17,13 +16,14 @@ internal static partial class Guard /// The type of the value. /// is not a value type. [MethodImpl(InliningOptions.ShortMethod)] - public static void MustBeValueType(TValue value, string parameterName) + public static void MustBeValueType(TValue value, [CallerArgumentExpression("value")] string? parameterName = null) + where TValue : notnull { if (value.GetType().IsValueType) { return; } - ThrowHelper.ThrowArgumentException("Type must be a struct.", parameterName); + ThrowHelper.ThrowArgumentException("Type must be a struct.", parameterName!); } } diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 45d6e6d13d..345f4b5c2e 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -5,6 +5,7 @@ using System.Buffers.Binary; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) // and ROTR (Rotate Right) emitting efficient CPU instructions: @@ -18,9 +19,12 @@ namespace SixLabors.ImageSharp; internal interface IComponentShuffle { /// - /// Gets the shuffle control. + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . /// - byte Control { get; } + /// The source span of bytes. + /// The destination span of bytes. + void ShuffleReduce(ref ReadOnlySpan source, ref Span dest); /// /// Shuffle 8-bit integers within 128-bit lanes in @@ -42,37 +46,25 @@ internal interface IShuffle4 : IComponentShuffle internal readonly struct DefaultShuffle4 : IShuffle4 { - private readonly byte p3; - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + public DefaultShuffle4(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p3 = p3; - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p3 = this.p3; - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { @@ -86,11 +78,9 @@ internal readonly struct DefaultShuffle4 : IShuffle4 internal readonly struct WXYZShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle2103); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -112,11 +102,9 @@ internal readonly struct WXYZShuffle4 : IShuffle4 internal readonly struct WZYXShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0123); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -138,11 +126,9 @@ internal readonly struct WZYXShuffle4 : IShuffle4 internal readonly struct YZWXShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0321); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -164,11 +150,9 @@ internal readonly struct YZWXShuffle4 : IShuffle4 internal readonly struct ZYXWShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3012); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -197,11 +181,9 @@ internal readonly struct ZYXWShuffle4 : IShuffle4 internal readonly struct XWZYShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(1, 2, 3, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle1230); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 76cffd82bf..348e8ec010 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -13,37 +14,25 @@ internal interface IPad3Shuffle4 : IComponentShuffle internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 { - private readonly byte p3; - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + public DefaultPad3Shuffle4(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p3 = p3; - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p3 = this.p3; - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); Span temp = stackalloc byte[4]; ref byte t = ref MemoryMarshal.GetReference(temp); @@ -51,7 +40,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) { - ref var s = ref Unsafe.Add(ref sBase, i); + ref byte s = ref Unsafe.Add(ref sBase, i); tu = Unsafe.As(ref s) | 0xFF000000; Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); @@ -64,11 +53,9 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index 9bee9d15ec..6ac8a2428a 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -13,33 +14,25 @@ internal interface IShuffle3 : IComponentShuffle internal readonly struct DefaultShuffle3 : IShuffle3 { - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle3(byte p2, byte p1, byte p0) + public DefaultShuffle3(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); - - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle3Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 3) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 90b77b568e..d4a5e8130a 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -13,34 +14,25 @@ internal interface IShuffle4Slice3 : IComponentShuffle internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 { - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + public DefaultShuffle4Slice3(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) { @@ -53,11 +45,9 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 4bc0040c67..3841b64b4d 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -297,7 +297,7 @@ internal static partial class SimdUtils // shuffle controls to add to the library. // We can add static ROS instances if need be in the future. Span bytes = stackalloc byte[Vector256.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = @@ -333,7 +333,7 @@ internal static partial class SimdUtils { // Ssse3 Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -382,7 +382,7 @@ internal static partial class SimdUtils Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -445,7 +445,7 @@ internal static partial class SimdUtils Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -489,7 +489,7 @@ internal static partial class SimdUtils Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -532,7 +532,8 @@ internal static partial class SimdUtils } /// - /// Performs a multiplication and an addition of the . + /// Performs a multiplication and an addition of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. @@ -549,22 +550,21 @@ internal static partial class SimdUtils { return Fma.MultiplyAdd(vm1, vm0, va); } - else - { - return Avx.Add(Avx.Multiply(vm0, vm1), va); - } + + return Avx.Add(Avx.Multiply(vm0, vm1), va); } /// - /// Performs a multiplication and a substraction of the . + /// Performs a multiplication and a subtraction of the . + /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. /// /// ret = (vm0 * vm1) - vs - /// The vector to substract from the intermediate result. + /// The vector to subtract from the intermediate result. /// The first vector to multiply. /// The second vector to multiply. /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplySubstract( + public static Vector256 MultiplySubtract( in Vector256 vs, in Vector256 vm0, in Vector256 vm1) @@ -573,10 +573,30 @@ internal static partial class SimdUtils { return Fma.MultiplySubtract(vm1, vm0, vs); } - else + + return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); + } + + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = c - (a * b) + /// The first vector to multiply. + /// The second vector to multiply. + /// The vector to add negated to the intermediate result. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + in Vector256 a, + in Vector256 b, + in Vector256 c) + { + if (Fma.IsSupported) { - return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); + return Fma.MultiplyAddNegated(a, b, c); } + + return Avx.Subtract(c, Avx.Multiply(a, b)); } /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index b91dc2fad2..aecdaa6390 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -37,6 +37,7 @@ internal static partial class SimdUtils /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -49,7 +50,7 @@ internal static partial class SimdUtils { VerifyShuffle4SpanInput(source, dest); - HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -62,6 +63,7 @@ internal static partial class SimdUtils /// Shuffle 8-bit integer triplets within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -75,7 +77,7 @@ internal static partial class SimdUtils // Source length should be smaller than dest length, and divisible by 3. VerifyShuffle3SpanInput(source, dest); - HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -88,6 +90,7 @@ internal static partial class SimdUtils /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -100,7 +103,7 @@ internal static partial class SimdUtils { VerifyPad3Shuffle4SpanInput(source, dest); - HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -113,6 +116,7 @@ internal static partial class SimdUtils /// Shuffles then slices 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -125,7 +129,7 @@ internal static partial class SimdUtils { VerifyShuffle4Slice3SpanInput(source, dest); - HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -141,7 +145,7 @@ internal static partial class SimdUtils { ref float sBase = ref MemoryMarshal.GetReference(source); ref float dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { @@ -153,7 +157,7 @@ internal static partial class SimdUtils } [Conditional("DEBUG")] - private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + internal static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) where T : struct { DebugGuard.IsTrue( @@ -222,14 +226,271 @@ internal static partial class SimdUtils public static class Shuffle { + public const byte MMShuffle0000 = 0b00000000; + public const byte MMShuffle0001 = 0b00000001; + public const byte MMShuffle0002 = 0b00000010; + public const byte MMShuffle0003 = 0b00000011; + public const byte MMShuffle0010 = 0b00000100; + public const byte MMShuffle0011 = 0b00000101; + public const byte MMShuffle0012 = 0b00000110; + public const byte MMShuffle0013 = 0b00000111; + public const byte MMShuffle0020 = 0b00001000; + public const byte MMShuffle0021 = 0b00001001; + public const byte MMShuffle0022 = 0b00001010; + public const byte MMShuffle0023 = 0b00001011; + public const byte MMShuffle0030 = 0b00001100; + public const byte MMShuffle0031 = 0b00001101; + public const byte MMShuffle0032 = 0b00001110; + public const byte MMShuffle0033 = 0b00001111; + public const byte MMShuffle0100 = 0b00010000; + public const byte MMShuffle0101 = 0b00010001; + public const byte MMShuffle0102 = 0b00010010; + public const byte MMShuffle0103 = 0b00010011; + public const byte MMShuffle0110 = 0b00010100; + public const byte MMShuffle0111 = 0b00010101; + public const byte MMShuffle0112 = 0b00010110; + public const byte MMShuffle0113 = 0b00010111; + public const byte MMShuffle0120 = 0b00011000; + public const byte MMShuffle0121 = 0b00011001; + public const byte MMShuffle0122 = 0b00011010; + public const byte MMShuffle0123 = 0b00011011; + public const byte MMShuffle0130 = 0b00011100; + public const byte MMShuffle0131 = 0b00011101; + public const byte MMShuffle0132 = 0b00011110; + public const byte MMShuffle0133 = 0b00011111; + public const byte MMShuffle0200 = 0b00100000; + public const byte MMShuffle0201 = 0b00100001; + public const byte MMShuffle0202 = 0b00100010; + public const byte MMShuffle0203 = 0b00100011; + public const byte MMShuffle0210 = 0b00100100; + public const byte MMShuffle0211 = 0b00100101; + public const byte MMShuffle0212 = 0b00100110; + public const byte MMShuffle0213 = 0b00100111; + public const byte MMShuffle0220 = 0b00101000; + public const byte MMShuffle0221 = 0b00101001; + public const byte MMShuffle0222 = 0b00101010; + public const byte MMShuffle0223 = 0b00101011; + public const byte MMShuffle0230 = 0b00101100; + public const byte MMShuffle0231 = 0b00101101; + public const byte MMShuffle0232 = 0b00101110; + public const byte MMShuffle0233 = 0b00101111; + public const byte MMShuffle0300 = 0b00110000; + public const byte MMShuffle0301 = 0b00110001; + public const byte MMShuffle0302 = 0b00110010; + public const byte MMShuffle0303 = 0b00110011; + public const byte MMShuffle0310 = 0b00110100; + public const byte MMShuffle0311 = 0b00110101; + public const byte MMShuffle0312 = 0b00110110; + public const byte MMShuffle0313 = 0b00110111; + public const byte MMShuffle0320 = 0b00111000; + public const byte MMShuffle0321 = 0b00111001; + public const byte MMShuffle0322 = 0b00111010; + public const byte MMShuffle0323 = 0b00111011; + public const byte MMShuffle0330 = 0b00111100; + public const byte MMShuffle0331 = 0b00111101; + public const byte MMShuffle0332 = 0b00111110; + public const byte MMShuffle0333 = 0b00111111; + public const byte MMShuffle1000 = 0b01000000; + public const byte MMShuffle1001 = 0b01000001; + public const byte MMShuffle1002 = 0b01000010; + public const byte MMShuffle1003 = 0b01000011; + public const byte MMShuffle1010 = 0b01000100; + public const byte MMShuffle1011 = 0b01000101; + public const byte MMShuffle1012 = 0b01000110; + public const byte MMShuffle1013 = 0b01000111; + public const byte MMShuffle1020 = 0b01001000; + public const byte MMShuffle1021 = 0b01001001; + public const byte MMShuffle1022 = 0b01001010; + public const byte MMShuffle1023 = 0b01001011; + public const byte MMShuffle1030 = 0b01001100; + public const byte MMShuffle1031 = 0b01001101; + public const byte MMShuffle1032 = 0b01001110; + public const byte MMShuffle1033 = 0b01001111; + public const byte MMShuffle1100 = 0b01010000; + public const byte MMShuffle1101 = 0b01010001; + public const byte MMShuffle1102 = 0b01010010; + public const byte MMShuffle1103 = 0b01010011; + public const byte MMShuffle1110 = 0b01010100; + public const byte MMShuffle1111 = 0b01010101; + public const byte MMShuffle1112 = 0b01010110; + public const byte MMShuffle1113 = 0b01010111; + public const byte MMShuffle1120 = 0b01011000; + public const byte MMShuffle1121 = 0b01011001; + public const byte MMShuffle1122 = 0b01011010; + public const byte MMShuffle1123 = 0b01011011; + public const byte MMShuffle1130 = 0b01011100; + public const byte MMShuffle1131 = 0b01011101; + public const byte MMShuffle1132 = 0b01011110; + public const byte MMShuffle1133 = 0b01011111; + public const byte MMShuffle1200 = 0b01100000; + public const byte MMShuffle1201 = 0b01100001; + public const byte MMShuffle1202 = 0b01100010; + public const byte MMShuffle1203 = 0b01100011; + public const byte MMShuffle1210 = 0b01100100; + public const byte MMShuffle1211 = 0b01100101; + public const byte MMShuffle1212 = 0b01100110; + public const byte MMShuffle1213 = 0b01100111; + public const byte MMShuffle1220 = 0b01101000; + public const byte MMShuffle1221 = 0b01101001; + public const byte MMShuffle1222 = 0b01101010; + public const byte MMShuffle1223 = 0b01101011; + public const byte MMShuffle1230 = 0b01101100; + public const byte MMShuffle1231 = 0b01101101; + public const byte MMShuffle1232 = 0b01101110; + public const byte MMShuffle1233 = 0b01101111; + public const byte MMShuffle1300 = 0b01110000; + public const byte MMShuffle1301 = 0b01110001; + public const byte MMShuffle1302 = 0b01110010; + public const byte MMShuffle1303 = 0b01110011; + public const byte MMShuffle1310 = 0b01110100; + public const byte MMShuffle1311 = 0b01110101; + public const byte MMShuffle1312 = 0b01110110; + public const byte MMShuffle1313 = 0b01110111; + public const byte MMShuffle1320 = 0b01111000; + public const byte MMShuffle1321 = 0b01111001; + public const byte MMShuffle1322 = 0b01111010; + public const byte MMShuffle1323 = 0b01111011; + public const byte MMShuffle1330 = 0b01111100; + public const byte MMShuffle1331 = 0b01111101; + public const byte MMShuffle1332 = 0b01111110; + public const byte MMShuffle1333 = 0b01111111; + public const byte MMShuffle2000 = 0b10000000; + public const byte MMShuffle2001 = 0b10000001; + public const byte MMShuffle2002 = 0b10000010; + public const byte MMShuffle2003 = 0b10000011; + public const byte MMShuffle2010 = 0b10000100; + public const byte MMShuffle2011 = 0b10000101; + public const byte MMShuffle2012 = 0b10000110; + public const byte MMShuffle2013 = 0b10000111; + public const byte MMShuffle2020 = 0b10001000; + public const byte MMShuffle2021 = 0b10001001; + public const byte MMShuffle2022 = 0b10001010; + public const byte MMShuffle2023 = 0b10001011; + public const byte MMShuffle2030 = 0b10001100; + public const byte MMShuffle2031 = 0b10001101; + public const byte MMShuffle2032 = 0b10001110; + public const byte MMShuffle2033 = 0b10001111; + public const byte MMShuffle2100 = 0b10010000; + public const byte MMShuffle2101 = 0b10010001; + public const byte MMShuffle2102 = 0b10010010; + public const byte MMShuffle2103 = 0b10010011; + public const byte MMShuffle2110 = 0b10010100; + public const byte MMShuffle2111 = 0b10010101; + public const byte MMShuffle2112 = 0b10010110; + public const byte MMShuffle2113 = 0b10010111; + public const byte MMShuffle2120 = 0b10011000; + public const byte MMShuffle2121 = 0b10011001; + public const byte MMShuffle2122 = 0b10011010; + public const byte MMShuffle2123 = 0b10011011; + public const byte MMShuffle2130 = 0b10011100; + public const byte MMShuffle2131 = 0b10011101; + public const byte MMShuffle2132 = 0b10011110; + public const byte MMShuffle2133 = 0b10011111; + public const byte MMShuffle2200 = 0b10100000; + public const byte MMShuffle2201 = 0b10100001; + public const byte MMShuffle2202 = 0b10100010; + public const byte MMShuffle2203 = 0b10100011; + public const byte MMShuffle2210 = 0b10100100; + public const byte MMShuffle2211 = 0b10100101; + public const byte MMShuffle2212 = 0b10100110; + public const byte MMShuffle2213 = 0b10100111; + public const byte MMShuffle2220 = 0b10101000; + public const byte MMShuffle2221 = 0b10101001; + public const byte MMShuffle2222 = 0b10101010; + public const byte MMShuffle2223 = 0b10101011; + public const byte MMShuffle2230 = 0b10101100; + public const byte MMShuffle2231 = 0b10101101; + public const byte MMShuffle2232 = 0b10101110; + public const byte MMShuffle2233 = 0b10101111; + public const byte MMShuffle2300 = 0b10110000; + public const byte MMShuffle2301 = 0b10110001; + public const byte MMShuffle2302 = 0b10110010; + public const byte MMShuffle2303 = 0b10110011; + public const byte MMShuffle2310 = 0b10110100; + public const byte MMShuffle2311 = 0b10110101; + public const byte MMShuffle2312 = 0b10110110; + public const byte MMShuffle2313 = 0b10110111; + public const byte MMShuffle2320 = 0b10111000; + public const byte MMShuffle2321 = 0b10111001; + public const byte MMShuffle2322 = 0b10111010; + public const byte MMShuffle2323 = 0b10111011; + public const byte MMShuffle2330 = 0b10111100; + public const byte MMShuffle2331 = 0b10111101; + public const byte MMShuffle2332 = 0b10111110; + public const byte MMShuffle2333 = 0b10111111; + public const byte MMShuffle3000 = 0b11000000; + public const byte MMShuffle3001 = 0b11000001; + public const byte MMShuffle3002 = 0b11000010; + public const byte MMShuffle3003 = 0b11000011; + public const byte MMShuffle3010 = 0b11000100; + public const byte MMShuffle3011 = 0b11000101; + public const byte MMShuffle3012 = 0b11000110; + public const byte MMShuffle3013 = 0b11000111; + public const byte MMShuffle3020 = 0b11001000; + public const byte MMShuffle3021 = 0b11001001; + public const byte MMShuffle3022 = 0b11001010; + public const byte MMShuffle3023 = 0b11001011; + public const byte MMShuffle3030 = 0b11001100; + public const byte MMShuffle3031 = 0b11001101; + public const byte MMShuffle3032 = 0b11001110; + public const byte MMShuffle3033 = 0b11001111; + public const byte MMShuffle3100 = 0b11010000; + public const byte MMShuffle3101 = 0b11010001; + public const byte MMShuffle3102 = 0b11010010; + public const byte MMShuffle3103 = 0b11010011; + public const byte MMShuffle3110 = 0b11010100; + public const byte MMShuffle3111 = 0b11010101; + public const byte MMShuffle3112 = 0b11010110; + public const byte MMShuffle3113 = 0b11010111; + public const byte MMShuffle3120 = 0b11011000; + public const byte MMShuffle3121 = 0b11011001; + public const byte MMShuffle3122 = 0b11011010; + public const byte MMShuffle3123 = 0b11011011; + public const byte MMShuffle3130 = 0b11011100; + public const byte MMShuffle3131 = 0b11011101; + public const byte MMShuffle3132 = 0b11011110; + public const byte MMShuffle3133 = 0b11011111; + public const byte MMShuffle3200 = 0b11100000; + public const byte MMShuffle3201 = 0b11100001; + public const byte MMShuffle3202 = 0b11100010; + public const byte MMShuffle3203 = 0b11100011; + public const byte MMShuffle3210 = 0b11100100; + public const byte MMShuffle3211 = 0b11100101; + public const byte MMShuffle3212 = 0b11100110; + public const byte MMShuffle3213 = 0b11100111; + public const byte MMShuffle3220 = 0b11101000; + public const byte MMShuffle3221 = 0b11101001; + public const byte MMShuffle3222 = 0b11101010; + public const byte MMShuffle3223 = 0b11101011; + public const byte MMShuffle3230 = 0b11101100; + public const byte MMShuffle3231 = 0b11101101; + public const byte MMShuffle3232 = 0b11101110; + public const byte MMShuffle3233 = 0b11101111; + public const byte MMShuffle3300 = 0b11110000; + public const byte MMShuffle3301 = 0b11110001; + public const byte MMShuffle3302 = 0b11110010; + public const byte MMShuffle3303 = 0b11110011; + public const byte MMShuffle3310 = 0b11110100; + public const byte MMShuffle3311 = 0b11110101; + public const byte MMShuffle3312 = 0b11110110; + public const byte MMShuffle3313 = 0b11110111; + public const byte MMShuffle3320 = 0b11111000; + public const byte MMShuffle3321 = 0b11111001; + public const byte MMShuffle3322 = 0b11111010; + public const byte MMShuffle3323 = 0b11111011; + public const byte MMShuffle3330 = 0b11111100; + public const byte MMShuffle3331 = 0b11111101; + public const byte MMShuffle3332 = 0b11111110; + public const byte MMShuffle3333 = 0b11111111; + [MethodImpl(InliningOptions.ShortMethod)] - public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) + public static byte MMShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); [MethodImpl(InliningOptions.ShortMethod)] - public static void MmShuffleSpan(ref Span span, byte control) + public static void MMShuffleSpan(ref Span span, byte control) { - InverseMmShuffle( + InverseMMShuffle( control, out int p3, out int p2, @@ -248,7 +509,7 @@ internal static partial class SimdUtils } [MethodImpl(InliningOptions.ShortMethod)] - public static void InverseMmShuffle( + public static void InverseMMShuffle( byte control, out int p3, out int p2, diff --git a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs index cae89fc3cf..7e102f696d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs @@ -99,7 +99,7 @@ internal static partial class FloatingPointDCT var mm256_F_1_4142 = Vector256.Create(1.414213562f); Vector256 tmp13 = Avx.Add(tmp1, tmp3); - Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); + Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubtract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); tmp0 = Avx.Add(tmp10, tmp13); tmp3 = Avx.Subtract(tmp10, tmp13); diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index d45fa2a427..8875ae1150 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -1,8 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; @@ -38,7 +38,7 @@ internal class AlphaDecoder : IDisposable this.LastRow = 0; int totalPixels = width * height; - var compression = (WebpAlphaCompressionMethod)(alphaChunkHeader & 0x03); + WebpAlphaCompressionMethod compression = (WebpAlphaCompressionMethod)(alphaChunkHeader & 0x03); if (compression is not WebpAlphaCompressionMethod.NoCompression and not WebpAlphaCompressionMethod.WebpLosslessCompression) { WebpThrowHelper.ThrowImageFormatException($"unexpected alpha compression method {compression} found"); @@ -59,7 +59,7 @@ internal class AlphaDecoder : IDisposable if (this.Compressed) { - var bitReader = new Vp8LBitReader(data); + Vp8LBitReader bitReader = new(data); this.LosslessDecoder = new WebpLosslessDecoder(bitReader, memoryAllocator, configuration); this.LosslessDecoder.DecodeImageStream(this.Vp8LDec, width, height, true); @@ -110,6 +110,7 @@ internal class AlphaDecoder : IDisposable /// /// Gets a value indicating whether the alpha channel uses compression. /// + [MemberNotNullWhen(true, nameof(LosslessDecoder))] private bool Compressed { get; } /// @@ -120,7 +121,7 @@ internal class AlphaDecoder : IDisposable /// /// Gets the Vp8L decoder which is used to de compress the alpha channel, if needed. /// - private WebpLosslessDecoder LosslessDecoder { get; } + private WebpLosslessDecoder? LosslessDecoder { get; } /// /// Gets a value indicating whether the decoding needs 1 byte per pixel for decoding. @@ -173,17 +174,14 @@ internal class AlphaDecoder : IDisposable dst = dst[this.Width..]; } } + else if (this.Use8BDecode) + { + this.LosslessDecoder.DecodeAlphaData(this); + } else { - if (this.Use8BDecode) - { - this.LosslessDecoder.DecodeAlphaData(this); - } - else - { - this.LosslessDecoder.DecodeImageData(this.Vp8LDec, this.Vp8LDec.Pixels.Memory.Span); - this.ExtractAlphaRows(this.Vp8LDec); - } + this.LosslessDecoder.DecodeImageData(this.Vp8LDec, this.Vp8LDec.Pixels.Memory.Span); + this.ExtractAlphaRows(this.Vp8LDec); } } @@ -261,8 +259,7 @@ internal class AlphaDecoder : IDisposable { int numRowsToProcess = dec.Height; int width = dec.Width; - Span pixels = dec.Pixels.Memory.Span; - Span input = pixels; + Span input = dec.Pixels.Memory.Span; Span output = this.Alpha.Memory.Span; // Extract alpha (which is stored in the green plane). @@ -327,7 +324,7 @@ internal class AlphaDecoder : IDisposable ref byte srcRef = ref MemoryMarshal.GetReference(input); for (i = 1; i + 8 <= width; i += 8) { - var a0 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, i)), 0); + Vector128 a0 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, i)), 0); Vector128 a1 = Sse2.Add(a0.AsByte(), last.AsByte()); Vector128 a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1); Vector128 a3 = Sse2.Add(a1, a2); @@ -365,32 +362,29 @@ internal class AlphaDecoder : IDisposable { HorizontalUnfilter(null, input, dst, width); } - else + else if (Avx2.IsSupported) { - if (Avx2.IsSupported) + nint i; + int maxPos = width & ~31; + for (i = 0; i < maxPos; i += 32) { - nint i; - int maxPos = width & ~31; - for (i = 0; i < maxPos; i += 32) - { - Vector256 a0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(input), i)); - Vector256 b0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(prev), i)); - Vector256 c0 = Avx2.Add(a0.AsByte(), b0.AsByte()); - ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i); - Unsafe.As>(ref outputRef) = c0; - } + Vector256 a0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(input), i)); + Vector256 b0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(prev), i)); + Vector256 c0 = Avx2.Add(a0.AsByte(), b0.AsByte()); + ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i); + Unsafe.As>(ref outputRef) = c0; + } - for (; i < width; i++) - { - dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]); - } + for (; i < width; i++) + { + dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]); } - else + } + else + { + for (int i = 0; i < width; i++) { - for (int i = 0; i < width; i++) - { - dst[i] = (byte)(prev[i] + input[i]); - } + dst[i] = (byte)(prev[i] + input[i]); } } } diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs index 292c31f6a5..596715b205 100644 --- a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Advanced; @@ -13,10 +12,8 @@ namespace SixLabors.ImageSharp.Formats.Webp; /// /// Methods for encoding the alpha data of a VP8 image. /// -internal class AlphaEncoder : IDisposable +internal static class AlphaEncoder { - private IMemoryOwner alphaData; - /// /// Encodes the alpha channel data. /// Data is either compressed as lossless webp image or uncompressed. @@ -29,12 +26,18 @@ internal class AlphaEncoder : IDisposable /// Indicates, if the data should be compressed with the lossless webp compression. /// The size in bytes of the alpha data. /// The encoded alpha data. - public IMemoryOwner EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool skipMetadata, bool compress, out int size) + public static IMemoryOwner EncodeAlpha( + Image image, + Configuration configuration, + MemoryAllocator memoryAllocator, + bool skipMetadata, + bool compress, + out int size) where TPixel : unmanaged, IPixel { int width = image.Width; int height = image.Height; - this.alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); + IMemoryOwner alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); if (compress) { @@ -55,15 +58,15 @@ internal class AlphaEncoder : IDisposable // The transparency information will be stored in the green channel of the ARGB quadruplet. // The green channel is allowed extra transformation steps in the specification -- unlike the other channels, // that can improve compression. - using Image alphaAsImage = DispatchAlphaToGreen(image, this.alphaData.GetSpan()); + using Image alphaAsImage = DispatchAlphaToGreen(image, alphaData.GetSpan()); - size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, this.alphaData); + size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, alphaData); - return this.alphaData; + return alphaData; } size = width * height; - return this.alphaData; + return alphaData; } /// @@ -128,7 +131,4 @@ internal class AlphaEncoder : IDisposable return alphaDataBuffer; } - - /// - public void Dispose() => this.alphaData?.Dispose(); } diff --git a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs index 2586690fd3..83f9e797ab 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/BitReaderBase.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Memory; @@ -14,10 +13,16 @@ internal abstract class BitReaderBase : IDisposable { private bool isDisposed; + protected BitReaderBase(IMemoryOwner data) + => this.Data = data; + + protected BitReaderBase(Stream inputStream, int imageDataSize, MemoryAllocator memoryAllocator) + => this.Data = ReadImageDataFromStream(inputStream, imageDataSize, memoryAllocator); + /// - /// Gets or sets the raw encoded image data. + /// Gets the raw encoded image data. /// - public IMemoryOwner Data { get; set; } + public IMemoryOwner Data { get; } /// /// Copies the raw encoded image data from the stream into a byte array. @@ -25,11 +30,13 @@ internal abstract class BitReaderBase : IDisposable /// The input stream. /// Number of bytes to read as indicated from the chunk size. /// Used for allocating memory during reading data from the stream. - protected void ReadImageDataFromStream(Stream input, int bytesToRead, MemoryAllocator memoryAllocator) + protected static IMemoryOwner ReadImageDataFromStream(Stream input, int bytesToRead, MemoryAllocator memoryAllocator) { - this.Data = memoryAllocator.Allocate(bytesToRead); - Span dataSpan = this.Data.Memory.Span; + IMemoryOwner data = memoryAllocator.Allocate(bytesToRead); + Span dataSpan = data.Memory.Span; input.Read(dataSpan[..bytesToRead], 0, bytesToRead); + + return data; } protected virtual void Dispose(bool disposing) @@ -41,7 +48,7 @@ internal abstract class BitReaderBase : IDisposable if (disposing) { - this.Data?.Dispose(); + this.Data.Dispose(); } this.isDisposed = true; diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs index 07bfcccd91..7b64d8329c 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs @@ -57,12 +57,12 @@ internal class Vp8BitReader : BitReaderBase /// The partition length. /// Start index in the data array. Defaults to 0. public Vp8BitReader(Stream inputStream, uint imageDataSize, MemoryAllocator memoryAllocator, uint partitionLength, int startPos = 0) + : base(inputStream, (int)imageDataSize, memoryAllocator) { Guard.MustBeLessThan(imageDataSize, int.MaxValue, nameof(imageDataSize)); this.ImageDataSize = imageDataSize; this.PartitionLength = partitionLength; - this.ReadImageDataFromStream(inputStream, (int)imageDataSize, memoryAllocator); this.InitBitreader(partitionLength, startPos); } @@ -73,8 +73,8 @@ internal class Vp8BitReader : BitReaderBase /// The partition length. /// Start index in the data array. Defaults to 0. public Vp8BitReader(IMemoryOwner imageData, uint partitionLength, int startPos = 0) + : base(imageData) { - this.Data = imageData; this.ImageDataSize = (uint)imageData.Memory.Length; this.PartitionLength = partitionLength; this.InitBitreader(partitionLength, startPos); diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs index 057abf134a..8da717545f 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs @@ -63,8 +63,8 @@ internal class Vp8LBitReader : BitReaderBase /// /// Lossless compressed image data. public Vp8LBitReader(IMemoryOwner data) + : base(data) { - this.Data = data; this.len = data.Memory.Length; this.value = 0; this.bitPos = 0; @@ -88,11 +88,10 @@ internal class Vp8LBitReader : BitReaderBase /// The raw image data size in bytes. /// Used for allocating memory during reading data from the stream. public Vp8LBitReader(Stream inputStream, uint imageDataSize, MemoryAllocator memoryAllocator) + : base(inputStream, (int)imageDataSize, memoryAllocator) { long length = imageDataSize; - this.ReadImageDataFromStream(inputStream, (int)imageDataSize, memoryAllocator); - this.len = length; this.value = 0; this.bitPos = 0; @@ -193,7 +192,7 @@ internal class Vp8LBitReader : BitReaderBase [MethodImpl(InliningOptions.ShortMethod)] private void ShiftBytes() { - System.Span dataSpan = this.Data.Memory.Span; + System.Span dataSpan = this.Data!.Memory.Span; while (this.bitPos >= 8 && this.pos < this.len) { this.value >>= 8; diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index 2df02727e0..02b1d0ab6a 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -123,7 +123,7 @@ internal abstract class BitWriterBase /// The stream to write to. /// The metadata profile's bytes. /// The chuck type to write. - protected void WriteMetadataProfile(Stream stream, byte[] metadataBytes, WebpChunkType chunkType) + protected void WriteMetadataProfile(Stream stream, byte[]? metadataBytes, WebpChunkType chunkType) { DebugGuard.NotNull(metadataBytes, nameof(metadataBytes)); @@ -207,7 +207,7 @@ internal abstract class BitWriterBase /// The width of the image. /// The height of the image. /// Flag indicating, if a alpha channel is present. - protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, byte[] iccProfileBytes, uint width, uint height, bool hasAlpha) + protected void WriteVp8XHeader(Stream stream, ExifProfile? exifProfile, XmpProfile? xmpProfile, byte[]? iccProfileBytes, uint width, uint height, bool hasAlpha) { if (width > MaxDimension || height > MaxDimension) { diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 6fa02c1161..b83b44fa14 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers.Binary; using SixLabors.ImageSharp.Formats.Webp.Lossy; @@ -58,7 +57,8 @@ internal class Vp8BitWriter : BitWriterBase /// Initializes a new instance of the class. /// /// The expected size in bytes. - public Vp8BitWriter(int expectedSize) + /// The Vp8Encoder. + public Vp8BitWriter(int expectedSize, Vp8Encoder enc) : base(expectedSize) { this.range = 255 - 1; @@ -67,15 +67,9 @@ internal class Vp8BitWriter : BitWriterBase this.nbBits = -8; this.pos = 0; this.maxPos = 0; - } - /// - /// Initializes a new instance of the class. - /// - /// The expected size in bytes. - /// The Vp8Encoder. - public Vp8BitWriter(int expectedSize, Vp8Encoder enc) - : this(expectedSize) => this.enc = enc; + this.enc = enc; + } /// public override int NumBytes() => (int)this.pos; @@ -414,9 +408,9 @@ internal class Vp8BitWriter : BitWriterBase /// Indicates, if the alpha data is compressed. public void WriteEncodedImageToStream( Stream stream, - ExifProfile exifProfile, - XmpProfile xmpProfile, - IccProfile iccProfile, + ExifProfile? exifProfile, + XmpProfile? xmpProfile, + IccProfile? iccProfile, uint width, uint height, bool hasAlpha, @@ -424,22 +418,22 @@ internal class Vp8BitWriter : BitWriterBase bool alphaDataIsCompressed) { bool isVp8X = false; - byte[] exifBytes = null; - byte[] xmpBytes = null; - byte[] iccProfileBytes = null; + byte[]? exifBytes = null; + byte[]? xmpBytes = null; + byte[]? iccProfileBytes = null; uint riffSize = 0; if (exifProfile != null) { isVp8X = true; exifBytes = exifProfile.ToByteArray(); - riffSize += MetadataChunkSize(exifBytes); + riffSize += MetadataChunkSize(exifBytes!); } if (xmpProfile != null) { isVp8X = true; xmpBytes = xmpProfile.Data; - riffSize += MetadataChunkSize(xmpBytes); + riffSize += MetadataChunkSize(xmpBytes!); } if (iccProfile != null) @@ -465,7 +459,7 @@ internal class Vp8BitWriter : BitWriterBase int mbSize = this.enc.Mbw * this.enc.Mbh; int expectedSize = mbSize * 7 / 8; - var bitWriterPartZero = new Vp8BitWriter(expectedSize); + Vp8BitWriter bitWriterPartZero = new(expectedSize, this.enc); // Partition #0 with header and partition sizes. uint size0 = this.GeneratePartition0(bitWriterPartZero); @@ -676,9 +670,9 @@ internal class Vp8BitWriter : BitWriterBase bool isVp8X, uint width, uint height, - ExifProfile exifProfile, - XmpProfile xmpProfile, - byte[] iccProfileBytes, + ExifProfile? exifProfile, + XmpProfile? xmpProfile, + byte[]? iccProfileBytes, bool hasAlpha, Span alphaData, bool alphaDataIsCompressed) diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs index 42c1af8040..22bc195d64 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers.Binary; using SixLabors.ImageSharp.Formats.Webp.Lossless; @@ -138,25 +137,25 @@ internal class Vp8LBitWriter : BitWriterBase /// The width of the image. /// The height of the image. /// Flag indicating, if a alpha channel is present. - public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, IccProfile iccProfile, uint width, uint height, bool hasAlpha) + public void WriteEncodedImageToStream(Stream stream, ExifProfile? exifProfile, XmpProfile? xmpProfile, IccProfile? iccProfile, uint width, uint height, bool hasAlpha) { bool isVp8X = false; - byte[] exifBytes = null; - byte[] xmpBytes = null; - byte[] iccBytes = null; + byte[]? exifBytes = null; + byte[]? xmpBytes = null; + byte[]? iccBytes = null; uint riffSize = 0; if (exifProfile != null) { isVp8X = true; exifBytes = exifProfile.ToByteArray(); - riffSize += MetadataChunkSize(exifBytes); + riffSize += MetadataChunkSize(exifBytes!); } if (xmpProfile != null) { isVp8X = true; xmpBytes = xmpProfile.Data; - riffSize += MetadataChunkSize(xmpBytes); + riffSize += MetadataChunkSize(xmpBytes!); } if (iccProfile != null) diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs index b3589b52c7..df19c26e0b 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Memory; @@ -50,7 +49,7 @@ internal static class BackwardReferenceEncoder int lz77TypeBest = 0; double bitCostBest = -1; int cacheBitsInitial = cacheBits; - Vp8LHashChain hashChainBox = null; + Vp8LHashChain? hashChainBox = null; var stats = new Vp8LStreaks(); var bitsEntropy = new Vp8LBitEntropy(); for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1) @@ -101,7 +100,7 @@ internal static class BackwardReferenceEncoder // Improve on simple LZ77 but only for high quality (TraceBackwards is costly). if ((lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard || lz77TypeBest == (int)Vp8LLz77Type.Lz77Box) && quality >= 25) { - Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; + Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox!; BackwardReferencesTraceBackwards(width, height, memoryAllocator, bgra, cacheBits, hashChainTmp, best, worst); var histo = new Vp8LHistogram(worst, cacheBits); double bitCostTrace = histo.EstimateBits(stats, bitsEntropy); @@ -140,8 +139,7 @@ internal static class BackwardReferenceEncoder for (int i = 0; i <= WebpConstants.MaxColorCacheBits; i++) { histos[i] = new Vp8LHistogram(paletteCodeBits: i); - colorCache[i] = new ColorCache(); - colorCache[i].Init(i); + colorCache[i] = new ColorCache(i); } // Find the cacheBits giving the lowest entropy. @@ -274,11 +272,11 @@ internal static class BackwardReferenceEncoder double offsetCost = -1; int firstOffsetIsConstant = -1; // initialized with 'impossible' value. int reach = 0; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } costModel.Build(xSize, cacheBits, refs); @@ -375,12 +373,12 @@ internal static class BackwardReferenceEncoder private static void BackwardReferencesHashChainFollowChosenPath(ReadOnlySpan bgra, int cacheBits, Span chosenPath, int chosenPathSize, Vp8LHashChain hashChain, Vp8LBackwardRefs backwardRefs) { bool useColorCache = cacheBits > 0; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; int i = 0; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } backwardRefs.Refs.Clear(); @@ -396,7 +394,7 @@ internal static class BackwardReferenceEncoder { for (int k = 0; k < len; k++) { - colorCache.Insert(bgra[i + k]); + colorCache!.Insert(bgra[i + k]); } } @@ -405,7 +403,7 @@ internal static class BackwardReferenceEncoder else { PixOrCopy v; - int idx = useColorCache ? colorCache.Contains(bgra[i]) : -1; + int idx = useColorCache ? colorCache!.Contains(bgra[i]) : -1; if (idx >= 0) { // useColorCache is true and color cache contains bgra[i] @@ -416,7 +414,7 @@ internal static class BackwardReferenceEncoder { if (useColorCache) { - colorCache.Insert(bgra[i]); + colorCache!.Insert(bgra[i]); } v = PixOrCopy.CreateLiteral(bgra[i]); @@ -430,7 +428,7 @@ internal static class BackwardReferenceEncoder private static void AddSingleLiteralWithCostModel( ReadOnlySpan bgra, - ColorCache colorCache, + ColorCache? colorCache, CostModel costModel, int idx, bool useColorCache, @@ -440,7 +438,7 @@ internal static class BackwardReferenceEncoder { double costVal = prevCost; uint color = bgra[idx]; - int ix = useColorCache ? colorCache.Contains(color) : -1; + int ix = useColorCache ? colorCache!.Contains(color) : -1; if (ix >= 0) { double mul0 = 0.68; @@ -451,7 +449,7 @@ internal static class BackwardReferenceEncoder double mul1 = 0.82; if (useColorCache) { - colorCache.Insert(color); + colorCache!.Insert(color); } costVal += costModel.GetLiteralCost(color) * mul1; @@ -469,10 +467,10 @@ internal static class BackwardReferenceEncoder int iLastCheck = -1; bool useColorCache = cacheBits > 0; int pixCount = xSize * ySize; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } refs.Refs.Clear(); @@ -529,7 +527,7 @@ internal static class BackwardReferenceEncoder { for (j = i; j < i + len; j++) { - colorCache.Insert(bgra[j]); + colorCache!.Insert(bgra[j]); } } } @@ -725,11 +723,11 @@ internal static class BackwardReferenceEncoder { int pixelCount = xSize * ySize; bool useColorCache = cacheBits > 0; - var colorCache = new ColorCache(); + ColorCache? colorCache = null; if (useColorCache) { - colorCache.Init(cacheBits); + colorCache = new ColorCache(cacheBits); } refs.Refs.Clear(); @@ -757,7 +755,7 @@ internal static class BackwardReferenceEncoder { for (int k = 0; k < prevRowLen; ++k) { - colorCache.Insert(bgra[i + k]); + colorCache!.Insert(bgra[i + k]); } } @@ -777,8 +775,7 @@ internal static class BackwardReferenceEncoder private static void BackwardRefsWithLocalCache(ReadOnlySpan bgra, int cacheBits, Vp8LBackwardRefs refs) { int pixelIndex = 0; - var colorCache = new ColorCache(); - colorCache.Init(cacheBits); + ColorCache colorCache = new(cacheBits); for (int idx = 0; idx < refs.Refs.Count; idx++) { PixOrCopy v = refs.Refs[idx]; @@ -825,12 +822,12 @@ internal static class BackwardReferenceEncoder } } - private static void AddSingleLiteral(uint pixel, bool useColorCache, ColorCache colorCache, Vp8LBackwardRefs refs) + private static void AddSingleLiteral(uint pixel, bool useColorCache, ColorCache? colorCache, Vp8LBackwardRefs refs) { PixOrCopy v; if (useColorCache) { - int key = colorCache.GetIndex(pixel); + int key = colorCache!.GetIndex(pixel); if (colorCache.Lookup(key) == pixel) { v = PixOrCopy.CreateCacheIdx(key); diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 08ff60b69f..e683fb5605 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Runtime.CompilerServices; @@ -14,31 +13,31 @@ internal class ColorCache private const uint HashMul = 0x1e35a7bdu; /// - /// Gets the color entries. + /// Initializes a new instance of the class. /// - public uint[] Colors { get; private set; } + /// The hashBits determine the size of cache. It will be 1 left shifted by hashBits. + public ColorCache(int hashBits) + { + int hashSize = 1 << hashBits; + this.Colors = new uint[hashSize]; + this.HashBits = hashBits; + this.HashShift = 32 - hashBits; + } /// - /// Gets the hash shift: 32 - hashBits. + /// Gets the color entries. /// - public int HashShift { get; private set; } + public uint[] Colors { get; } /// - /// Gets the hash bits. + /// Gets the hash shift: 32 - hashBits. /// - public int HashBits { get; private set; } + public int HashShift { get; } /// - /// Initializes a new color cache. + /// Gets the hash bits. /// - /// The hashBits determine the size of cache. It will be 1 left shifted by hashBits. - public void Init(int hashBits) - { - int hashSize = 1 << hashBits; - this.Colors = new uint[hashSize]; - this.HashBits = hashBits; - this.HashShift = 32 - hashBits; - } + public int HashBits { get; } /// /// Inserts a new color into the cache. diff --git a/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs b/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs index a63c786049..0cc4a30fd7 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/CostInterval.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Diagnostics; @@ -33,7 +32,7 @@ internal class CostInterval public int Index { get; set; } - public CostInterval Previous { get; set; } + public CostInterval? Previous { get; set; } - public CostInterval Next { get; set; } + public CostInterval? Next { get; set; } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs b/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs index 7b9fdff247..e393c065ec 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using SixLabors.ImageSharp.Memory; @@ -14,7 +13,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless; /// internal sealed class CostManager : IDisposable { - private CostInterval head; + private CostInterval? head; private const int FreeIntervalsStartCount = 25; @@ -103,10 +102,10 @@ internal sealed class CostManager : IDisposable /// If 'doCleanIntervals' is true, intervals that end before 'i' will be popped. public void UpdateCostAtIndex(int i, bool doCleanIntervals) { - CostInterval current = this.head; + CostInterval? current = this.head; while (current != null && current.Start <= i) { - CostInterval next = current.Next; + CostInterval? next = current.Next; if (current.End <= i) { if (doCleanIntervals) @@ -155,7 +154,7 @@ internal sealed class CostManager : IDisposable return; } - CostInterval interval = this.head; + CostInterval? interval = this.head; for (int i = 0; i < this.CacheIntervalsSize && this.CacheIntervals[i].Start < len; i++) { // Define the intersection of the ith interval with the new one. @@ -163,7 +162,7 @@ internal sealed class CostManager : IDisposable int end = position + (this.CacheIntervals[i].End > len ? len : this.CacheIntervals[i].End); float cost = (float)(distanceCost + this.CacheIntervals[i].Cost); - CostInterval intervalNext; + CostInterval? intervalNext; for (; interval != null && interval.Start < end; interval = intervalNext) { intervalNext = interval.Next; @@ -225,7 +224,7 @@ internal sealed class CostManager : IDisposable /// Pop an interval from the manager. /// /// The interval to remove. - private void PopInterval(CostInterval interval) + private void PopInterval(CostInterval? interval) { if (interval == null) { @@ -240,7 +239,7 @@ internal sealed class CostManager : IDisposable this.freeIntervals.Push(interval); } - private void InsertInterval(CostInterval intervalIn, float cost, int position, int start, int end) + private void InsertInterval(CostInterval? intervalIn, float cost, int position, int start, int end) { if (start >= end) { @@ -271,7 +270,7 @@ internal sealed class CostManager : IDisposable /// it was orphaned (which can be NULL), set it at the right place in the list /// of intervals using the start_ ordering and the previous interval as a hint. /// - private void PositionOrphanInterval(CostInterval current, CostInterval previous) + private void PositionOrphanInterval(CostInterval current, CostInterval? previous) { previous ??= this.head; @@ -292,7 +291,7 @@ internal sealed class CostManager : IDisposable /// /// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. /// - private void ConnectIntervals(CostInterval prev, CostInterval next) + private void ConnectIntervals(CostInterval? prev, CostInterval? next) { if (prev != null) { diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 5d9c207096..ac92ce6a6a 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -96,7 +96,7 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported) { - var addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + Vector256 addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 8; i += 8) @@ -115,7 +115,7 @@ internal static unsafe class LosslessUtils } else if (Ssse3.IsSupported) { - var addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + Vector128 addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 4; i += 4) @@ -138,13 +138,11 @@ internal static unsafe class LosslessUtils nint i; for (i = 0; i <= numPixels - 4; i += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -178,7 +176,7 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported) { - var subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + Vector256 subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 8; i += 8) @@ -197,7 +195,7 @@ internal static unsafe class LosslessUtils } else if (Ssse3.IsSupported) { - var subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + Vector128 subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 4; i += 4) @@ -220,13 +218,11 @@ internal static unsafe class LosslessUtils nint i; for (i = 0; i <= numPixels - 4; i += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -334,7 +330,7 @@ internal static unsafe class LosslessUtils while (y < yEnd) { int predRowIdx = predRowIdxStart; - var m = default(Vp8LMultipliers); + Vp8LMultipliers m = default; int srcSafeEnd = pixelPos + safeWidth; int srcEnd = pixelPos + width; while (pixelPos < srcSafeEnd) @@ -371,21 +367,19 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported && numPixels >= 8) { - var transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + Vector256 transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector256 transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= numPixels - 8; idx += 8) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -403,20 +397,18 @@ internal static unsafe class LosslessUtils } else if (Sse2.IsSupported) { - var transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + Vector128 transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector128 transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= numPixels - 4; idx += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -465,19 +457,17 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported && pixelData.Length >= 8) { - var transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector256 transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= pixelData.Length - 8; idx += 8) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorInverseAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); @@ -496,20 +486,18 @@ internal static unsafe class LosslessUtils } else if (Sse2.IsSupported) { - var transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector128 transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= pixelData.Length - 4; idx += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorInverseAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs index abfb67bc7e..84ddd4b785 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs @@ -158,10 +158,9 @@ internal sealed class WebpLosslessDecoder // Finish setting up the color-cache. if (isColorCachePresent) { - decoder.Metadata.ColorCache = new ColorCache(); + decoder.Metadata.ColorCache = new ColorCache(colorCacheBits); colorCacheSize = 1 << colorCacheBits; decoder.Metadata.ColorCacheSize = colorCacheSize; - decoder.Metadata.ColorCache.Init(colorCacheBits); } else { diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index a5d1900b2a..16b4c827ef 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -348,12 +348,18 @@ internal class Vp8Encoder : IDisposable // Extract and encode alpha channel data, if present. int alphaDataSize = 0; bool alphaCompressionSucceeded = false; - using AlphaEncoder alphaEncoder = new(); Span alphaData = Span.Empty; if (hasAlpha) { // TODO: This can potentially run in an separate task. - IMemoryOwner encodedAlphaData = alphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.skipMetadata, this.alphaCompression, out alphaDataSize); + using IMemoryOwner encodedAlphaData = AlphaEncoder.EncodeAlpha( + image, + this.configuration, + this.memoryAllocator, + this.skipMetadata, + this.alphaCompression, + out alphaDataSize); + alphaData = encodedAlphaData.GetSpan(); if (alphaDataSize < pixelCount) { diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index cc263657f1..82f00e8760 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -521,9 +521,8 @@ internal static unsafe class Vp8Encoding { // *in01 = 00 01 10 11 02 03 12 13 // *in23 = 20 21 30 31 22 23 32 33 - const byte mmShuffle_2301 = 0b_10_11_00_01; - Vector128 shuf01_p = Sse2.ShuffleHigh(row01, mmShuffle_2301); - Vector128 shuf32_p = Sse2.ShuffleHigh(row23, mmShuffle_2301); + Vector128 shuf01_p = Sse2.ShuffleHigh(row01, SimdUtils.Shuffle.MMShuffle2301); + Vector128 shuf32_p = Sse2.ShuffleHigh(row23, SimdUtils.Shuffle.MMShuffle2301); // 00 01 10 11 03 02 13 12 // 20 21 30 31 23 22 33 32 @@ -555,9 +554,7 @@ internal static unsafe class Vp8Encoding Vector128 shi = Sse2.UnpackHigh(s03, s12); // 2 3 2 3 2 3 Vector128 v23 = Sse2.UnpackHigh(slo.AsInt32(), shi.AsInt32()); out01 = Sse2.UnpackLow(slo.AsInt32(), shi.AsInt32()); - - const byte mmShuffle_1032 = 0b_01_00_11_10; - out32 = Sse2.Shuffle(v23, mmShuffle_1032); + out32 = Sse2.Shuffle(v23, SimdUtils.Shuffle.MMShuffle1032); } public static void FTransformPass2SSE2(Vector128 v01, Vector128 v32, Span output) diff --git a/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs b/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs index 4c1c60591d..abaa85ef18 100644 --- a/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpAnimationDecoder.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using System.Runtime.CompilerServices; @@ -46,17 +45,17 @@ internal class WebpAnimationDecoder : IDisposable /// /// The abstract metadata. /// - private ImageMetadata metadata; + private ImageMetadata? metadata; /// /// The gif specific metadata. /// - private WebpMetadata webpMetadata; + private WebpMetadata? webpMetadata; /// /// The alpha data, if an ALPH chunk is present. /// - private IMemoryOwner alphaData; + private IMemoryOwner? alphaData; /// /// Initializes a new instance of the class. @@ -83,8 +82,8 @@ internal class WebpAnimationDecoder : IDisposable public Image Decode(BufferedReadStream stream, WebpFeatures features, uint width, uint height, uint completeDataSize) where TPixel : unmanaged, IPixel { - Image image = null; - ImageFrame previousFrame = null; + Image? image = null; + ImageFrame? previousFrame = null; this.metadata = new ImageMetadata(); this.webpMetadata = this.metadata.GetWebpMetadata(); @@ -99,12 +98,12 @@ internal class WebpAnimationDecoder : IDisposable switch (chunkType) { case WebpChunkType.Animation: - uint dataSize = this.ReadFrame(stream, ref image, ref previousFrame, width, height, features.AnimationBackgroundColor.Value); + uint dataSize = this.ReadFrame(stream, ref image, ref previousFrame, width, height, features.AnimationBackgroundColor!.Value); remainingBytes -= (int)dataSize; break; case WebpChunkType.Xmp: case WebpChunkType.Exif: - WebpChunkParsingUtils.ParseOptionalChunks(stream, chunkType, image.Metadata, false, this.buffer); + WebpChunkParsingUtils.ParseOptionalChunks(stream, chunkType, image!.Metadata, false, this.buffer); break; default: WebpThrowHelper.ThrowImageFormatException("Read unexpected webp chunk data"); @@ -117,7 +116,7 @@ internal class WebpAnimationDecoder : IDisposable } } - return image; + return image!; } /// @@ -130,7 +129,7 @@ internal class WebpAnimationDecoder : IDisposable /// The width of the image. /// The height of the image. /// The default background color of the canvas in. - private uint ReadFrame(BufferedReadStream stream, ref Image image, ref ImageFrame previousFrame, uint width, uint height, Color backgroundColor) + private uint ReadFrame(BufferedReadStream stream, ref Image? image, ref ImageFrame? previousFrame, uint width, uint height, Color backgroundColor) where TPixel : unmanaged, IPixel { AnimationFrameData frameData = this.ReadFrameHeader(stream); @@ -146,7 +145,7 @@ internal class WebpAnimationDecoder : IDisposable chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); } - WebpImageInfo webpInfo = null; + WebpImageInfo? webpInfo = null; WebpFeatures features = new(); switch (chunkType) { @@ -163,7 +162,7 @@ internal class WebpAnimationDecoder : IDisposable break; } - ImageFrame currentFrame = null; + ImageFrame? currentFrame = null; ImageFrame imageFrame; if (previousFrame is null) { @@ -175,7 +174,7 @@ internal class WebpAnimationDecoder : IDisposable } else { - currentFrame = image.Frames.AddFrame(previousFrame); // This clones the frame and adds it the collection. + currentFrame = image!.Frames.AddFrame(previousFrame); // This clones the frame and adds it the collection. SetFrameMetadata(currentFrame.Metadata, frameData.Duration); diff --git a/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs b/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs index bfe415a6e4..e4acdf311c 100644 --- a/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs +++ b/src/ImageSharp/Formats/Webp/WebpChunkParsingUtils.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers.Binary; using SixLabors.ImageSharp.Formats.Webp.BitReader; diff --git a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs index c4e2e0d55a..c158df44d9 100644 --- a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using System.Buffers.Binary; @@ -9,9 +8,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.IO; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Metadata; -using SixLabors.ImageSharp.Metadata.Profiles.Exif; using SixLabors.ImageSharp.Metadata.Profiles.Icc; -using SixLabors.ImageSharp.Metadata.Profiles.Xmp; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Webp; @@ -41,35 +38,20 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// private readonly uint maxFrames; - /// - /// Gets the decoded by this decoder instance. - /// - private ImageMetadata metadata; - /// /// Gets or sets the alpha data, if an ALPH chunk is present. /// - private IMemoryOwner alphaData; + private IMemoryOwner? alphaData; /// /// Used for allocating memory during the decoding operations. /// private readonly MemoryAllocator memoryAllocator; - /// - /// The stream to decode from. - /// - private BufferedReadStream currentStream; - - /// - /// The webp specific metadata. - /// - private WebpMetadata webpMetadata; - /// /// Information about the webp image. /// - private WebpImageInfo webImageInfo; + private WebpImageInfo? webImageInfo; /// /// Initializes a new instance of the class. @@ -88,25 +70,24 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable public DecoderOptions Options { get; } /// - public Size Dimensions => new((int)this.webImageInfo.Width, (int)this.webImageInfo.Height); + public Size Dimensions => new((int)this.webImageInfo!.Width, (int)this.webImageInfo.Height); /// public Image Decode(BufferedReadStream stream, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Image image = null; + Image? image = null; try { - this.metadata = new ImageMetadata(); - this.currentStream = stream; + ImageMetadata metadata = new(); - uint fileSize = this.ReadImageHeader(); + uint fileSize = this.ReadImageHeader(stream); - using (this.webImageInfo = this.ReadVp8Info()) + using (this.webImageInfo = this.ReadVp8Info(stream, metadata)) { if (this.webImageInfo.Features is { Animation: true }) { - using var animationDecoder = new WebpAnimationDecoder(this.memoryAllocator, this.configuration, this.maxFrames); + using WebpAnimationDecoder animationDecoder = new(this.memoryAllocator, this.configuration, this.maxFrames); return animationDecoder.Decode(stream, this.webImageInfo.Features, this.webImageInfo.Width, this.webImageInfo.Height, fileSize); } @@ -115,23 +96,23 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable WebpThrowHelper.ThrowNotSupportedException("Animations are not supported"); } - image = new Image(this.configuration, (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, this.metadata); + image = new Image(this.configuration, (int)this.webImageInfo.Width, (int)this.webImageInfo.Height, metadata); Buffer2D pixels = image.GetRootFramePixelBuffer(); if (this.webImageInfo.IsLossless) { - var losslessDecoder = new WebpLosslessDecoder(this.webImageInfo.Vp8LBitReader, this.memoryAllocator, this.configuration); + WebpLosslessDecoder losslessDecoder = new(this.webImageInfo.Vp8LBitReader, this.memoryAllocator, this.configuration); losslessDecoder.Decode(pixels, image.Width, image.Height); } else { - var lossyDecoder = new WebpLossyDecoder(this.webImageInfo.Vp8BitReader, this.memoryAllocator, this.configuration); + WebpLossyDecoder lossyDecoder = new(this.webImageInfo.Vp8BitReader, this.memoryAllocator, this.configuration); lossyDecoder.Decode(pixels, image.Width, image.Height, this.webImageInfo, this.alphaData); } // There can be optional chunks after the image data, like EXIF and XMP. if (this.webImageInfo.Features != null) { - this.ParseOptionalChunks(this.webImageInfo.Features); + this.ParseOptionalChunks(stream, metadata, this.webImageInfo.Features); } return image; @@ -147,34 +128,35 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// public ImageInfo Identify(BufferedReadStream stream, CancellationToken cancellationToken) { - this.currentStream = stream; + this.ReadImageHeader(stream); - this.ReadImageHeader(); - using (this.webImageInfo = this.ReadVp8Info(true)) + ImageMetadata metadata = new(); + using (this.webImageInfo = this.ReadVp8Info(stream, metadata, true)) { return new ImageInfo( new PixelTypeInfo((int)this.webImageInfo.BitsPerPixel), new((int)this.webImageInfo.Width, (int)this.webImageInfo.Height), - this.metadata); + metadata); } } /// /// Reads and skips over the image header. /// + /// The stream to decode from. /// The file size in bytes. - private uint ReadImageHeader() + private uint ReadImageHeader(BufferedReadStream stream) { // Skip FourCC header, we already know its a RIFF file at this point. - this.currentStream.Skip(4); + stream.Skip(4); // Read file size. // The size of the file in bytes starting at offset 8. // The file size in the header is the total size of the chunks that follow plus 4 bytes for the ‘WEBP’ FourCC. - uint fileSize = WebpChunkParsingUtils.ReadChunkSize(this.currentStream, this.buffer); + uint fileSize = WebpChunkParsingUtils.ReadChunkSize(stream, this.buffer); // Skip 'WEBP' from the header. - this.currentStream.Skip(4); + stream.Skip(4); return fileSize; } @@ -182,42 +164,43 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads information present in the image header, about the image content and how to decode the image. /// + /// The stream to decode from. + /// The image metadata. /// For identify, the alpha data should not be read. /// Information about the webp image. - private WebpImageInfo ReadVp8Info(bool ignoreAlpha = false) + private WebpImageInfo ReadVp8Info(BufferedReadStream stream, ImageMetadata metadata, bool ignoreAlpha = false) { - this.metadata = new ImageMetadata(); - this.webpMetadata = this.metadata.GetFormatMetadata(WebpFormat.Instance); + WebpMetadata webpMetadata = metadata.GetFormatMetadata(WebpFormat.Instance); - WebpChunkType chunkType = WebpChunkParsingUtils.ReadChunkType(this.currentStream, this.buffer); + WebpChunkType chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); - var features = new WebpFeatures(); + WebpFeatures features = new(); switch (chunkType) { case WebpChunkType.Vp8: - this.webpMetadata.FileFormat = WebpFileFormatType.Lossy; - return WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, this.currentStream, this.buffer, features); + webpMetadata.FileFormat = WebpFileFormatType.Lossy; + return WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, stream, this.buffer, features); case WebpChunkType.Vp8L: - this.webpMetadata.FileFormat = WebpFileFormatType.Lossless; - return WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, this.currentStream, this.buffer, features); + webpMetadata.FileFormat = WebpFileFormatType.Lossless; + return WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, stream, this.buffer, features); case WebpChunkType.Vp8X: - WebpImageInfo webpInfos = WebpChunkParsingUtils.ReadVp8XHeader(this.currentStream, this.buffer, features); - while (this.currentStream.Position < this.currentStream.Length) + WebpImageInfo webpInfos = WebpChunkParsingUtils.ReadVp8XHeader(stream, this.buffer, features); + while (stream.Position < stream.Length) { - chunkType = WebpChunkParsingUtils.ReadChunkType(this.currentStream, this.buffer); + chunkType = WebpChunkParsingUtils.ReadChunkType(stream, this.buffer); if (chunkType == WebpChunkType.Vp8) { - this.webpMetadata.FileFormat = WebpFileFormatType.Lossy; - webpInfos = WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, this.currentStream, this.buffer, features); + webpMetadata.FileFormat = WebpFileFormatType.Lossy; + webpInfos = WebpChunkParsingUtils.ReadVp8Header(this.memoryAllocator, stream, this.buffer, features); } else if (chunkType == WebpChunkType.Vp8L) { - this.webpMetadata.FileFormat = WebpFileFormatType.Lossless; - webpInfos = WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, this.currentStream, this.buffer, features); + webpMetadata.FileFormat = WebpFileFormatType.Lossless; + webpInfos = WebpChunkParsingUtils.ReadVp8LHeader(this.memoryAllocator, stream, this.buffer, features); } else if (WebpChunkParsingUtils.IsOptionalVp8XChunk(chunkType)) { - bool isAnimationChunk = this.ParseOptionalExtendedChunks(chunkType, features, ignoreAlpha); + bool isAnimationChunk = this.ParseOptionalExtendedChunks(stream, metadata, chunkType, features, ignoreAlpha); if (isAnimationChunk) { return webpInfos; @@ -226,8 +209,8 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable else { // Ignore unknown chunks. - uint chunkSize = this.ReadChunkSize(); - this.currentStream.Skip((int)chunkSize); + uint chunkSize = this.ReadChunkSize(stream); + stream.Skip((int)chunkSize); } } @@ -242,32 +225,39 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Parses optional VP8X chunks, which can be ICCP, XMP, ANIM or ALPH chunks. /// + /// The stream to decode from. + /// The image metadata. /// The chunk type. /// The webp image features. /// For identify, the alpha data should not be read. /// true, if its a alpha chunk. - private bool ParseOptionalExtendedChunks(WebpChunkType chunkType, WebpFeatures features, bool ignoreAlpha) + private bool ParseOptionalExtendedChunks( + BufferedReadStream stream, + ImageMetadata metadata, + WebpChunkType chunkType, + WebpFeatures features, + bool ignoreAlpha) { switch (chunkType) { case WebpChunkType.Iccp: - this.ReadIccProfile(); + this.ReadIccProfile(stream, metadata); break; case WebpChunkType.Exif: - this.ReadExifProfile(); + this.ReadExifProfile(stream, metadata); break; case WebpChunkType.Xmp: - this.ReadXmpProfile(); + this.ReadXmpProfile(stream, metadata); break; case WebpChunkType.AnimationParameter: - this.ReadAnimationParameters(features); + this.ReadAnimationParameters(stream, features); return true; case WebpChunkType.Alpha: - this.ReadAlphaData(features, ignoreAlpha); + this.ReadAlphaData(stream, features, ignoreAlpha); break; default: WebpThrowHelper.ThrowImageFormatException("Unexpected chunk followed VP8X header"); @@ -280,32 +270,34 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the optional metadata EXIF of XMP profiles, which can follow the image data. /// + /// The stream to decode from. + /// The image metadata. /// The webp features. - private void ParseOptionalChunks(WebpFeatures features) + private void ParseOptionalChunks(BufferedReadStream stream, ImageMetadata metadata, WebpFeatures features) { if (this.skipMetadata || (!features.ExifProfile && !features.XmpMetaData)) { return; } - long streamLength = this.currentStream.Length; - while (this.currentStream.Position < streamLength) + long streamLength = stream.Length; + while (stream.Position < streamLength) { // Read chunk header. - WebpChunkType chunkType = this.ReadChunkType(); - if (chunkType == WebpChunkType.Exif && this.metadata.ExifProfile == null) + WebpChunkType chunkType = this.ReadChunkType(stream); + if (chunkType == WebpChunkType.Exif && metadata.ExifProfile == null) { - this.ReadExifProfile(); + this.ReadExifProfile(stream, metadata); } - else if (chunkType == WebpChunkType.Xmp && this.metadata.XmpProfile == null) + else if (chunkType == WebpChunkType.Xmp && metadata.XmpProfile == null) { - this.ReadXmpProfile(); + this.ReadXmpProfile(stream, metadata); } else { // Skip duplicate XMP or EXIF chunk. - uint chunkLength = this.ReadChunkSize(); - this.currentStream.Skip((int)chunkLength); + uint chunkLength = this.ReadChunkSize(stream); + stream.Skip((int)chunkLength); } } } @@ -313,76 +305,80 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the EXIF profile from the stream. /// - private void ReadExifProfile() + /// The stream to decode from. + /// The image metadata. + private void ReadExifProfile(BufferedReadStream stream, ImageMetadata metadata) { - uint exifChunkSize = this.ReadChunkSize(); + uint exifChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) { - this.currentStream.Skip((int)exifChunkSize); + stream.Skip((int)exifChunkSize); } else { byte[] exifData = new byte[exifChunkSize]; - int bytesRead = this.currentStream.Read(exifData, 0, (int)exifChunkSize); + int bytesRead = stream.Read(exifData, 0, (int)exifChunkSize); if (bytesRead != exifChunkSize) { // Ignore invalid chunk. return; } - var profile = new ExifProfile(exifData); - this.metadata.ExifProfile = profile; + metadata.ExifProfile = new(exifData); } } /// /// Reads the XMP profile the stream. /// - private void ReadXmpProfile() + /// The stream to decode from. + /// The image metadata. + private void ReadXmpProfile(BufferedReadStream stream, ImageMetadata metadata) { - uint xmpChunkSize = this.ReadChunkSize(); + uint xmpChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) { - this.currentStream.Skip((int)xmpChunkSize); + stream.Skip((int)xmpChunkSize); } else { byte[] xmpData = new byte[xmpChunkSize]; - int bytesRead = this.currentStream.Read(xmpData, 0, (int)xmpChunkSize); + int bytesRead = stream.Read(xmpData, 0, (int)xmpChunkSize); if (bytesRead != xmpChunkSize) { // Ignore invalid chunk. return; } - var profile = new XmpProfile(xmpData); - this.metadata.XmpProfile = profile; + metadata.XmpProfile = new(xmpData); } } /// /// Reads the ICCP chunk from the stream. /// - private void ReadIccProfile() + /// The stream to decode from. + /// The image metadata. + private void ReadIccProfile(BufferedReadStream stream, ImageMetadata metadata) { - uint iccpChunkSize = this.ReadChunkSize(); + uint iccpChunkSize = this.ReadChunkSize(stream); if (this.skipMetadata) { - this.currentStream.Skip((int)iccpChunkSize); + stream.Skip((int)iccpChunkSize); } else { byte[] iccpData = new byte[iccpChunkSize]; - int bytesRead = this.currentStream.Read(iccpData, 0, (int)iccpChunkSize); + int bytesRead = stream.Read(iccpData, 0, (int)iccpChunkSize); if (bytesRead != iccpChunkSize) { WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the iccp chunk"); } - var profile = new IccProfile(iccpData); + IccProfile profile = new(iccpData); if (profile.CheckIsValid()) { - this.metadata.IccProfile = profile; + metadata.IccProfile = profile; } } } @@ -390,17 +386,18 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the animation parameters chunk from the stream. /// + /// The stream to decode from. /// The webp features. - private void ReadAnimationParameters(WebpFeatures features) + private void ReadAnimationParameters(BufferedReadStream stream, WebpFeatures features) { features.Animation = true; - uint animationChunkSize = WebpChunkParsingUtils.ReadChunkSize(this.currentStream, this.buffer); - byte blue = (byte)this.currentStream.ReadByte(); - byte green = (byte)this.currentStream.ReadByte(); - byte red = (byte)this.currentStream.ReadByte(); - byte alpha = (byte)this.currentStream.ReadByte(); + uint animationChunkSize = WebpChunkParsingUtils.ReadChunkSize(stream, this.buffer); + byte blue = (byte)stream.ReadByte(); + byte green = (byte)stream.ReadByte(); + byte red = (byte)stream.ReadByte(); + byte alpha = (byte)stream.ReadByte(); features.AnimationBackgroundColor = new Color(new Rgba32(red, green, blue, alpha)); - int bytesRead = this.currentStream.Read(this.buffer, 0, 2); + int bytesRead = stream.Read(this.buffer, 0, 2); if (bytesRead != 2) { WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the animation loop count"); @@ -412,22 +409,23 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Reads the alpha data chunk data from the stream. /// + /// The stream to decode from. /// The features. /// if set to true, skips the chunk data. - private void ReadAlphaData(WebpFeatures features, bool ignoreAlpha) + private void ReadAlphaData(BufferedReadStream stream, WebpFeatures features, bool ignoreAlpha) { - uint alphaChunkSize = WebpChunkParsingUtils.ReadChunkSize(this.currentStream, this.buffer); + uint alphaChunkSize = WebpChunkParsingUtils.ReadChunkSize(stream, this.buffer); if (ignoreAlpha) { - this.currentStream.Skip((int)alphaChunkSize); + stream.Skip((int)alphaChunkSize); return; } - features.AlphaChunkHeader = (byte)this.currentStream.ReadByte(); + features.AlphaChunkHeader = (byte)stream.ReadByte(); int alphaDataSize = (int)(alphaChunkSize - 1); this.alphaData = this.memoryAllocator.Allocate(alphaDataSize); Span alphaData = this.alphaData.GetSpan(); - int bytesRead = this.currentStream.Read(alphaData, 0, alphaDataSize); + int bytesRead = stream.Read(alphaData, 0, alphaDataSize); if (bytesRead != alphaDataSize) { WebpThrowHelper.ThrowInvalidImageContentException("Not enough data to read the alpha data from the stream"); @@ -437,15 +435,15 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// /// Identifies the chunk type from the chunk. /// + /// The stream to decode from. /// /// Thrown if the input stream is not valid. /// - private WebpChunkType ReadChunkType() + private WebpChunkType ReadChunkType(BufferedReadStream stream) { - if (this.currentStream.Read(this.buffer, 0, 4) == 4) + if (stream.Read(this.buffer, 0, 4) == 4) { - var chunkType = (WebpChunkType)BinaryPrimitives.ReadUInt32BigEndian(this.buffer); - return chunkType; + return (WebpChunkType)BinaryPrimitives.ReadUInt32BigEndian(this.buffer); } throw new ImageFormatException("Invalid Webp data."); @@ -455,10 +453,12 @@ internal sealed class WebpDecoderCore : IImageDecoderInternals, IDisposable /// Reads the chunk size. If Chunk Size is odd, a single padding byte will be added to the payload, /// so the chunk size will be increased by 1 in those cases. /// + /// The stream to decode from. /// The chunk size in bytes. - private uint ReadChunkSize() + /// Invalid data. + private uint ReadChunkSize(BufferedReadStream stream) { - if (this.currentStream.Read(this.buffer, 0, 4) == 4) + if (stream.Read(this.buffer, 0, 4) == 4) { uint chunkSize = BinaryPrimitives.ReadUInt32LittleEndian(this.buffer); return (chunkSize % 2 == 0) ? chunkSize : chunkSize + 1; diff --git a/src/ImageSharp/Formats/Webp/WebpEncoder.cs b/src/ImageSharp/Formats/Webp/WebpEncoder.cs index e314d38017..bd8303f1c8 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoder.cs @@ -82,7 +82,7 @@ public sealed class WebpEncoder : ImageEncoder /// protected override void Encode(Image image, Stream stream, CancellationToken cancellationToken) { - WebpEncoderCore encoder = new(this, image.GetMemoryAllocator()); + WebpEncoderCore encoder = new(this, image.GetConfiguration()); encoder.Encode(image, stream, cancellationToken); } } diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs index 68951e5ec0..33189ba845 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs @@ -1,8 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable -using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Memory; @@ -81,16 +79,17 @@ internal sealed class WebpEncoderCore : IImageEncoderInternals /// /// The global configuration. /// - private Configuration configuration; + private readonly Configuration configuration; /// /// Initializes a new instance of the class. /// /// The encoder with options. - /// The memory manager. - public WebpEncoderCore(WebpEncoder encoder, MemoryAllocator memoryAllocator) + /// The global configuration. + public WebpEncoderCore(WebpEncoder encoder, Configuration configuration) { - this.memoryAllocator = memoryAllocator; + this.configuration = configuration; + this.memoryAllocator = configuration.MemoryAllocator; this.alphaCompression = encoder.UseAlphaCompression; this.fileFormat = encoder.FileFormat; this.quality = encoder.Quality; @@ -117,7 +116,6 @@ internal sealed class WebpEncoderCore : IImageEncoderInternals Guard.NotNull(image, nameof(image)); Guard.NotNull(stream, nameof(stream)); - this.configuration = image.GetConfiguration(); bool lossless; if (this.fileFormat is not null) { diff --git a/src/ImageSharp/Formats/Webp/WebpImageInfo.cs b/src/ImageSharp/Formats/Webp/WebpImageInfo.cs index c41403211f..5f7301b262 100644 --- a/src/ImageSharp/Formats/Webp/WebpImageInfo.cs +++ b/src/ImageSharp/Formats/Webp/WebpImageInfo.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using SixLabors.ImageSharp.Formats.Webp.BitReader; using SixLabors.ImageSharp.Formats.Webp.Lossy; @@ -36,7 +35,7 @@ internal class WebpImageInfo : IDisposable /// /// Gets or sets additional features present in a VP8X image. /// - public WebpFeatures Features { get; set; } + public WebpFeatures? Features { get; set; } /// /// Gets or sets the VP8 profile / version. Valid values are between 0 and 3. Default value will be the invalid value -1. @@ -46,17 +45,17 @@ internal class WebpImageInfo : IDisposable /// /// Gets or sets the VP8 frame header. /// - public Vp8FrameHeader Vp8FrameHeader { get; set; } + public Vp8FrameHeader? Vp8FrameHeader { get; set; } /// /// Gets or sets the VP8L bitreader. Will be , if its not a lossless image. /// - public Vp8LBitReader Vp8LBitReader { get; set; } + public Vp8LBitReader? Vp8LBitReader { get; set; } /// /// Gets or sets the VP8 bitreader. Will be , if its not a lossy image. /// - public Vp8BitReader Vp8BitReader { get; set; } + public Vp8BitReader? Vp8BitReader { get; set; } /// public void Dispose() diff --git a/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs b/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs index 944cefa745..c633c52738 100644 --- a/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs +++ b/src/ImageSharp/Formats/Webp/WebpThrowHelper.cs @@ -1,15 +1,21 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Diagnostics.CodeAnalysis; + namespace SixLabors.ImageSharp.Formats.Webp; internal static class WebpThrowHelper { + [DoesNotReturn] public static void ThrowInvalidImageContentException(string errorMessage) => throw new InvalidImageContentException(errorMessage); + [DoesNotReturn] public static void ThrowImageFormatException(string errorMessage) => throw new ImageFormatException(errorMessage); + [DoesNotReturn] public static void ThrowNotSupportedException(string errorMessage) => throw new NotSupportedException(errorMessage); + [DoesNotReturn] public static void ThrowInvalidImageDimensions(string errorMessage) => throw new InvalidImageContentException(errorMessage); } diff --git a/src/ImageSharp/ImageExtensions.cs b/src/ImageSharp/ImageExtensions.cs index 04930a2689..cf970b3166 100644 --- a/src/ImageSharp/ImageExtensions.cs +++ b/src/ImageSharp/ImageExtensions.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Globalization; using System.Text; @@ -180,6 +179,6 @@ public static partial class ImageExtensions // Always available. stream.TryGetBuffer(out ArraySegment buffer); - return $"data:{format.DefaultMimeType};base64,{Convert.ToBase64String(buffer.Array, 0, (int)stream.Length)}"; + return $"data:{format.DefaultMimeType};base64,{Convert.ToBase64String(buffer.Array ?? Array.Empty(), 0, (int)stream.Length)}"; } } diff --git a/src/ImageSharp/ImageFrame{TPixel}.cs b/src/ImageSharp/ImageFrame{TPixel}.cs index fb4ee6ae5f..3734402d30 100644 --- a/src/ImageSharp/ImageFrame{TPixel}.cs +++ b/src/ImageSharp/ImageFrame{TPixel}.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -145,7 +144,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource } /// - public Buffer2D PixelBuffer { get; private set; } + public Buffer2D PixelBuffer { get; } /// /// Gets or sets the pixel at the specified position. @@ -183,7 +182,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource try { - var accessor = new PixelAccessor(this.PixelBuffer); + PixelAccessor accessor = new(this.PixelBuffer); processPixels(accessor); } finally @@ -211,8 +210,8 @@ public sealed class ImageFrame : ImageFrame, IPixelSource try { - var accessor1 = new PixelAccessor(this.PixelBuffer); - var accessor2 = new PixelAccessor(frame2.PixelBuffer); + PixelAccessor accessor1 = new(this.PixelBuffer); + PixelAccessor accessor2 = new(frame2.PixelBuffer); processPixels(accessor1, accessor2); } finally @@ -247,9 +246,9 @@ public sealed class ImageFrame : ImageFrame, IPixelSource try { - var accessor1 = new PixelAccessor(this.PixelBuffer); - var accessor2 = new PixelAccessor(frame2.PixelBuffer); - var accessor3 = new PixelAccessor(frame3.PixelBuffer); + PixelAccessor accessor1 = new(this.PixelBuffer); + PixelAccessor accessor2 = new(frame2.PixelBuffer); + PixelAccessor accessor3 = new(frame3.PixelBuffer); processPixels(accessor1, accessor2, accessor3); } finally @@ -310,6 +309,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource /// Copies the pixels to a of the same size. /// /// The target pixel buffer accessor. + /// ImageFrame{TPixel}.CopyTo(): target must be of the same size! internal void CopyTo(Buffer2D target) { if (this.Size() != target.Size()) @@ -342,8 +342,7 @@ public sealed class ImageFrame : ImageFrame, IPixelSource if (disposing) { - this.PixelBuffer?.Dispose(); - this.PixelBuffer = null; + this.PixelBuffer.Dispose(); } this.isDisposed = true; @@ -379,14 +378,14 @@ public sealed class ImageFrame : ImageFrame, IPixelSource /// /// The configuration providing initialization code which allows extending the library. /// The - internal ImageFrame Clone(Configuration configuration) => new ImageFrame(configuration, this); + internal ImageFrame Clone(Configuration configuration) => new(configuration, this); /// /// Returns a copy of the image frame in the given pixel format. /// /// The pixel format. /// The - internal ImageFrame CloneAs() + internal ImageFrame? CloneAs() where TPixel2 : unmanaged, IPixel => this.CloneAs(this.GetConfiguration()); /// @@ -400,11 +399,11 @@ public sealed class ImageFrame : ImageFrame, IPixelSource { if (typeof(TPixel2) == typeof(TPixel)) { - return this.Clone(configuration) as ImageFrame; + return (this.Clone(configuration) as ImageFrame)!; } - var target = new ImageFrame(configuration, this.Width, this.Height, this.Metadata.DeepClone()); - var operation = new RowIntervalOperation(this.PixelBuffer, target.PixelBuffer, configuration); + ImageFrame target = new(configuration, this.Width, this.Height, this.Metadata.DeepClone()); + RowIntervalOperation operation = new(this.PixelBuffer, target.PixelBuffer, configuration); ParallelRowIterator.IterateRowIntervals( configuration, @@ -447,14 +446,12 @@ public sealed class ImageFrame : ImageFrame, IPixelSource } [MethodImpl(InliningOptions.ColdPath)] - private static void ThrowArgumentOutOfRangeException(string paramName) - { - throw new ArgumentOutOfRangeException(paramName); - } + private static void ThrowArgumentOutOfRangeException(string paramName) => throw new ArgumentOutOfRangeException(paramName); /// /// A implementing the clone logic for . /// + /// The type of the target pixel format. private readonly struct RowIntervalOperation : IRowIntervalOperation where TPixel2 : unmanaged, IPixel { diff --git a/src/ImageSharp/IndexedImageFrame{TPixel}.cs b/src/ImageSharp/IndexedImageFrame{TPixel}.cs index 741b3219e0..6807e77ad2 100644 --- a/src/ImageSharp/IndexedImageFrame{TPixel}.cs +++ b/src/ImageSharp/IndexedImageFrame{TPixel}.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using System.Buffers; using System.Runtime.CompilerServices; @@ -18,8 +17,8 @@ namespace SixLabors.ImageSharp; public sealed class IndexedImageFrame : IPixelSource, IDisposable where TPixel : unmanaged, IPixel { - private Buffer2D pixelBuffer; - private IMemoryOwner paletteOwner; + private readonly Buffer2D pixelBuffer; + private readonly IMemoryOwner paletteOwner; private bool isDisposed; /// @@ -109,8 +108,6 @@ public sealed class IndexedImageFrame : IPixelSource, IDisposable this.isDisposed = true; this.pixelBuffer.Dispose(); this.paletteOwner.Dispose(); - this.pixelBuffer = null; - this.paletteOwner = null; } } } diff --git a/src/ImageSharp/Memory/Buffer2D{T}.cs b/src/ImageSharp/Memory/Buffer2D{T}.cs index 8d6465389f..1173e02e17 100644 --- a/src/ImageSharp/Memory/Buffer2D{T}.cs +++ b/src/ImageSharp/Memory/Buffer2D{T}.cs @@ -55,6 +55,8 @@ public sealed class Buffer2D : IDisposable /// internal MemoryGroup FastMemoryGroup { get; private set; } + internal bool IsDisposed { get; private set; } + /// /// Gets a reference to the element at the specified position. /// @@ -79,7 +81,11 @@ public sealed class Buffer2D : IDisposable /// /// Disposes the instance /// - public void Dispose() => this.FastMemoryGroup.Dispose(); + public void Dispose() + { + this.FastMemoryGroup.Dispose(); + this.IsDisposed = true; + } /// /// Gets a to the row 'y' beginning from the pixel at the first pixel on that row. diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index cf19101211..2db61a06f3 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -3,6 +3,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -43,18 +47,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -81,18 +152,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -119,18 +257,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -157,18 +362,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -195,18 +467,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -233,18 +572,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -271,18 +677,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -309,18 +782,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -347,18 +887,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -385,18 +992,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -423,18 +1097,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -461,18 +1202,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -499,18 +1307,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -537,18 +1412,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -575,18 +1517,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -613,18 +1622,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -651,18 +1727,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -689,18 +1832,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -727,18 +1937,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -765,21 +2042,88 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); - } - } - } + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + } /// /// A pixel blender that implements the "AddSrcOver" composition equation. @@ -803,18 +2147,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -841,18 +2252,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -879,18 +2357,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -917,18 +2462,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -955,18 +2567,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -993,18 +2672,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1031,18 +2777,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1069,18 +2882,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1107,18 +2987,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1145,18 +3092,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1183,18 +3197,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1221,18 +3302,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1259,18 +3407,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1297,18 +3512,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1335,18 +3617,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1373,18 +3722,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1411,18 +3827,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1449,18 +3932,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1487,18 +4037,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1525,18 +4142,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1563,18 +4247,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1601,18 +4352,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1639,18 +4457,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1677,18 +4562,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1715,18 +4667,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1753,18 +4772,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1791,18 +4877,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1829,18 +4982,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1867,18 +5087,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1905,18 +5192,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1943,18 +5297,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -1981,18 +5402,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2019,18 +5507,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2057,18 +5612,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2095,18 +5717,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2133,18 +5822,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2171,18 +5927,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2209,18 +6032,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2247,18 +6137,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2285,18 +6242,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2323,18 +6347,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2361,18 +6452,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2399,18 +6557,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2437,18 +6662,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2475,18 +6767,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2513,18 +6872,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2551,18 +6977,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2589,18 +7082,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2627,18 +7187,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2665,18 +7292,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2703,18 +7397,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2741,18 +7502,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2779,18 +7607,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2817,18 +7712,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2855,18 +7817,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2893,18 +7922,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2931,18 +8027,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -2969,18 +8132,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3007,18 +8237,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3045,18 +8342,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3083,18 +8447,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3121,18 +8552,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3159,18 +8657,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3197,18 +8762,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3235,18 +8867,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3273,18 +8972,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3311,18 +9077,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3349,18 +9182,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3387,18 +9287,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3425,18 +9392,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3463,18 +9497,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3501,18 +9602,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3539,18 +9707,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3577,18 +9812,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3615,18 +9917,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3653,18 +10022,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3691,18 +10127,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3729,18 +10232,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3767,18 +10337,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3805,18 +10442,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3843,18 +10547,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3881,18 +10652,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3919,18 +10757,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3957,18 +10862,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -3995,18 +10967,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4033,18 +11072,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4071,18 +11177,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } @@ -4109,18 +11282,85 @@ internal static class DefaultPixelBlenders protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } + } + else { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) - { - destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 7bd51439ce..22b9ebf982 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -13,6 +13,10 @@ // using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -86,18 +90,85 @@ var blenders = new []{ protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount) { amount = Numerics.Clamp(amount, 0, 1); - for (int i = 0; i < destination.Length; i++) + + if (Avx2.IsSupported && destination.Length >= 2) { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } + } + else + { + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } } } /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - for (int i = 0; i < destination.Length; i++) + if (Avx2.IsSupported && destination.Length >= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector256 vOne = Vector256.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + // We need to create a Vector256 containing the current and next amount values + // taking up each half of the Vector256 and then clamp them. + Vector256 opacity = Vector256.Create( + Vector128.Create(amountBase), + Vector128.Create(Unsafe.Add(ref amountBase, 1))); + opacity = Avx.Min(Avx.Max(Vector256.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 2); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + int i = destination.Length - 1; + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + else { - destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1)); + for (int i = 0; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index ff41e70b20..bd522da192 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -5,6 +5,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -21,11 +23,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "NormalSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "NormalSrcAtop" compositing equation. /// @@ -36,7 +49,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Normal(backdrop, source)); + } + + /// + /// Returns the result of the "NormalSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Normal(backdrop, source)); } @@ -51,7 +79,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Normal(backdrop, source)); + } + + /// + /// Returns the result of the "NormalSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Normal(backdrop, source)); } @@ -66,11 +109,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "NormalSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrcOut" compositing equation. /// @@ -81,11 +135,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "NormalSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalDest" compositing equation. /// @@ -99,6 +164,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "NormalDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "NormalDestAtop" compositing equation. /// @@ -109,7 +187,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Normal(source, backdrop)); + } + + /// + /// Returns the result of the "NormalDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Normal(source, backdrop)); } @@ -124,7 +217,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Normal(source, backdrop)); + } + + /// + /// Returns the result of the "NormalDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Normal(source, backdrop)); } @@ -139,11 +247,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "NormalDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalDestOut" compositing equation. /// @@ -154,11 +273,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "NormalDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "NormalXor" compositing equation. /// @@ -169,11 +299,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "NormalXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalClear" compositing equation. /// @@ -184,11 +325,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 NormalClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "NormalClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "NormalSrc" compositing equation. @@ -416,11 +568,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "MultiplySrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "MultiplySrcAtop" compositing equation. /// @@ -431,7 +594,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Multiply(backdrop, source)); + } + + /// + /// Returns the result of the "MultiplySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Multiply(backdrop, source)); } @@ -446,7 +624,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Multiply(backdrop, source)); + } + + /// + /// Returns the result of the "MultiplySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Multiply(backdrop, source)); } @@ -461,11 +654,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrcOut" compositing equation. /// @@ -476,11 +680,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "MultiplySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyDest" compositing equation. /// @@ -494,6 +709,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "MultiplyDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "MultiplyDestAtop" compositing equation. /// @@ -504,7 +732,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Multiply(source, backdrop)); + } + + /// + /// Returns the result of the "MultiplyDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Multiply(source, backdrop)); } @@ -519,7 +762,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Multiply(source, backdrop)); + } + + /// + /// Returns the result of the "MultiplyDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Multiply(source, backdrop)); } @@ -534,11 +792,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyDestOut" compositing equation. /// @@ -549,11 +818,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "MultiplyDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "MultiplyXor" compositing equation. /// @@ -564,11 +844,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "MultiplyXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplyClear" compositing equation. /// @@ -579,11 +870,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 MultiplyClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "MultiplyClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "MultiplySrc" compositing equation. @@ -811,11 +1113,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "AddSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "AddSrcAtop" compositing equation. /// @@ -826,65 +1139,130 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Atop(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcOver" compositing equation. + /// Returns the result of the "AddSrcAtop" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Over(backdrop, source, Add(backdrop, source)); + return Atop(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcIn" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); - return In(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddSrcOut" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); - return Out(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); } /// - /// Returns the result of the "AddDest" compositing equation. + /// Returns the result of the "AddSrcIn" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddDest(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return In(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return Out(backdrop, source); + } + + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddDest(Vector4 backdrop, Vector4 source, float opacity) + { + return backdrop; + } + + /// + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDest(Vector256 backdrop, Vector256 source, Vector256 opacity) { return backdrop; } @@ -899,7 +1277,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Add(source, backdrop)); + } + + /// + /// Returns the result of the "AddDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Add(source, backdrop)); } @@ -914,7 +1307,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Add(source, backdrop)); + } + + /// + /// Returns the result of the "AddDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Add(source, backdrop)); } @@ -929,11 +1337,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "AddDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddDestOut" compositing equation. /// @@ -944,11 +1363,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "AddDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "AddXor" compositing equation. /// @@ -959,11 +1389,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "AddXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddClear" compositing equation. /// @@ -974,11 +1415,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 AddClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "AddClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "AddSrc" compositing equation. @@ -1206,11 +1658,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "SubtractSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "SubtractSrcAtop" compositing equation. /// @@ -1221,7 +1684,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Subtract(backdrop, source)); + } + + /// + /// Returns the result of the "SubtractSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Subtract(backdrop, source)); } @@ -1236,7 +1714,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Subtract(backdrop, source)); + } + + /// + /// Returns the result of the "SubtractSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Subtract(backdrop, source)); } @@ -1251,11 +1744,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrcOut" compositing equation. /// @@ -1266,11 +1770,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "SubtractSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractDest" compositing equation. /// @@ -1284,6 +1799,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "SubtractDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "SubtractDestAtop" compositing equation. /// @@ -1294,7 +1822,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Subtract(source, backdrop)); + } + + /// + /// Returns the result of the "SubtractDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Subtract(source, backdrop)); } @@ -1309,7 +1852,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Subtract(source, backdrop)); + } + + /// + /// Returns the result of the "SubtractDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Subtract(source, backdrop)); } @@ -1324,11 +1882,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "SubtractDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractDestOut" compositing equation. /// @@ -1339,11 +1908,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "SubtractDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "SubtractXor" compositing equation. /// @@ -1354,11 +1934,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "SubtractXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractClear" compositing equation. /// @@ -1369,11 +1960,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 SubtractClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "SubtractClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "SubtractSrc" compositing equation. @@ -1601,11 +2203,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "ScreenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "ScreenSrcAtop" compositing equation. /// @@ -1616,7 +2229,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Screen(backdrop, source)); + } + + /// + /// Returns the result of the "ScreenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Screen(backdrop, source)); } @@ -1631,7 +2259,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Screen(backdrop, source)); + } + + /// + /// Returns the result of the "ScreenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Screen(backdrop, source)); } @@ -1646,11 +2289,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrcOut" compositing equation. /// @@ -1661,11 +2315,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "ScreenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenDest" compositing equation. /// @@ -1679,19 +2344,62 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "ScreenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + + /// + /// Returns the result of the "ScreenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + /// /// Returns the result of the "ScreenDestAtop" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); - return Atop(source, backdrop, Screen(source, backdrop)); + return Over(source, backdrop, Screen(source, backdrop)); } /// @@ -1700,11 +2408,11 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opacity) + public static Vector256 ScreenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source.W *= opacity; + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Screen(source, backdrop)); } @@ -1719,11 +2427,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "ScreenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenDestOut" compositing equation. /// @@ -1734,11 +2453,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "ScreenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "ScreenXor" compositing equation. /// @@ -1749,11 +2479,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "ScreenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenClear" compositing equation. /// @@ -1764,11 +2505,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 ScreenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "ScreenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "ScreenSrc" compositing equation. @@ -1996,11 +2748,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "DarkenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "DarkenSrcAtop" compositing equation. /// @@ -2011,7 +2774,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Darken(backdrop, source)); + } + + /// + /// Returns the result of the "DarkenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Darken(backdrop, source)); } @@ -2026,7 +2804,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Darken(backdrop, source)); + } + + /// + /// Returns the result of the "DarkenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Darken(backdrop, source)); } @@ -2041,11 +2834,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrcOut" compositing equation. /// @@ -2056,11 +2860,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "DarkenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenDest" compositing equation. /// @@ -2074,6 +2889,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "DarkenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "DarkenDestAtop" compositing equation. /// @@ -2084,7 +2912,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Darken(source, backdrop)); + } + + /// + /// Returns the result of the "DarkenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Darken(source, backdrop)); } @@ -2099,7 +2942,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Darken(source, backdrop)); + } + + /// + /// Returns the result of the "DarkenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Darken(source, backdrop)); } @@ -2114,11 +2972,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "DarkenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenDestOut" compositing equation. /// @@ -2129,11 +2998,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "DarkenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "DarkenXor" compositing equation. /// @@ -2144,11 +3024,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "DarkenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenClear" compositing equation. /// @@ -2159,11 +3050,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 DarkenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "DarkenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "DarkenSrc" compositing equation. @@ -2391,11 +3293,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "LightenSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "LightenSrcAtop" compositing equation. /// @@ -2406,7 +3319,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Lighten(backdrop, source)); + } + + /// + /// Returns the result of the "LightenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Lighten(backdrop, source)); } @@ -2421,7 +3349,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Lighten(backdrop, source)); + } + + /// + /// Returns the result of the "LightenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Lighten(backdrop, source)); } @@ -2436,11 +3379,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "LightenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrcOut" compositing equation. /// @@ -2451,11 +3405,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "LightenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenDest" compositing equation. /// @@ -2469,6 +3434,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "LightenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "LightenDestAtop" compositing equation. /// @@ -2479,7 +3457,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Lighten(source, backdrop)); + } + + /// + /// Returns the result of the "LightenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Lighten(source, backdrop)); } @@ -2494,24 +3487,65 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Lighten(source, backdrop)); + } + + /// + /// Returns the result of the "LightenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Lighten(source, backdrop)); } /// - /// Returns the result of the "LightenDestIn" compositing equation. + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + { + source = WithW(source, source * opacity); + + return In(source, backdrop); + } + + /// + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + + /// + /// Returns the result of the "LightenDestOut" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); - return In(source, backdrop); + return Out(source, backdrop); } /// @@ -2520,14 +3554,10 @@ internal static partial class PorterDuffFunctions /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opacity) - { - source.W *= opacity; - - return Out(source, backdrop); - } + public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenXor" compositing equation. @@ -2539,11 +3569,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "LightenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenClear" compositing equation. /// @@ -2554,11 +3595,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 LightenClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "LightenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "LightenSrc" compositing equation. @@ -2786,11 +3838,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "OverlaySrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "OverlaySrcAtop" compositing equation. /// @@ -2801,7 +3864,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, Overlay(backdrop, source)); + } + + /// + /// Returns the result of the "OverlaySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, Overlay(backdrop, source)); } @@ -2816,7 +3894,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, Overlay(backdrop, source)); + } + + /// + /// Returns the result of the "OverlaySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, Overlay(backdrop, source)); } @@ -2831,11 +3924,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrcOut" compositing equation. /// @@ -2846,11 +3950,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlaySrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "OverlaySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayDest" compositing equation. /// @@ -2864,6 +3979,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "OverlayDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "OverlayDestAtop" compositing equation. /// @@ -2874,7 +4002,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, Overlay(source, backdrop)); + } + + /// + /// Returns the result of the "OverlayDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, Overlay(source, backdrop)); } @@ -2889,7 +4032,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, Overlay(source, backdrop)); + } + + /// + /// Returns the result of the "OverlayDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, Overlay(source, backdrop)); } @@ -2904,11 +4062,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "OverlayDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayDestOut" compositing equation. /// @@ -2919,11 +4088,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "OverlayDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "OverlayXor" compositing equation. /// @@ -2934,11 +4114,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "OverlayXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlayClear" compositing equation. /// @@ -2949,11 +4140,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 OverlayClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "OverlayClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "OverlaySrc" compositing equation. @@ -3181,11 +4383,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrc(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "HardLightSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "HardLightSrcAtop" compositing equation. /// @@ -3196,7 +4409,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, HardLight(backdrop, source)); + } + + /// + /// Returns the result of the "HardLightSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, HardLight(backdrop, source)); } @@ -3211,7 +4439,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, HardLight(backdrop, source)); + } + + /// + /// Returns the result of the "HardLightSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, HardLight(backdrop, source)); } @@ -3226,11 +4469,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrcOut" compositing equation. /// @@ -3241,11 +4495,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightSrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "HardLightSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightDest" compositing equation. /// @@ -3259,6 +4524,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "HardLightDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "HardLightDestAtop" compositing equation. /// @@ -3269,7 +4547,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, HardLight(source, backdrop)); + } + + /// + /// Returns the result of the "HardLightDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, HardLight(source, backdrop)); } @@ -3284,7 +4577,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, HardLight(source, backdrop)); + } + + /// + /// Returns the result of the "HardLightDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, HardLight(source, backdrop)); } @@ -3299,11 +4607,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "HardLightDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightDestOut" compositing equation. /// @@ -3314,11 +4633,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightDestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "HardLightDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "HardLightXor" compositing equation. /// @@ -3329,11 +4659,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightXor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "HardLightXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightClear" compositing equation. /// @@ -3344,11 +4685,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 HardLightClear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "HardLightClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "HardLightSrc" compositing equation. diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 40d8b89970..69dac875c3 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -15,6 +15,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -31,11 +33,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Src(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return source; } + /// + /// Returns the result of the "<#=blender#>Src compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. /// @@ -46,7 +59,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(backdrop, source, <#=blender#>(backdrop, source)); + } + + /// + /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(backdrop, source, <#=blender#>(backdrop, source)); } @@ -61,7 +89,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(backdrop, source, <#=blender#>(backdrop, source)); + } + + /// + /// Returns the result of the "<#=blender#>SrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(backdrop, source, <#=blender#>(backdrop, source)); } @@ -76,11 +119,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. /// @@ -91,11 +145,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>SrcOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>SrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Dest" compositing equation. /// @@ -109,6 +174,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "<#=blender#>Dest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Dest(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + return backdrop; + } + /// /// Returns the result of the "<#=blender#>DestAtop" compositing equation. /// @@ -119,7 +197,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestAtop(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Atop(source, backdrop, <#=blender#>(source, backdrop)); + } + + /// + /// Returns the result of the "<#=blender#>DestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Atop(source, backdrop, <#=blender#>(source, backdrop)); } @@ -134,7 +227,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOver(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); + + return Over(source, backdrop, <#=blender#>(source, backdrop)); + } + + /// + /// Returns the result of the "<#=blender#>DestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); return Over(source, backdrop, <#=blender#>(source, backdrop)); } @@ -149,11 +257,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestIn(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return In(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. /// @@ -164,11 +283,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>DestOut(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Out(source, backdrop); } + /// + /// Returns the result of the "<#=blender#>DestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + /// /// Returns the result of the "<#=blender#>Xor" compositing equation. /// @@ -179,11 +309,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Xor(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Xor(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Xor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + /// /// Returns the result of the "<#=blender#>Clear" compositing equation. /// @@ -194,11 +335,22 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 <#=blender#>Clear(Vector4 backdrop, Vector4 source, float opacity) { - source.W *= opacity; + source = WithW(source, source * opacity); return Clear(backdrop, source); } + /// + /// Returns the result of the "<#=blender#>Clear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) + => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + <#} #> <# void GenerateGenericPixelBlender(string blender, string composer) { #> diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 9bc7e35f30..baf7d80c0a 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -3,6 +3,8 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -19,6 +21,9 @@ namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; /// internal static partial class PorterDuffFunctions { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + /// /// Returns the result of the "Normal" compositing equation. /// @@ -27,9 +32,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Normal(Vector4 backdrop, Vector4 source) - { - return source; - } + => source; + + /// + /// Returns the result of the "Normal" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Normal(Vector256 backdrop, Vector256 source) + => source; /// /// Returns the result of the "Multiply" compositing equation. @@ -39,9 +52,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Multiply(Vector4 backdrop, Vector4 source) - { - return backdrop * source; - } + => backdrop * source; + + /// + /// Returns the result of the "Multiply" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Multiply(Vector256 backdrop, Vector256 source) + => Avx.Multiply(backdrop, source); /// /// Returns the result of the "Add" compositing equation. @@ -51,9 +72,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Add(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(Vector4.One, backdrop + source); - } + => Vector4.Min(Vector4.One, backdrop + source); + + /// + /// Returns the result of the "Add" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Add(Vector256 backdrop, Vector256 source) + => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); /// /// Returns the result of the "Subtract" compositing equation. @@ -63,9 +92,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Subtract(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(Vector4.Zero, backdrop - source); - } + => Vector4.Max(Vector4.Zero, backdrop - source); + + /// + /// Returns the result of the "Subtract" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Subtract(Vector256 backdrop, Vector256 source) + => Avx.Max(Vector256.Zero, Avx.Subtract(backdrop, source)); /// /// Returns the result of the "Screen" compositing equation. @@ -75,8 +112,19 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Screen(Vector4 backdrop, Vector4 source) + => Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + + /// + /// Returns the result of the "Screen" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Screen(Vector256 backdrop, Vector256 source) { - return Vector4.One - ((Vector4.One - backdrop) * (Vector4.One - source)); + Vector256 vOne = Vector256.Create(1F); + return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); } /// @@ -87,9 +135,17 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Darken(Vector4 backdrop, Vector4 source) - { - return Vector4.Min(backdrop, source); - } + => Vector4.Min(backdrop, source); + + /// + /// Returns the result of the "Darken" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Darken(Vector256 backdrop, Vector256 source) + => Avx.Min(backdrop, source); /// /// Returns the result of the "Lighten" compositing equation. @@ -98,10 +154,17 @@ internal static partial class PorterDuffFunctions /// The source vector. /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Lighten(Vector4 backdrop, Vector4 source) - { - return Vector4.Max(backdrop, source); - } + public static Vector4 Lighten(Vector4 backdrop, Vector4 source) => Vector4.Max(backdrop, source); + + /// + /// Returns the result of the "Lighten" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Lighten(Vector256 backdrop, Vector256 source) + => Avx.Max(backdrop, source); /// /// Returns the result of the "Overlay" compositing equation. @@ -119,6 +182,19 @@ internal static partial class PorterDuffFunctions return Vector4.Min(Vector4.One, new Vector4(cr, cg, cb, 0)); } + /// + /// Returns the result of the "Overlay" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Overlay(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(backdrop, source); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + /// /// Returns the result of the "HardLight" compositing equation. /// @@ -136,15 +212,44 @@ internal static partial class PorterDuffFunctions } /// - /// Helper function for Overlay andHardLight modes + /// Returns the result of the "HardLight" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 HardLight(Vector256 backdrop, Vector256 source) + { + Vector256 color = OverlayValueFunction(source, backdrop); + return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + } + + /// + /// Helper function for Overlay and HardLight modes /// /// Backdrop color element /// Source color element /// Overlay value [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float OverlayValueFunction(float backdrop, float source) + => backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); + + /// + /// Helper function for Overlay and HardLight modes + /// + /// Backdrop color element + /// Source color element + /// Overlay value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { - return backdrop <= 0.5f ? (2 * backdrop * source) : 1 - (2 * (1 - source) * (1 - backdrop)); + Vector256 vOne = Vector256.Create(1F); + Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); + + Vector256 vOneMinusSource = Avx.Subtract(vOne, source); + Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); + Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); + return Avx.BlendVariable(left, right, cmp); } /// @@ -158,21 +263,53 @@ internal static partial class PorterDuffFunctions public static Vector4 Over(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; - float srcW = source.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; + Vector4 srcW = sW - blendW; // calculate final alpha - float alpha = dstW + source.W; + Vector4 alpha = dstW + sW; // calculate final color Vector4 color = (destination * dstW) + (source * srcW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + return WithW(color, alpha); + } + + /// + /// Returns the result of the "Over" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Over(Vector256 destination, Vector256 source, Vector256 blend) + { + // calculate weights + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); + + Vector256 blendW = Avx.Multiply(sW, dW); + Vector256 dstW = Avx.Subtract(dW, blendW); + Vector256 srcW = Avx.Subtract(sW, blendW); + + // calculate final alpha + Vector256 alpha = Avx.Add(dstW, sW); + + // calculate final color + Vector256 color = Avx.Multiply(destination, dstW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW); + color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW); - return color; + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -186,20 +323,47 @@ internal static partial class PorterDuffFunctions public static Vector4 Atop(Vector4 destination, Vector4 source, Vector4 blend) { // calculate weights - float blendW = destination.W * source.W; - float dstW = destination.W - blendW; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 blendW = sW * dW; + Vector4 dstW = dW - blendW; // calculate final alpha - float alpha = destination.W; + Vector4 alpha = dW; // calculate final color Vector4 color = (destination * dstW) + (blend * blendW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + return WithW(color, alpha); + } - return color; + /// + /// Returns the result of the "Atop" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Atop(Vector256 destination, Vector256 source, Vector256 blend) + { + // calculate final alpha + Vector256 alpha = Avx.Permute(destination, ShuffleAlphaControl); + + // calculate weights + Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); + Vector256 blendW = Avx.Multiply(sW, alpha); + Vector256 dstW = Avx.Subtract(alpha, blendW); + + // calculate final color + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -211,13 +375,33 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 In(Vector4 destination, Vector4 source) { - float alpha = destination.W * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = dW * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + return WithW(color, alpha); + } - return color; + /// + /// Returns the result of the "In" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 In(Vector256 destination, Vector256 source) + { + // calculate alpha + Vector256 alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -229,13 +413,33 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Out(Vector4 destination, Vector4 source) { - float alpha = (1 - destination.W) * source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + Vector4 alpha = (Vector4.One - dW) * sW; Vector4 color = source * alpha; // premultiply - color /= MathF.Max(alpha, Constants.Epsilon); // unpremultiply - color.W = alpha; + color /= Vector4.Max(alpha, new(Constants.Epsilon)); // unpremultiply + return WithW(color, alpha); + } - return color; + /// + /// Returns the result of the "Out" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Out(Vector256 destination, Vector256 source) + { + // calculate alpha + Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); + + // premultiply + Vector256 color = Avx.Multiply(source, alpha); + + // unpremultiply + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); } /// @@ -247,22 +451,80 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 Xor(Vector4 destination, Vector4 source) { - float srcW = 1 - destination.W; - float dstW = 1 - source.W; + Vector4 sW = PermuteW(source); + Vector4 dW = PermuteW(destination); + + Vector4 srcW = Vector4.One - dW; + Vector4 dstW = Vector4.One - sW; + + Vector4 alpha = (sW * srcW) + (dW * dstW); + Vector4 color = (sW * source * srcW) + (dW * destination * dstW); + + // unpremultiply + color /= Vector4.Max(alpha, new(Constants.Epsilon)); + return WithW(color, alpha); + } + + /// + /// Returns the result of the "XOr" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Xor(Vector256 destination, Vector256 source) + { + // calculate weights + Vector256 sW = Avx.Shuffle(source, source, ShuffleAlphaControl); + Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); + + Vector256 vOne = Vector256.Create(1F); + Vector256 srcW = Avx.Subtract(vOne, dW); + Vector256 dstW = Avx.Subtract(vOne, sW); - float alpha = (source.W * srcW) + (destination.W * dstW); - Vector4 color = (source.W * source * srcW) + (destination.W * destination * dstW); + // calculate alpha + Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); + Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); // unpremultiply - color /= MathF.Max(alpha, Constants.Epsilon); - color.W = alpha; + color = Avx.Divide(color, Avx.Max(alpha, Vector256.Create(Constants.Epsilon))); + return Avx.Blend(color, alpha, BlendAlphaControl); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 Clear(Vector4 backdrop, Vector4 source) => Vector4.Zero; - return color; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 WithW(Vector4 value, Vector4 w) + { + if (Sse41.IsSupported) + { + return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4(); + } + + if (Sse.IsSupported) + { + // Create tmp as + // Then return (which is ) + Vector128 tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11); + return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4(); + } + + value.W = w.W; + return value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector4 Clear(Vector4 backdrop, Vector4 source) + private static Vector4 PermuteW(Vector4 value) { - return Vector4.Zero; + if (Sse.IsSupported) + { + return Sse.Shuffle(value.AsVector128(), value.AsVector128(), 0b11111111).AsVector4(); + } + + return new(value.W); } } diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 3ae5a3b5e4..50a68906e7 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -29,6 +29,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -38,6 +40,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -47,6 +51,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -56,6 +62,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4Slice3(source, dest, default); @@ -65,9 +73,11 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle3012)); } /// @@ -82,6 +92,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -91,6 +103,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -100,6 +114,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -109,18 +125,22 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0321)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0123)); } /// @@ -135,6 +155,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -144,6 +166,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -153,6 +177,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -162,15 +188,19 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4Slice3(source, dest, default); @@ -188,6 +218,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -197,6 +229,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -206,6 +240,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -215,18 +251,22 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0321)); } /// @@ -241,6 +281,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); @@ -250,36 +292,44 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle2103)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(SimdUtils.Shuffle.MMShuffle3012)); } /// @@ -294,24 +344,30 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); @@ -321,17 +377,21 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle2103)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(SimdUtils.Shuffle.MMShuffle3012)); } } diff --git a/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs b/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs index 575ce293ca..f830ddfd09 100644 --- a/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs +++ b/src/ImageSharp/Processing/Extensions/ProcessingExtensions.cs @@ -1,6 +1,5 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -#nullable disable using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -157,9 +156,9 @@ public static partial class ProcessingExtensions Guard.NotNull(operation, nameof(operation)); source.EnsureNotDisposed(); - var visitor = new ProcessingVisitor(configuration, operation, false); + ProcessingVisitor visitor = new(configuration, operation, false); source.AcceptVisitor(visitor); - return visitor.ResultImage; + return visitor.GetResultImage(); } /// @@ -275,6 +274,8 @@ public static partial class ProcessingExtensions private readonly bool mutate; + private Image? resultImage; + public ProcessingVisitor(Configuration configuration, Action operation, bool mutate) { this.configuration = configuration; @@ -282,7 +283,7 @@ public static partial class ProcessingExtensions this.mutate = mutate; } - public Image ResultImage { get; private set; } + public Image GetResultImage() => this.resultImage!; public void Visit(Image image) where TPixel : unmanaged, IPixel @@ -291,7 +292,7 @@ public static partial class ProcessingExtensions this.configuration.ImageOperationsProvider.CreateImageProcessingContext(this.configuration, image, this.mutate); this.operation(operationsRunner); - this.ResultImage = operationsRunner.GetResultImage(); + this.resultImage = operationsRunner.GetResultImage(); } } } diff --git a/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs index abb8beccdf..30c279e593 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor.cs @@ -19,7 +19,7 @@ public class AffineTransformProcessor : CloningImageProcessor public AffineTransformProcessor(Matrix3x2 matrix, IResampler sampler, Size targetDimensions) { Guard.NotNull(sampler, nameof(sampler)); - Guard.MustBeValueType(sampler, nameof(sampler)); + Guard.MustBeValueType(sampler); if (TransformUtils.IsDegenerate(matrix)) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs index 9b0b979e69..9e9507b737 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Linear/ProjectiveTransformProcessor.cs @@ -19,7 +19,7 @@ public sealed class ProjectiveTransformProcessor : CloningImageProcessor public ProjectiveTransformProcessor(Matrix4x4 matrix, IResampler sampler, Size targetDimensions) { Guard.NotNull(sampler, nameof(sampler)); - Guard.MustBeValueType(sampler, nameof(sampler)); + Guard.MustBeValueType(sampler); if (TransformUtils.IsDegenerate(matrix)) { diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs index b7651c03f3..719622a28f 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeProcessor.cs @@ -17,7 +17,7 @@ public class ResizeProcessor : CloningImageProcessor { Guard.NotNull(options, nameof(options)); Guard.NotNull(options.Sampler, nameof(options.Sampler)); - Guard.MustBeValueType(options.Sampler, nameof(options.Sampler)); + Guard.MustBeValueType(options.Sampler); (Size size, Rectangle rectangle) = ResizeHelper.CalculateTargetLocationAndBounds(sourceSize, options); diff --git a/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs b/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs index 44d6d8e720..ad0888ad77 100644 --- a/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs +++ b/src/ImageSharp/Processing/ProjectiveTransformBuilder.cs @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.Processing; /// public class ProjectiveTransformBuilder { - private readonly List> matrixFactories = new List>(); + private readonly List> matrixFactories = new(); /// /// Prepends a matrix that performs a tapering projective transform. @@ -313,7 +313,7 @@ public class ProjectiveTransformBuilder Guard.MustBeGreaterThan(sourceRectangle.Height, 0, nameof(sourceRectangle)); // Translate the origin matrix to cater for source rectangle offsets. - var matrix = Matrix4x4.CreateTranslation(new Vector3(-sourceRectangle.Location, 0)); + Matrix4x4 matrix = Matrix4x4.CreateTranslation(new Vector3(-sourceRectangle.Location, 0)); Size size = sourceRectangle.Size; diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index df308cdd4b..73dcf55a19 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -8,8 +8,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { - private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); - private static readonly XYZWPad3Shuffle4 ControlFast = default; + private static readonly DefaultPad3Shuffle4 Control = new(SimdUtils.Shuffle.MMShuffle1032); private byte[] source; private byte[] destination; @@ -26,15 +25,11 @@ public class Pad3Shuffle4Channel [Benchmark] public void Pad3Shuffle4() - { - SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); - } + => SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); [Benchmark] public void Pad3Shuffle4FastFallback() - { - SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); - } + => SimdUtils.Pad3Shuffle4(this.source, this.destination, default(XYZWPad3Shuffle4)); } // 2020-10-30 @@ -83,3 +78,50 @@ public class Pad3Shuffle4Channel // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT + +// Runtime=.NET 6.0 + +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs index 7f6f5901ff..9bd85f4743 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -1,14 +1,16 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using Iced.Intel; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle3Channel { - private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private static readonly DefaultShuffle3 Control = new(SimdUtils.Shuffle.MMShuffle3102); private byte[] source; private byte[] destination; @@ -25,9 +27,7 @@ public class Shuffle3Channel [Benchmark] public void Shuffle3() - { - SimdUtils.Shuffle3(this.source, this.destination, Control); - } + => SimdUtils.Shuffle3(this.source, this.destination, Control); } // 2020-11-02 @@ -60,3 +60,34 @@ public class Shuffle3Channel // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT + +// Runtime=.NET 6.0 + +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |--------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 96 | 15.18 ns | 0.056 ns | 0.053 ns | 0.34 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 384 | 29.70 ns | 0.061 ns | 0.054 ns | 0.19 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 768 | 49.28 ns | 0.156 ns | 0.131 ns | 0.16 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 1536 | 93.57 ns | 0.196 ns | 0.183 ns | 0.16 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index 0a0afb7050..f0890221c8 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -1,15 +1,16 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using Iced.Intel; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle4Slice3Channel { - private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); - private static readonly XYZWShuffle4Slice3 ControlFast = default; + private static readonly DefaultShuffle4Slice3 Control = new(SimdUtils.Shuffle.MMShuffle1032); private byte[] source; private byte[] destination; @@ -26,15 +27,11 @@ public class Shuffle4Slice3Channel [Benchmark] public void Shuffle4Slice3() - { - SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); - } + => SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); [Benchmark] public void Shuffle4Slice3FastFallback() - { - SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); - } + => SimdUtils.Shuffle4Slice3(this.source, this.destination, default(XYZWShuffle4Slice3)); } // 2020-10-29 @@ -91,3 +88,58 @@ public class Shuffle4Slice3Channel // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 45.59 ns | 0.166 ns | 0.147 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 128 | 15.62 ns | 0.056 ns | 0.052 ns | 0.34 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 128 | 16.37 ns | 0.047 ns | 0.040 ns | 0.36 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 13.23 ns | 0.028 ns | 0.026 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 128 | 14.41 ns | 0.013 ns | 0.012 ns | 1.09 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 128 | 14.70 ns | 0.050 ns | 0.047 ns | 1.11 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 85.48 ns | 0.192 ns | 0.179 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.18 ns | 0.230 ns | 0.204 ns | 0.22 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 256 | 18.66 ns | 0.017 ns | 0.015 ns | 0.22 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 24.34 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 256 | 18.58 ns | 0.061 ns | 0.057 ns | 0.76 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 256 | 19.23 ns | 0.018 ns | 0.016 ns | 0.79 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 165.31 ns | 0.742 ns | 0.694 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 512 | 28.10 ns | 0.077 ns | 0.068 ns | 0.17 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 512 | 28.99 ns | 0.018 ns | 0.014 ns | 0.18 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 53.45 ns | 0.270 ns | 0.226 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 512 | 27.50 ns | 0.034 ns | 0.028 ns | 0.51 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 512 | 28.76 ns | 0.017 ns | 0.015 ns | 0.54 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 323.87 ns | 0.549 ns | 0.487 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.81 ns | 0.056 ns | 0.050 ns | 0.13 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 1024 | 39.95 ns | 0.075 ns | 0.067 ns | 0.12 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 101.37 ns | 0.080 ns | 0.067 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.72 ns | 0.049 ns | 0.041 ns | 0.40 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 1024 | 39.78 ns | 0.029 ns | 0.027 ns | 0.39 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 642.95 ns | 2.067 ns | 1.933 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 73.19 ns | 0.082 ns | 0.077 ns | 0.11 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 2048 | 69.83 ns | 0.319 ns | 0.267 ns | 0.11 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 196.85 ns | 0.238 ns | 0.211 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 72.89 ns | 0.117 ns | 0.098 ns | 0.37 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 2048 | 69.59 ns | 0.073 ns | 0.061 ns | 0.35 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index 6323e1c55c..8e740743ba 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -24,9 +24,7 @@ public class ShuffleByte4Channel [Benchmark] public void Shuffle4Channel() - { - SimdUtils.Shuffle4(this.source, this.destination, default); - } + => SimdUtils.Shuffle4(this.source, this.destination, default); } // 2020-10-29 @@ -63,3 +61,38 @@ public class ShuffleByte4Channel // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 10.76 ns | 0.033 ns | 0.029 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.39 ns | 0.045 ns | 0.040 ns | 1.06 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 128 | 14.05 ns | 0.029 ns | 0.024 ns | 1.31 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 32.09 ns | 0.655 ns | 1.000 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 14.03 ns | 0.047 ns | 0.041 ns | 0.44 | 0.02 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 256 | 15.18 ns | 0.052 ns | 0.043 ns | 0.48 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 59.26 ns | 0.084 ns | 0.070 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 18.80 ns | 0.036 ns | 0.034 ns | 0.32 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 512 | 17.69 ns | 0.038 ns | 0.034 ns | 0.30 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 112.48 ns | 0.285 ns | 0.253 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 31.57 ns | 0.041 ns | 0.036 ns | 0.28 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 1024 | 28.41 ns | 0.068 ns | 0.064 ns | 0.25 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 218.59 ns | 0.303 ns | 0.283 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 53.04 ns | 0.106 ns | 0.099 ns | 0.24 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 2048 | 34.74 ns | 0.061 ns | 0.054 ns | 0.16 | 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index bdeb880828..1c338e1a6c 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -9,7 +9,6 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -25,9 +24,7 @@ public class ShuffleFloat4Channel [Benchmark] public void Shuffle4Channel() - { - SimdUtils.Shuffle4(this.source, this.destination, Control); - } + => SimdUtils.Shuffle4(this.source, this.destination, SimdUtils.Shuffle.MMShuffle2103); } // 2020-10-29 @@ -64,3 +61,38 @@ public class ShuffleFloat4Channel // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 57.819 ns | 0.2360 ns | 0.1970 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.564 ns | 0.0234 ns | 0.0195 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 128 | 7.770 ns | 0.0696 ns | 0.0617 ns | 0.13 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 105.282 ns | 0.2713 ns | 0.2405 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.867 ns | 0.0393 ns | 0.0348 ns | 0.19 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 256 | 17.586 ns | 0.0582 ns | 0.0544 ns | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 200.799 ns | 0.5678 ns | 0.5033 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 41.137 ns | 0.1524 ns | 0.1351 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 512 | 24.040 ns | 0.0445 ns | 0.0395 ns | 0.12 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 401.046 ns | 0.5865 ns | 0.5199 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 94.904 ns | 0.4633 ns | 0.4334 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 1024 | 68.456 ns | 0.1192 ns | 0.0996 ns | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 772.297 ns | 0.6270 ns | 0.5558 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 184.561 ns | 0.4319 ns | 0.4040 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 2048 | 133.634 ns | 1.7864 ns | 1.8345 ns | 0.17 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs index 68956c880f..3e6667dbc2 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsPixel.cs @@ -12,9 +12,9 @@ namespace SixLabors.ImageSharp.Benchmarks; public class PorterDuffBulkVsPixel { - private Configuration Configuration => Configuration.Default; + private static Configuration Configuration => Configuration.Default; - private void BulkVectorConvert( + private static void BulkVectorConvert( Span destination, Span background, Span source, @@ -31,18 +31,18 @@ public class PorterDuffBulkVsPixel Span backgroundSpan = buffer.Slice(destination.Length, destination.Length); Span sourceSpan = buffer.Slice(destination.Length * 2, destination.Length); - PixelOperations.Instance.ToVector4(this.Configuration, background, backgroundSpan); - PixelOperations.Instance.ToVector4(this.Configuration, source, sourceSpan); + PixelOperations.Instance.ToVector4(Configuration, background, backgroundSpan); + PixelOperations.Instance.ToVector4(Configuration, source, sourceSpan); for (int i = 0; i < destination.Length; i++) { destinationSpan[i] = PorterDuffFunctions.NormalSrcOver(backgroundSpan[i], sourceSpan[i], amount[i]); } - PixelOperations.Instance.FromVector4Destructive(this.Configuration, destinationSpan, destination); + PixelOperations.Instance.FromVector4Destructive(Configuration, destinationSpan, destination); } - private void BulkPixelConvert( + private static void BulkPixelConvert( Span destination, Span background, Span source, @@ -60,9 +60,9 @@ public class PorterDuffBulkVsPixel } [Benchmark(Description = "ImageSharp BulkVectorConvert")] - public Size BulkVectorConvert() + public static Size BulkVectorConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); @@ -70,23 +70,23 @@ public class PorterDuffBulkVsPixel for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkVectorConvert(span, span, span, amounts.GetSpan()); + BulkVectorConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); } [Benchmark(Description = "ImageSharp BulkPixelConvert")] - public Size BulkPixelConvert() + public static Size BulkPixelConvert() { - using var image = new Image(800, 800); + using Image image = new(800, 800); using IMemoryOwner amounts = Configuration.Default.MemoryAllocator.Allocate(image.Width); amounts.GetSpan().Fill(1); Buffer2D pixels = image.GetRootFramePixelBuffer(); for (int y = 0; y < image.Height; y++) { Span span = pixels.DangerousGetRowSpan(y); - this.BulkPixelConvert(span, span, span, amounts.GetSpan()); + BulkPixelConvert(span, span, span, amounts.GetSpan()); } return new Size(image.Width, image.Height); diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs new file mode 100644 index 0000000000..fcf7e9dcce --- /dev/null +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats.PixelBlenders; + +namespace SixLabors.ImageSharp.Benchmarks.PixelBlenders; + +public class PorterDuffBulkVsSingleVector +{ + private Vector4[] backdrop; + private Vector4[] source; + + [GlobalSetup] + public void Setup() + { + this.backdrop = new Vector4[8 * 20]; + this.source = new Vector4[8 * 20]; + + FillRandom(this.backdrop); + FillRandom(this.source); + } + + private static void FillRandom(Vector4[] arr) + { + Random rng = new(); + for (int i = 0; i < arr.Length; i++) + { + arr[i].X = rng.NextSingle(); + arr[i].Y = rng.NextSingle(); + arr[i].Z = rng.NextSingle(); + arr[i].W = rng.NextSingle(); + } + } + + [Benchmark(Description = "Scalar", Baseline = true)] + public Vector4 OverlayValueFunction_Scalar() + { + Vector4 result = default; + for (int i = 0; i < this.backdrop.Length; i++) + { + result = PorterDuffFunctions.NormalSrcOver(this.backdrop[i], this.source[i], .5F); + } + + return result; + } + + [Benchmark(Description = "Avx")] + public Vector256 OverlayValueFunction_Avx() + { + ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector256 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.source)); + + Vector256 result = default; + Vector256 opacity = Vector256.Create(.5F); + int count = this.backdrop.Length / 2; + for (int i = 0; i < count; i++) + { + result = PorterDuffFunctions.NormalSrcOver(Unsafe.Add(ref backdrop, i), Unsafe.Add(ref source, i), opacity); + } + + return result; + } +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 9727731b6e..960b0613e6 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -7,6 +7,271 @@ namespace SixLabors.ImageSharp.Tests.Common; public partial class SimdUtilsTests { + public static readonly TheoryData MMShuffleData = new() + { + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 0), SimdUtils.Shuffle.MMShuffle0000 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 1), SimdUtils.Shuffle.MMShuffle0001 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 2), SimdUtils.Shuffle.MMShuffle0002 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 3), SimdUtils.Shuffle.MMShuffle0003 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 0), SimdUtils.Shuffle.MMShuffle0010 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 1), SimdUtils.Shuffle.MMShuffle0011 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 2), SimdUtils.Shuffle.MMShuffle0012 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 3), SimdUtils.Shuffle.MMShuffle0013 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 0), SimdUtils.Shuffle.MMShuffle0020 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 1), SimdUtils.Shuffle.MMShuffle0021 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 2), SimdUtils.Shuffle.MMShuffle0022 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 3), SimdUtils.Shuffle.MMShuffle0023 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 0), SimdUtils.Shuffle.MMShuffle0030 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 1), SimdUtils.Shuffle.MMShuffle0031 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 2), SimdUtils.Shuffle.MMShuffle0032 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 3), SimdUtils.Shuffle.MMShuffle0033 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 0), SimdUtils.Shuffle.MMShuffle0100 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 1), SimdUtils.Shuffle.MMShuffle0101 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 2), SimdUtils.Shuffle.MMShuffle0102 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 3), SimdUtils.Shuffle.MMShuffle0103 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 0), SimdUtils.Shuffle.MMShuffle0110 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 1), SimdUtils.Shuffle.MMShuffle0111 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 2), SimdUtils.Shuffle.MMShuffle0112 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 3), SimdUtils.Shuffle.MMShuffle0113 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 0), SimdUtils.Shuffle.MMShuffle0120 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 1), SimdUtils.Shuffle.MMShuffle0121 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 2), SimdUtils.Shuffle.MMShuffle0122 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 3), SimdUtils.Shuffle.MMShuffle0123 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 0), SimdUtils.Shuffle.MMShuffle0130 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 1), SimdUtils.Shuffle.MMShuffle0131 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 2), SimdUtils.Shuffle.MMShuffle0132 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 3), SimdUtils.Shuffle.MMShuffle0133 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 0), SimdUtils.Shuffle.MMShuffle0200 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 1), SimdUtils.Shuffle.MMShuffle0201 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 2), SimdUtils.Shuffle.MMShuffle0202 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 3), SimdUtils.Shuffle.MMShuffle0203 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 0), SimdUtils.Shuffle.MMShuffle0210 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 1), SimdUtils.Shuffle.MMShuffle0211 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 2), SimdUtils.Shuffle.MMShuffle0212 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 3), SimdUtils.Shuffle.MMShuffle0213 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 0), SimdUtils.Shuffle.MMShuffle0220 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 1), SimdUtils.Shuffle.MMShuffle0221 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 2), SimdUtils.Shuffle.MMShuffle0222 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 3), SimdUtils.Shuffle.MMShuffle0223 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 0), SimdUtils.Shuffle.MMShuffle0230 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 1), SimdUtils.Shuffle.MMShuffle0231 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 2), SimdUtils.Shuffle.MMShuffle0232 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 3), SimdUtils.Shuffle.MMShuffle0233 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 0), SimdUtils.Shuffle.MMShuffle0300 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 1), SimdUtils.Shuffle.MMShuffle0301 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 2), SimdUtils.Shuffle.MMShuffle0302 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 3), SimdUtils.Shuffle.MMShuffle0303 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 0), SimdUtils.Shuffle.MMShuffle0310 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 1), SimdUtils.Shuffle.MMShuffle0311 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 2), SimdUtils.Shuffle.MMShuffle0312 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 3), SimdUtils.Shuffle.MMShuffle0313 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 0), SimdUtils.Shuffle.MMShuffle0320 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 1), SimdUtils.Shuffle.MMShuffle0321 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 2), SimdUtils.Shuffle.MMShuffle0322 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 3), SimdUtils.Shuffle.MMShuffle0323 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 0), SimdUtils.Shuffle.MMShuffle0330 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 1), SimdUtils.Shuffle.MMShuffle0331 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 2), SimdUtils.Shuffle.MMShuffle0332 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 3), SimdUtils.Shuffle.MMShuffle0333 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 0), SimdUtils.Shuffle.MMShuffle1000 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 1), SimdUtils.Shuffle.MMShuffle1001 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 2), SimdUtils.Shuffle.MMShuffle1002 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 3), SimdUtils.Shuffle.MMShuffle1003 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 0), SimdUtils.Shuffle.MMShuffle1010 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 1), SimdUtils.Shuffle.MMShuffle1011 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 2), SimdUtils.Shuffle.MMShuffle1012 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 3), SimdUtils.Shuffle.MMShuffle1013 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 0), SimdUtils.Shuffle.MMShuffle1020 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 1), SimdUtils.Shuffle.MMShuffle1021 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 2), SimdUtils.Shuffle.MMShuffle1022 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 3), SimdUtils.Shuffle.MMShuffle1023 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 0), SimdUtils.Shuffle.MMShuffle1030 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 1), SimdUtils.Shuffle.MMShuffle1031 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 2), SimdUtils.Shuffle.MMShuffle1032 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 3), SimdUtils.Shuffle.MMShuffle1033 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 0), SimdUtils.Shuffle.MMShuffle1100 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 1), SimdUtils.Shuffle.MMShuffle1101 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 2), SimdUtils.Shuffle.MMShuffle1102 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 3), SimdUtils.Shuffle.MMShuffle1103 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 0), SimdUtils.Shuffle.MMShuffle1110 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 1), SimdUtils.Shuffle.MMShuffle1111 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 2), SimdUtils.Shuffle.MMShuffle1112 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 3), SimdUtils.Shuffle.MMShuffle1113 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 0), SimdUtils.Shuffle.MMShuffle1120 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 1), SimdUtils.Shuffle.MMShuffle1121 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 2), SimdUtils.Shuffle.MMShuffle1122 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 3), SimdUtils.Shuffle.MMShuffle1123 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 0), SimdUtils.Shuffle.MMShuffle1130 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 1), SimdUtils.Shuffle.MMShuffle1131 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 2), SimdUtils.Shuffle.MMShuffle1132 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 3), SimdUtils.Shuffle.MMShuffle1133 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 0), SimdUtils.Shuffle.MMShuffle1200 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 1), SimdUtils.Shuffle.MMShuffle1201 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 2), SimdUtils.Shuffle.MMShuffle1202 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 3), SimdUtils.Shuffle.MMShuffle1203 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 0), SimdUtils.Shuffle.MMShuffle1210 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 1), SimdUtils.Shuffle.MMShuffle1211 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 2), SimdUtils.Shuffle.MMShuffle1212 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 3), SimdUtils.Shuffle.MMShuffle1213 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 0), SimdUtils.Shuffle.MMShuffle1220 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 1), SimdUtils.Shuffle.MMShuffle1221 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 2), SimdUtils.Shuffle.MMShuffle1222 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 3), SimdUtils.Shuffle.MMShuffle1223 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 0), SimdUtils.Shuffle.MMShuffle1230 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 1), SimdUtils.Shuffle.MMShuffle1231 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 2), SimdUtils.Shuffle.MMShuffle1232 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 3), SimdUtils.Shuffle.MMShuffle1233 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 0), SimdUtils.Shuffle.MMShuffle1300 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 1), SimdUtils.Shuffle.MMShuffle1301 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 2), SimdUtils.Shuffle.MMShuffle1302 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 3), SimdUtils.Shuffle.MMShuffle1303 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 0), SimdUtils.Shuffle.MMShuffle1310 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 1), SimdUtils.Shuffle.MMShuffle1311 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 2), SimdUtils.Shuffle.MMShuffle1312 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 3), SimdUtils.Shuffle.MMShuffle1313 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 0), SimdUtils.Shuffle.MMShuffle1320 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 1), SimdUtils.Shuffle.MMShuffle1321 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 2), SimdUtils.Shuffle.MMShuffle1322 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 3), SimdUtils.Shuffle.MMShuffle1323 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 0), SimdUtils.Shuffle.MMShuffle1330 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 1), SimdUtils.Shuffle.MMShuffle1331 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 2), SimdUtils.Shuffle.MMShuffle1332 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 3), SimdUtils.Shuffle.MMShuffle1333 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 0), SimdUtils.Shuffle.MMShuffle2000 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 1), SimdUtils.Shuffle.MMShuffle2001 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 2), SimdUtils.Shuffle.MMShuffle2002 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 3), SimdUtils.Shuffle.MMShuffle2003 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 0), SimdUtils.Shuffle.MMShuffle2010 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 1), SimdUtils.Shuffle.MMShuffle2011 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 2), SimdUtils.Shuffle.MMShuffle2012 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 3), SimdUtils.Shuffle.MMShuffle2013 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 0), SimdUtils.Shuffle.MMShuffle2020 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 1), SimdUtils.Shuffle.MMShuffle2021 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 2), SimdUtils.Shuffle.MMShuffle2022 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 3), SimdUtils.Shuffle.MMShuffle2023 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 0), SimdUtils.Shuffle.MMShuffle2030 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 1), SimdUtils.Shuffle.MMShuffle2031 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 2), SimdUtils.Shuffle.MMShuffle2032 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 3), SimdUtils.Shuffle.MMShuffle2033 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 0), SimdUtils.Shuffle.MMShuffle2100 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 1), SimdUtils.Shuffle.MMShuffle2101 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 2), SimdUtils.Shuffle.MMShuffle2102 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 3), SimdUtils.Shuffle.MMShuffle2103 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 0), SimdUtils.Shuffle.MMShuffle2110 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 1), SimdUtils.Shuffle.MMShuffle2111 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 2), SimdUtils.Shuffle.MMShuffle2112 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 3), SimdUtils.Shuffle.MMShuffle2113 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 0), SimdUtils.Shuffle.MMShuffle2120 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 1), SimdUtils.Shuffle.MMShuffle2121 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 2), SimdUtils.Shuffle.MMShuffle2122 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 3), SimdUtils.Shuffle.MMShuffle2123 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MMShuffle2130 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 1), SimdUtils.Shuffle.MMShuffle2131 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 2), SimdUtils.Shuffle.MMShuffle2132 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 3), SimdUtils.Shuffle.MMShuffle2133 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 0), SimdUtils.Shuffle.MMShuffle2200 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 1), SimdUtils.Shuffle.MMShuffle2201 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 2), SimdUtils.Shuffle.MMShuffle2202 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 3), SimdUtils.Shuffle.MMShuffle2203 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 0), SimdUtils.Shuffle.MMShuffle2210 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 1), SimdUtils.Shuffle.MMShuffle2211 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 2), SimdUtils.Shuffle.MMShuffle2212 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 3), SimdUtils.Shuffle.MMShuffle2213 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 0), SimdUtils.Shuffle.MMShuffle2220 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 1), SimdUtils.Shuffle.MMShuffle2221 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 2), SimdUtils.Shuffle.MMShuffle2222 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 3), SimdUtils.Shuffle.MMShuffle2223 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 0), SimdUtils.Shuffle.MMShuffle2230 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 1), SimdUtils.Shuffle.MMShuffle2231 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 2), SimdUtils.Shuffle.MMShuffle2232 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 3), SimdUtils.Shuffle.MMShuffle2233 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 0), SimdUtils.Shuffle.MMShuffle2300 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 1), SimdUtils.Shuffle.MMShuffle2301 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 2), SimdUtils.Shuffle.MMShuffle2302 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 3), SimdUtils.Shuffle.MMShuffle2303 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 0), SimdUtils.Shuffle.MMShuffle2310 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 1), SimdUtils.Shuffle.MMShuffle2311 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 2), SimdUtils.Shuffle.MMShuffle2312 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 3), SimdUtils.Shuffle.MMShuffle2313 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 0), SimdUtils.Shuffle.MMShuffle2320 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 1), SimdUtils.Shuffle.MMShuffle2321 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 2), SimdUtils.Shuffle.MMShuffle2322 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 3), SimdUtils.Shuffle.MMShuffle2323 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 0), SimdUtils.Shuffle.MMShuffle2330 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 1), SimdUtils.Shuffle.MMShuffle2331 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 2), SimdUtils.Shuffle.MMShuffle2332 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 3), SimdUtils.Shuffle.MMShuffle2333 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 0), SimdUtils.Shuffle.MMShuffle3000 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 1), SimdUtils.Shuffle.MMShuffle3001 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 2), SimdUtils.Shuffle.MMShuffle3002 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 3), SimdUtils.Shuffle.MMShuffle3003 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 0), SimdUtils.Shuffle.MMShuffle3010 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 1), SimdUtils.Shuffle.MMShuffle3011 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 2), SimdUtils.Shuffle.MMShuffle3012 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 3), SimdUtils.Shuffle.MMShuffle3013 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 0), SimdUtils.Shuffle.MMShuffle3020 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 1), SimdUtils.Shuffle.MMShuffle3021 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 2), SimdUtils.Shuffle.MMShuffle3022 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 3), SimdUtils.Shuffle.MMShuffle3023 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 0), SimdUtils.Shuffle.MMShuffle3030 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 1), SimdUtils.Shuffle.MMShuffle3031 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 2), SimdUtils.Shuffle.MMShuffle3032 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 3), SimdUtils.Shuffle.MMShuffle3033 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 0), SimdUtils.Shuffle.MMShuffle3100 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 1), SimdUtils.Shuffle.MMShuffle3101 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 2), SimdUtils.Shuffle.MMShuffle3102 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 3), SimdUtils.Shuffle.MMShuffle3103 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 0), SimdUtils.Shuffle.MMShuffle3110 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 1), SimdUtils.Shuffle.MMShuffle3111 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 2), SimdUtils.Shuffle.MMShuffle3112 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 3), SimdUtils.Shuffle.MMShuffle3113 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 0), SimdUtils.Shuffle.MMShuffle3120 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 1), SimdUtils.Shuffle.MMShuffle3121 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 2), SimdUtils.Shuffle.MMShuffle3122 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 3), SimdUtils.Shuffle.MMShuffle3123 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 0), SimdUtils.Shuffle.MMShuffle3130 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 1), SimdUtils.Shuffle.MMShuffle3131 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 2), SimdUtils.Shuffle.MMShuffle3132 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 3), SimdUtils.Shuffle.MMShuffle3133 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 0), SimdUtils.Shuffle.MMShuffle3200 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 1), SimdUtils.Shuffle.MMShuffle3201 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 2), SimdUtils.Shuffle.MMShuffle3202 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 3), SimdUtils.Shuffle.MMShuffle3203 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 0), SimdUtils.Shuffle.MMShuffle3210 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 1), SimdUtils.Shuffle.MMShuffle3211 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 2), SimdUtils.Shuffle.MMShuffle3212 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 3), SimdUtils.Shuffle.MMShuffle3213 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 0), SimdUtils.Shuffle.MMShuffle3220 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 1), SimdUtils.Shuffle.MMShuffle3221 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 2), SimdUtils.Shuffle.MMShuffle3222 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 3), SimdUtils.Shuffle.MMShuffle3223 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 0), SimdUtils.Shuffle.MMShuffle3230 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 1), SimdUtils.Shuffle.MMShuffle3231 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 2), SimdUtils.Shuffle.MMShuffle3232 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 3), SimdUtils.Shuffle.MMShuffle3233 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 0), SimdUtils.Shuffle.MMShuffle3300 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 1), SimdUtils.Shuffle.MMShuffle3301 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 2), SimdUtils.Shuffle.MMShuffle3302 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 3), SimdUtils.Shuffle.MMShuffle3303 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 0), SimdUtils.Shuffle.MMShuffle3310 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 1), SimdUtils.Shuffle.MMShuffle3311 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 2), SimdUtils.Shuffle.MMShuffle3312 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 3), SimdUtils.Shuffle.MMShuffle3313 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 0), SimdUtils.Shuffle.MMShuffle3320 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 1), SimdUtils.Shuffle.MMShuffle3321 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 2), SimdUtils.Shuffle.MMShuffle3322 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 3), SimdUtils.Shuffle.MMShuffle3323 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 0), SimdUtils.Shuffle.MMShuffle3330 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 1), SimdUtils.Shuffle.MMShuffle3331 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 2), SimdUtils.Shuffle.MMShuffle3332 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 3), SimdUtils.Shuffle.MMShuffle3333 } + }; + + [Theory] + [MemberData(nameof(MMShuffleData))] + public void MMShuffleConstantsAreCorrect(byte expected, byte actual) + => Assert.Equal(expected, actual); + [Theory] [MemberData(nameof(ArraySizesDivisibleBy4))] public void BulkShuffleFloat4Channel(int count) @@ -16,7 +281,7 @@ public partial class SimdUtilsTests // No need to test multiple shuffle controls as the // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + const byte control = SimdUtils.Shuffle.MMShuffle0123; TestShuffleFloat4Channel( size, @@ -45,39 +310,39 @@ public partial class SimdUtilsTests TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), - wxyz.Control); + SimdUtils.Shuffle.MMShuffle2103); WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), - wzyx.Control); + SimdUtils.Shuffle.MMShuffle0123); YZWXShuffle4 yzwx = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), - yzwx.Control); + SimdUtils.Shuffle.MMShuffle0321); ZYXWShuffle4 zyxw = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), - zyxw.Control); + SimdUtils.Shuffle.MMShuffle3012); - var xwyz = new DefaultShuffle4(2, 1, 3, 0); + DefaultShuffle4 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultShuffle4(1, 1, 1, 1); + DefaultShuffle4 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultShuffle4(3, 3, 3, 3); + DefaultShuffle4 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), @@ -101,25 +366,25 @@ public partial class SimdUtilsTests // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - var zyx = new DefaultShuffle3(0, 1, 2); + DefaultShuffle3 zyx = new(SimdUtils.Shuffle.MMShuffle3012); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), zyx.Control); - var xyz = new DefaultShuffle3(2, 1, 0); + DefaultShuffle3 xyz = new(SimdUtils.Shuffle.MMShuffle3210); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), xyz.Control); - var yyy = new DefaultShuffle3(1, 1, 1); + DefaultShuffle3 yyy = new(SimdUtils.Shuffle.MMShuffle3111); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), yyy.Control); - var zzz = new DefaultShuffle3(2, 2, 2); + DefaultShuffle3 zzz = new(SimdUtils.Shuffle.MMShuffle3222); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), @@ -147,21 +412,21 @@ public partial class SimdUtilsTests TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), - xyzw.Control); + SimdUtils.Shuffle.MMShuffle3210); - var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + DefaultPad3Shuffle4 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + DefaultPad3Shuffle4 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + DefaultPad3Shuffle4 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), @@ -189,21 +454,21 @@ public partial class SimdUtilsTests TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), - xyzw.Control); + SimdUtils.Shuffle.MMShuffle3210); - var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + DefaultShuffle4Slice3 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + DefaultShuffle4Slice3 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + DefaultShuffle4Slice3 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), @@ -222,11 +487,11 @@ public partial class SimdUtilsTests byte control) { float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); - var result = new float[count]; + float[] result = new float[count]; float[] expected = new float[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -253,11 +518,11 @@ public partial class SimdUtilsTests { byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count]; + byte[] result = new byte[count]; byte[] expected = new byte[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -284,11 +549,11 @@ public partial class SimdUtilsTests { byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count]; + byte[] result = new byte[count]; byte[] expected = new byte[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int _, out int p2, @@ -315,11 +580,11 @@ public partial class SimdUtilsTests byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count * 4 / 3]; + byte[] result = new byte[count * 4 / 3]; byte[] expected = new byte[result.Length]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -366,11 +631,11 @@ public partial class SimdUtilsTests byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count * 3 / 4]; + byte[] result = new byte[count * 3 / 4]; byte[] expected = new byte[result.Length]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int _, out int p2, diff --git a/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs b/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs index fba19065b0..62d1ef4db9 100644 --- a/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs +++ b/tests/ImageSharp.Tests/Image/ImageFrameCollectionTests.Generic.cs @@ -182,12 +182,12 @@ public abstract partial class ImageFrameCollectionTests new[] { imageFrame1, imageFrame2 }); IPixelSource[] framesSnapShot = collection.OfType>().ToArray(); + + Assert.All(framesSnapShot, f => Assert.False(f.PixelBuffer.IsDisposed)); + collection.Dispose(); - Assert.All( - framesSnapShot, - f => // The pixel source of the frame is null after its been disposed. - Assert.Null(f.PixelBuffer)); + Assert.All(framesSnapShot, f => Assert.True(f.PixelBuffer.IsDisposed)); } [Theory] diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs index c81b0a74ff..1086afe76d 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs @@ -1,59 +1,66 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; - using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Tests.TestUtilities; -using Xunit; +namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffCompositorTests { // TODO: Add other modes to compare. public static readonly TheoryData CompositingOperators = - new TheoryData - { - PixelAlphaCompositionMode.Src, - PixelAlphaCompositionMode.SrcAtop, - PixelAlphaCompositionMode.SrcOver, - PixelAlphaCompositionMode.SrcIn, - PixelAlphaCompositionMode.SrcOut, - PixelAlphaCompositionMode.Dest, - PixelAlphaCompositionMode.DestAtop, - PixelAlphaCompositionMode.DestOver, - PixelAlphaCompositionMode.DestIn, - PixelAlphaCompositionMode.DestOut, - PixelAlphaCompositionMode.Clear, - PixelAlphaCompositionMode.Xor - }; + new() + { + PixelAlphaCompositionMode.Src, + PixelAlphaCompositionMode.SrcAtop, + PixelAlphaCompositionMode.SrcOver, + PixelAlphaCompositionMode.SrcIn, + PixelAlphaCompositionMode.SrcOut, + PixelAlphaCompositionMode.Dest, + PixelAlphaCompositionMode.DestAtop, + PixelAlphaCompositionMode.DestOver, + PixelAlphaCompositionMode.DestIn, + PixelAlphaCompositionMode.DestOut, + PixelAlphaCompositionMode.Clear, + PixelAlphaCompositionMode.Xor + }; [Theory] [WithFile(TestImages.Png.PDDest, nameof(CompositingOperators), PixelTypes.Rgba32)] public void PorterDuffOutputIsCorrect(TestImageProvider provider, PixelAlphaCompositionMode mode) { - var srcFile = TestFile.Create(TestImages.Png.PDSrc); - using (Image src = srcFile.CreateRgba32Image()) - using (Image dest = provider.GetImage()) + static void RunTest(string providerDump, string alphaMode) { - var options = new GraphicsOptions + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); + + TestFile srcFile = TestFile.Create(TestImages.Png.PDSrc); + using Image src = srcFile.CreateRgba32Image(); + using Image dest = provider.GetImage(); + GraphicsOptions options = new() { Antialias = false, - AlphaCompositionMode = mode + AlphaCompositionMode = Enum.Parse(alphaMode) }; - using (Image res = dest.Clone(x => x.DrawImage(src, options))) - { - string combinedMode = mode.ToString(); - - if (combinedMode != "Src" && combinedMode.StartsWith("Src")) - { - combinedMode = combinedMode.Substring(3); - } + using Image res = dest.Clone(x => x.DrawImage(src, options)); + string combinedMode = alphaMode; - res.DebugSave(provider, combinedMode); - res.CompareToReferenceOutput(provider, combinedMode); + if (combinedMode != "Src" && combinedMode.StartsWith("Src", StringComparison.OrdinalIgnoreCase)) + { + combinedMode = combinedMode[3..]; } + + res.DebugSave(provider, combinedMode); + res.CompareToReferenceOutput(provider, combinedMode); } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX, + provider, + mode.ToString()); } } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 45dece8ec8..189d21f1e6 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -2,6 +2,7 @@ // Licensed under the Six Labors Split License. using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats.PixelBlenders; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -9,7 +10,9 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTests { - public static TheoryData NormalBlendFunctionData = new TheoryData + private static readonly ApproximateFloatComparer FloatComparer = new(.000001F); + + public static TheoryData NormalBlendFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) } @@ -23,7 +26,19 @@ public class PorterDuffFunctionsTests Assert.Equal(expected, actual); } - public static TheoryData MultiplyFunctionData = new TheoryData + [Theory] + [MemberData(nameof(NormalBlendFunctionData))] + public void NormalBlendFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.NormalSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData MultiplyFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1) }, @@ -38,22 +53,46 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData AddFunctionData = new TheoryData + [Theory] + [MemberData(nameof(MultiplyFunctionData))] + public void MultiplyFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.MultiplySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, - { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, - { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(.2075676f, .2075676f, .2075676f, .37f) } + { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1) }, + { new TestVector4(0.2f, 0.2f, 0.2f, 0.3f), new TestVector4(0.3f, 0.3f, 0.3f, 0.2f), .5f, new TestVector4(0.24324325f, 0.24324325f, 0.24324325f, .37f) } }; [Theory] [MemberData(nameof(AddFunctionData))] public void AddFunction(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) { - Vector4 actual = PorterDuffFunctions.MultiplySrcOver((Vector4)back, source, amount); + Vector4 actual = PorterDuffFunctions.AddSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } - public static TheoryData SubtractFunctionData = new TheoryData + [Theory] + [MemberData(nameof(AddFunctionData))] + public void AddFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.AddSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -68,7 +107,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData ScreenFunctionData = new TheoryData + [Theory] + [MemberData(nameof(SubtractFunctionData))] + public void SubtractFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.SubtractSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -83,7 +134,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData DarkenFunctionData = new TheoryData + [Theory] + [MemberData(nameof(ScreenFunctionData))] + public void ScreenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.ScreenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(.6f, .6f, .6f, 1f) }, @@ -98,7 +161,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData LightenFunctionData = new TheoryData + [Theory] + [MemberData(nameof(DarkenFunctionData))] + public void DarkenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.DarkenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -113,7 +188,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData OverlayFunctionData = new TheoryData + [Theory] + [MemberData(nameof(LightenFunctionData))] + public void LightenFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.LightenSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(1, 1, 1, 1f) }, @@ -128,7 +215,19 @@ public class PorterDuffFunctionsTests VectorAssert.Equal(expected, actual, 5); } - public static TheoryData HardLightFunctionData = new TheoryData + [Theory] + [MemberData(nameof(OverlayFunctionData))] + public void OverlayFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.OverlaySrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } + + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, { new TestVector4(1, 1, 1, 1), new TestVector4(0, 0, 0, .8f), .5f, new TestVector4(0.6f, 0.6f, 0.6f, 1f) }, @@ -142,4 +241,16 @@ public class PorterDuffFunctionsTests Vector4 actual = PorterDuffFunctions.HardLightSrcOver((Vector4)back, source, amount); VectorAssert.Equal(expected, actual, 5); } + + [Theory] + [MemberData(nameof(HardLightFunctionData))] + public void HardLightFunction256(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + Vector256 back256 = Vector256.Create(back.X, back.Y, back.Z, back.W, back.X, back.Y, back.Z, back.W); + Vector256 source256 = Vector256.Create(source.X, source.Y, source.Z, source.W, source.X, source.Y, source.Z, source.W); + + Vector256 expected256 = Vector256.Create(expected.X, expected.Y, expected.Z, expected.W, expected.X, expected.Y, expected.Z, expected.W); + Vector256 actual = PorterDuffFunctions.HardLightSrcOver(back256, source256, Vector256.Create(amount)); + Assert.Equal(expected256, actual, FloatComparer); + } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs index 6d9652d898..e35f36feec 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs @@ -1,7 +1,9 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Diagnostics.CodeAnalysis; using System.Numerics; +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Tests; @@ -14,7 +16,8 @@ internal readonly struct ApproximateFloatComparer : IEqualityComparer, IEqualityComparer, IEqualityComparer, - IEqualityComparer + IEqualityComparer, + IEqualityComparer> { private readonly float epsilon; @@ -72,4 +75,16 @@ internal readonly struct ApproximateFloatComparer : /// public int GetHashCode(ColorMatrix obj) => obj.GetHashCode(); + + public bool Equals(Vector256 x, Vector256 y) + => this.Equals(x.GetElement(0), y.GetElement(0)) + && this.Equals(x.GetElement(1), y.GetElement(1)) + && this.Equals(x.GetElement(2), y.GetElement(2)) + && this.Equals(x.GetElement(3), y.GetElement(3)) + && this.Equals(x.GetElement(4), y.GetElement(4)) + && this.Equals(x.GetElement(5), y.GetElement(5)) + && this.Equals(x.GetElement(6), y.GetElement(6)) + && this.Equals(x.GetElement(7), y.GetElement(7)); + + public int GetHashCode([DisallowNull] Vector256 obj) => obj.GetHashCode(); } diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 1bb64d99d6..f68bfdbe6e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -257,6 +257,52 @@ public static class FeatureTestRunner } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + /// The second value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T arg1, + string arg2) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + ProcessStartInfo processStartInfo = new(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(arg1), + arg2, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(arg1), arg2); + } + } + } + /// /// Runs the given test within an environment /// where the given features.