diff --git a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
index fd9692f9ae..ad0318297a 100644
--- a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
+++ b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
@@ -18,7 +18,10 @@ public readonly struct ParallelExecutionSettings
///
/// Initializes a new instance of the struct.
///
- /// The value used for initializing when using TPL.
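+ /// <param name="maxDegreeOfParallelism">
+ /// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
+ /// Set to -1 to leave the degree of parallelism unbounded.
+ /// </param>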
/// The value for .
/// The .
public ParallelExecutionSettings(
@@ -44,7 +47,10 @@ public readonly struct ParallelExecutionSettings
///
/// Initializes a new instance of the struct.
///
- /// The value used for initializing when using TPL.
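+ /// <param name="maxDegreeOfParallelism">
+ /// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
+ /// Set to -1 to leave the degree of parallelism unbounded.
+ /// </param>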
/// The .
public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator)
: this(maxDegreeOfParallelism, DefaultMinimumPixelsProcessedPerTask, memoryAllocator)
@@ -58,6 +64,7 @@ public readonly struct ParallelExecutionSettings
///
/// Gets the value used for initializing when using TPL.
+ /// A value of -1 leaves the degree of parallelism unbounded.
///
public int MaxDegreeOfParallelism { get; }
diff --git a/src/ImageSharp/Advanced/ParallelRowIterator.cs b/src/ImageSharp/Advanced/ParallelRowIterator.cs
index d170631a29..98c2656d11 100644
--- a/src/ImageSharp/Advanced/ParallelRowIterator.cs
+++ b/src/ImageSharp/Advanced/ParallelRowIterator.cs
@@ -44,14 +44,14 @@ public static partial class ParallelRowIterator
where T : struct, IRowOperation
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
// Avoid TPL overhead in this trivial case:
if (numOfSteps == 1)
@@ -65,7 +65,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(rectangle.Height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowOperationWrapper wrappingOperation = new(top, bottom, verticalStep, in operation);
_ = Parallel.For(
@@ -109,14 +109,14 @@ public static partial class ParallelRowIterator
where TBuffer : unmanaged
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
MemoryAllocator allocator = parallelSettings.MemoryAllocator;
int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle);
@@ -135,7 +135,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowOperationWrapper wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation);
_ = Parallel.For(
@@ -174,14 +174,14 @@ public static partial class ParallelRowIterator
where T : struct, IRowIntervalOperation
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
// Avoid TPL overhead in this trivial case:
if (numOfSteps == 1)
@@ -192,7 +192,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(rectangle.Height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowIntervalOperationWrapper wrappingOperation = new(top, bottom, verticalStep, in operation);
_ = Parallel.For(
@@ -236,14 +236,14 @@ public static partial class ParallelRowIterator
where TBuffer : unmanaged
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
MemoryAllocator allocator = parallelSettings.MemoryAllocator;
int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle);
@@ -259,7 +259,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowIntervalOperationWrapper wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation);
_ = Parallel.For(
@@ -272,6 +272,37 @@ public static partial class ParallelRowIterator
[MethodImpl(InliningOptions.ShortMethod)]
private static int DivideCeil(long dividend, int divisor) => (int)Math.Min(1 + ((dividend - 1) / divisor), int.MaxValue);
+ /// <summary>
+ /// Creates the <see cref="ParallelOptions"/> for the current iteration.
+ /// </summary>
+ /// <param name="parallelSettings">The execution settings.</param>
+ /// <param name="numOfSteps">The number of row partitions to execute.</param>
+ /// <returns>The <see cref="ParallelOptions"/> instance; its degree of parallelism is -1 when the settings specify -1, otherwise <paramref name="numOfSteps"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static ParallelOptions CreateParallelOptions(in ParallelExecutionSettings parallelSettings, int numOfSteps)
+ => new() { MaxDegreeOfParallelism = parallelSettings.MaxDegreeOfParallelism == -1 ? -1 : numOfSteps };
+
+ /// <summary>
+ /// Calculates the number of row partitions to execute for the given region.
+ /// </summary>
+ /// <param name="width">The width of the region.</param>
+ /// <param name="height">The height of the region.</param>
+ /// <param name="parallelSettings">The execution settings.</param>
+ /// <returns>The number of row partitions to execute.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static int GetNumberOfSteps(int width, int height, in ParallelExecutionSettings parallelSettings)
+ {
+ int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
+
+ if (parallelSettings.MaxDegreeOfParallelism == -1)
+ {
+ // With no configured cap (-1) the row count is the upper bound: more partitions than rows would be of no use.
+ return Math.Min(height, maxSteps);
+ }
+
+ return Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ }
+
private static void ValidateRectangle(Rectangle rectangle)
{
Guard.MustBeGreaterThan(
@@ -284,4 +315,35 @@ public static partial class ParallelRowIterator
0,
$"{nameof(rectangle)}.{nameof(rectangle.Height)}");
}
+
+ /// <summary>
+ /// Validates the supplied <see cref="ParallelExecutionSettings"/>.
+ /// </summary>
+ /// <param name="parallelSettings">The execution settings.</param>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// Thrown when <see cref="ParallelExecutionSettings.MaxDegreeOfParallelism"/> or
+ /// <see cref="ParallelExecutionSettings.MinimumPixelsProcessedPerTask"/> is invalid.
+ /// </exception>
+ /// <exception cref="ArgumentNullException">
+ /// Thrown when <see cref="ParallelExecutionSettings.MemoryAllocator"/> is null.
+ /// This also guards the public default value, which bypasses constructor validation.
+ /// </exception>
+ private static void ValidateSettings(in ParallelExecutionSettings parallelSettings)
+ {
+ // A default struct value bypasses constructor validation, so re-check here: only -1 or a positive degree is accepted.
+ if (parallelSettings.MaxDegreeOfParallelism is 0 or < -1)
+ {
+ throw new ArgumentOutOfRangeException(
+ $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}");
+ }
+
+ Guard.MustBeGreaterThan(
+ parallelSettings.MinimumPixelsProcessedPerTask,
+ 0,
+ $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}");
+
+ Guard.NotNull(
+ parallelSettings.MemoryAllocator,
+ $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}");
+ }
}
diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index efe68977bb..04ed48e210 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -643,6 +643,20 @@ internal static class Numerics
return Avx.Blend(result, alpha, BlendAlphaControl);
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector512 UnPremultiply(Vector512 source, Vector512 alpha)
+ {
+ // Flag the lanes whose alpha equals zero so the quotient is never used for them.
+ Vector512 zeroMask = Vector512.Equals(alpha, Vector512.Zero);
+
+ // Flagged lanes keep the source value as-is; every other lane becomes source / alpha.
+ Vector512 result = Vector512.ConditionalSelect(zeroMask, source, source / alpha);
+
+ // The mask has all bits set in every fourth lane, so those lanes take the alpha value instead of the quotient.
+ Vector512 alphaMask = Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle();
+ return Vector512.ConditionalSelect(alphaMask, alpha, result);
+ }
+
///
/// Permutes the given vector return a new instance with all the values set to .
///
@@ -690,7 +704,7 @@ internal static class Numerics
///
/// The span of vectors
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe void CubePowOnXYZ(Span vectors)
+ public static void CubePowOnXYZ(Span vectors)
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length);
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index 076590605d..022056deb0 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -601,51 +601,6 @@ internal static partial class SimdUtils
}
}
- ///
- /// Performs a multiplication and an addition of the .
- /// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
- ///
- /// ret = (vm0 * vm1) + va
- /// The vector to add to the intermediate result.
- /// The first vector to multiply.
- /// The second vector to multiply.
- /// The .
- [MethodImpl(InliningOptions.AlwaysInline)]
- public static Vector256 MultiplyAdd(
- Vector256 va,
- Vector256 vm0,
- Vector256 vm1)
- {
- if (Fma.IsSupported)
- {
- return Fma.MultiplyAdd(vm1, vm0, va);
- }
-
- return va + (vm0 * vm1);
- }
-
- ///
- /// Performs a multiplication and a negated addition of the .
- ///
- /// ret = c - (a * b)
- /// The first vector to multiply.
- /// The second vector to multiply.
- /// The vector to add negated to the intermediate result.
- /// The .
- [MethodImpl(InliningOptions.ShortMethod)]
- public static Vector256 MultiplyAddNegated(
- Vector256 a,
- Vector256 b,
- Vector256 c)
- {
- if (Fma.IsSupported)
- {
- return Fma.MultiplyAddNegated(a, b, c);
- }
-
- return Avx.Subtract(c, Avx.Multiply(a, b));
- }
-
///
/// Blend packed 8-bit integers from and using .
/// The high bit of each corresponding byte determines the selection.
diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
index 14ac13dd8d..90e3169b37 100644
--- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
@@ -115,6 +115,28 @@ internal static class Vector256_
return va + (vm0 * vm1);
}
+ /// <summary>
+ /// Performs a multiplication and a negated addition of the <see cref="Vector256{T}"/> operands.
+ /// </summary>
+ /// <remarks>ret = va - (vm0 * vm1)</remarks>
+ /// <param name="va">The vector to add to the negated intermediate result.</param>
+ /// <param name="vm0">The first vector to multiply.</param>
+ /// <param name="vm1">The second vector to multiply.</param>
+ /// <returns>The <see cref="Vector256{T}"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static Vector256 MultiplyAddNegated(
+ Vector256 va,
+ Vector256 vm0,
+ Vector256 vm1)
+ {
+ if (Fma.IsSupported)
+ {
+ return Fma.MultiplyAddNegated(vm0, vm1, va);
+ }
+
+ return va - (vm0 * vm1);
+ }
+
///
/// Performs a multiplication and a subtraction of the .
///
diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
index 03ee4626cd..82a20158ae 100644
--- a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
@@ -87,6 +87,21 @@ internal static class Vector512_
Vector512 vm1)
=> Avx512F.FusedMultiplyAdd(vm0, vm1, va);
+ /// <summary>
+ /// Performs a multiplication and a negated addition of the <see cref="Vector512{T}"/> operands.
+ /// </summary>
+ /// <remarks>ret = va - (vm0 * vm1)</remarks>
+ /// <param name="va">The vector to add to the negated intermediate result.</param>
+ /// <param name="vm0">The first vector to multiply.</param>
+ /// <param name="vm1">The second vector to multiply.</param>
+ /// <returns>The <see cref="Vector512{T}"/>.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector512 MultiplyAddNegated(
+ Vector512 va,
+ Vector512 vm0,
+ Vector512 vm1)
+ => Avx512F.FusedMultiplyAddNegated(vm0, vm1, va);
+
///
/// Restricts a vector between a minimum and a maximum value.
///
diff --git a/src/ImageSharp/Configuration.cs b/src/ImageSharp/Configuration.cs
index c2b02dedd9..2673927231 100644
--- a/src/ImageSharp/Configuration.cs
+++ b/src/ImageSharp/Configuration.cs
@@ -64,6 +64,7 @@ public sealed class Configuration
///
/// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms
/// configured with this instance.
+ /// Set to -1 to leave the degree of parallelism unbounded.
/// Initialized with by default.
///
public int MaxDegreeOfParallelism
diff --git a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
index 78ceb0b233..3d32c7cdac 100644
--- a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
@@ -468,7 +468,7 @@ internal sealed class GifDecoderCore : ImageDecoderCore
int length = this.currentLocalColorTableSize = this.imageDescriptor.LocalColorTableSize * 3;
this.currentLocalColorTable ??= this.configuration.MemoryAllocator.Allocate(768, AllocationOptions.Clean);
stream.Read(this.currentLocalColorTable.GetSpan()[..length]);
- rawColorTable = this.currentLocalColorTable!.GetSpan()[..length];
+ rawColorTable = this.currentLocalColorTable.GetSpan()[..length];
}
else if (this.globalColorTable != null)
{
diff --git a/src/ImageSharp/ImageInfo.cs b/src/ImageSharp/ImageInfo.cs
index 0bbd73b63a..d27c4b9330 100644
--- a/src/ImageSharp/ImageInfo.cs
+++ b/src/ImageSharp/ImageInfo.cs
@@ -63,8 +63,12 @@ public class ImageInfo
public int Height => this.Size.Height;
///
- /// Gets the number of frames in the image.
+ /// Gets the number of frame metadata entries available for the image.
///
+ /// <remarks>
+ /// This value is the same as the <see cref="FrameMetadataCollection"/> count and may be 0 when frame
+ /// metadata was not populated by the decoder.
+ /// </remarks>
public int FrameCount => this.FrameMetadataCollection.Count;
///
@@ -73,8 +77,12 @@ public class ImageInfo
public ImageMetadata Metadata { get; }
///
- /// Gets the collection of metadata associated with individual image frames.
+ /// Gets the metadata associated with the decoded image frames, if available.
///
+ /// <remarks>
+ /// For multi-frame formats, decoders populate one entry per decoded frame. For single-frame formats, this
+ /// collection is typically empty.
+ /// </remarks>
public IReadOnlyList FrameMetadataCollection { get; }
///
@@ -86,4 +94,24 @@ public class ImageInfo
/// Gets the bounds of the image.
///
public Rectangle Bounds => new(Point.Empty, this.Size);
+
+ /// <summary>
+ /// Gets the total number of bytes required to store the image pixels in memory.
+ /// </summary>
+ /// <remarks>
+ /// This reports the in-memory size of the pixel data represented by this <see cref="ImageInfo"/>, not the
+ /// encoded size of the image file. The value is computed from the image dimensions and
+ /// <see cref="PixelType"/>, rounding each pixel up to whole bytes. When <see cref="FrameMetadataCollection"/>
+ /// contains entries, the per-frame size is multiplied by that count. Otherwise, the value is the size of the
+ /// single image frame represented by this <see cref="ImageInfo"/>.
+ /// </remarks>
+ /// <returns>The total number of bytes required to store the image pixels in memory.</returns>
+ public long GetPixelMemorySize()
+ {
+ int count = this.FrameMetadataCollection.Count > 0 ? this.FrameMetadataCollection.Count : 1;
+
+ // Round bits up to whole bytes: a plain BitsPerPixel / 8 truncates sub-byte depths (e.g. 1 or 4 bpp)
+ // to zero and would report an empty image. Depths that are multiples of 8 are unaffected.
+ return (long)this.Size.Width * this.Size.Height * ((this.PixelType.BitsPerPixel + 7) / 8) * count;
+ }
}
diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
index 7cd9cc57ad..883693031e 100644
--- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
+++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
@@ -46,7 +46,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -85,7 +112,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -121,7 +178,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -168,7 +269,54 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -233,7 +381,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -272,7 +447,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -308,7 +513,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -355,7 +604,54 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -420,7 +716,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -459,7 +782,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -495,7 +848,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -542,7 +939,54 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -607,7 +1051,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -646,7 +1117,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -682,7 +1183,51 @@ internal static class DefaultPixelBlenders