diff --git a/src/ImageSharp/Advanced/AotCompilerTools.cs b/src/ImageSharp/Advanced/AotCompilerTools.cs
index fef49bffd4..0f28b28901 100644
--- a/src/ImageSharp/Advanced/AotCompilerTools.cs
+++ b/src/ImageSharp/Advanced/AotCompilerTools.cs
@@ -54,7 +54,7 @@ internal static class AotCompilerTools
///
/// This method doesn't actually do anything but serves an important purpose...
/// If you are running ImageSharp on iOS and try to call SaveAsGif, it will throw an exception:
- /// "Attempting to JIT compile method... OctreeFrameQuantizer.ConstructPalette... while running in aot-only mode."
+ /// "Attempting to JIT compile method... HexadecatreeQuantizer.ConstructPalette... while running in aot-only mode."
/// The reason this happens is the SaveAsGif method makes heavy use of generics, which are too confusing for the AoT
/// compiler used on Xamarin.iOS. It spins up the JIT compiler to try and figure it out, but that is an illegal op on
/// iOS so it bombs out.
@@ -479,7 +479,7 @@ internal static class AotCompilerTools
private static void AotCompileQuantizers()
where TPixel : unmanaged, IPixel
{
- AotCompileQuantizer();
+ AotCompileQuantizer();
AotCompileQuantizer();
AotCompileQuantizer();
AotCompileQuantizer();
@@ -523,10 +523,8 @@ internal static class AotCompilerTools
private static void AotCompilePixelMaps()
where TPixel : unmanaged, IPixel
{
- default(EuclideanPixelMap).GetClosestColor(default, out _);
default(EuclideanPixelMap).GetClosestColor(default, out _);
default(EuclideanPixelMap).GetClosestColor(default, out _);
- default(EuclideanPixelMap).GetClosestColor(default, out _);
}
///
@@ -551,8 +549,8 @@ internal static class AotCompilerTools
where TPixel : unmanaged, IPixel
where TDither : struct, IDither
{
- OctreeQuantizer octree = default;
- default(TDither).ApplyQuantizationDither, TPixel>(ref octree, default, default, default);
+ HexadecatreeQuantizer hexadecatree = default;
+ default(TDither).ApplyQuantizationDither, TPixel>(ref hexadecatree, default, default, default);
PaletteQuantizer palette = default;
default(TDither).ApplyQuantizationDither, TPixel>(ref palette, default, default, default);
diff --git a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
index fd9692f9ae..ad0318297a 100644
--- a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
+++ b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
@@ -18,7 +18,10 @@ public readonly struct ParallelExecutionSettings
///
/// Initializes a new instance of the struct.
///
- /// The value used for initializing when using TPL.
+ ///
+ /// The value used for initializing when using TPL.
+ /// Set to -1 to leave the degree of parallelism unbounded.
+ ///
/// The value for .
/// The .
public ParallelExecutionSettings(
@@ -44,7 +47,10 @@ public readonly struct ParallelExecutionSettings
///
/// Initializes a new instance of the struct.
///
- /// The value used for initializing when using TPL.
+ ///
+ /// The value used for initializing when using TPL.
+ /// Set to -1 to leave the degree of parallelism unbounded.
+ ///
/// The .
public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator)
: this(maxDegreeOfParallelism, DefaultMinimumPixelsProcessedPerTask, memoryAllocator)
@@ -58,6 +64,7 @@ public readonly struct ParallelExecutionSettings
///
/// Gets the value used for initializing when using TPL.
+ /// A value of -1 leaves the degree of parallelism unbounded.
///
public int MaxDegreeOfParallelism { get; }
diff --git a/src/ImageSharp/Advanced/ParallelRowIterator.cs b/src/ImageSharp/Advanced/ParallelRowIterator.cs
index d170631a29..98c2656d11 100644
--- a/src/ImageSharp/Advanced/ParallelRowIterator.cs
+++ b/src/ImageSharp/Advanced/ParallelRowIterator.cs
@@ -44,14 +44,14 @@ public static partial class ParallelRowIterator
where T : struct, IRowOperation
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
// Avoid TPL overhead in this trivial case:
if (numOfSteps == 1)
@@ -65,7 +65,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(rectangle.Height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowOperationWrapper wrappingOperation = new(top, bottom, verticalStep, in operation);
_ = Parallel.For(
@@ -109,14 +109,14 @@ public static partial class ParallelRowIterator
where TBuffer : unmanaged
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
MemoryAllocator allocator = parallelSettings.MemoryAllocator;
int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle);
@@ -135,7 +135,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowOperationWrapper wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation);
_ = Parallel.For(
@@ -174,14 +174,14 @@ public static partial class ParallelRowIterator
where T : struct, IRowIntervalOperation
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
// Avoid TPL overhead in this trivial case:
if (numOfSteps == 1)
@@ -192,7 +192,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(rectangle.Height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowIntervalOperationWrapper wrappingOperation = new(top, bottom, verticalStep, in operation);
_ = Parallel.For(
@@ -236,14 +236,14 @@ public static partial class ParallelRowIterator
where TBuffer : unmanaged
{
ValidateRectangle(rectangle);
+ ValidateSettings(parallelSettings);
int top = rectangle.Top;
int bottom = rectangle.Bottom;
int width = rectangle.Width;
int height = rectangle.Height;
- int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
- int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
+ int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
MemoryAllocator allocator = parallelSettings.MemoryAllocator;
int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle);
@@ -259,7 +259,7 @@ public static partial class ParallelRowIterator
}
int verticalStep = DivideCeil(height, numOfSteps);
- ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
+ ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
RowIntervalOperationWrapper wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation);
_ = Parallel.For(
@@ -272,6 +272,37 @@ public static partial class ParallelRowIterator
[MethodImpl(InliningOptions.ShortMethod)]
private static int DivideCeil(long dividend, int divisor) => (int)Math.Min(1 + ((dividend - 1) / divisor), int.MaxValue);
+    /// <summary>Creates the <see cref="ParallelOptions"/> for the current iteration.</summary>
+    /// <param name="parallelSettings">The execution settings; a degree of -1 is carried over unchanged.</param>
+    /// <param name="numOfSteps">The number of row partitions to execute; used as the degree in all other cases.</param>
+    /// <returns>The <see cref="ParallelOptions"/> instance.</returns>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private static ParallelOptions CreateParallelOptions(in ParallelExecutionSettings parallelSettings, int numOfSteps)
+    {
+        return new ParallelOptions { MaxDegreeOfParallelism = parallelSettings.MaxDegreeOfParallelism == -1 ? -1 : numOfSteps };
+    }
+
+    /// <summary>
+    /// Determines into how many row partitions the given region is split.
+    /// </summary>
+    /// <param name="width">The width of the region.</param>
+    /// <param name="height">The height of the region.</param>
+    /// <param name="parallelSettings">The execution settings.</param>
+    /// <returns>The number of row partitions to execute.</returns>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private static int GetNumberOfSteps(int width, int height, in ParallelExecutionSettings parallelSettings)
+    {
+        long pixelCount = width * (long)height;
+        int stepsByWorkload = DivideCeil(pixelCount, parallelSettings.MinimumPixelsProcessedPerTask);
+
+        // With an unbounded degree (-1) the row count is the cap, because row batching
+        // cannot produce more useful partitions than the number of rows available.
+        int degree = parallelSettings.MaxDegreeOfParallelism;
+        int stepLimit = degree == -1 ? height : degree;
+
+        return Math.Min(stepLimit, stepsByWorkload);
+    }
+
private static void ValidateRectangle(Rectangle rectangle)
{
Guard.MustBeGreaterThan(
@@ -284,4 +315,35 @@ public static partial class ParallelRowIterator
0,
$"{nameof(rectangle)}.{nameof(rectangle.Height)}");
}
+
+    /// <summary>
+    /// Checks that the supplied <see cref="ParallelExecutionSettings"/> can be used for iteration.
+    /// </summary>
+    /// <param name="parallelSettings">The execution settings.</param>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// Thrown when <see cref="ParallelExecutionSettings.MaxDegreeOfParallelism"/> or
+    /// <see cref="ParallelExecutionSettings.MinimumPixelsProcessedPerTask"/> is invalid.
+    /// </exception>
+    /// <exception cref="ArgumentNullException">
+    /// Thrown when <see cref="ParallelExecutionSettings.MemoryAllocator"/> is null.
+    /// This also guards the public default value, which bypasses constructor validation.
+    /// </exception>
+    private static void ValidateSettings(in ParallelExecutionSettings parallelSettings)
+    {
+        // The struct is public, so a caller may pass default and skip the constructor validation.
+        int degree = parallelSettings.MaxDegreeOfParallelism;
+        int minimumPixels = parallelSettings.MinimumPixelsProcessedPerTask;
+        MemoryAllocator allocator = parallelSettings.MemoryAllocator;
+
+        // Only -1 and positive degrees pass this check.
+        if (degree == 0 || degree < -1)
+        {
+            string degreeName = $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}";
+            throw new ArgumentOutOfRangeException(degreeName);
+        }
+
+        Guard.MustBeGreaterThan(minimumPixels, 0, $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}");
+
+        Guard.NotNull(allocator, $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}");
+    }
}
diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index efe68977bb..04ed48e210 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -643,6 +643,20 @@ internal static class Numerics
return Avx.Blend(result, alpha, BlendAlphaControl);
}
+    /// <summary>Reverses premultiplication by dividing <paramref name="source"/> by <paramref name="alpha"/>.</summary>
+    /// <param name="source">The premultiplied components; every fourth lane is treated as an alpha component.</param>
+    /// <param name="alpha">The alpha values to divide by, supplied per lane.</param>
+    /// <returns>The quotient, with zero-alpha lanes kept as <paramref name="source"/> and every fourth lane set to <paramref name="alpha"/>.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static Vector512<float> UnPremultiply(Vector512<float> source, Vector512<float> alpha)
+    {
+        // Where alpha is zero keep the source value rather than the result of a division by zero.
+        Vector512<float> result = Vector512.ConditionalSelect(Vector512.Equals(alpha, Vector512<float>.Zero), source, source / alpha);
+        // Select alpha for every fourth lane so that the alpha component is unchanged.
+        Vector512<float> alphaMask = Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle();
+        return Vector512.ConditionalSelect(alphaMask, alpha, result);
+    }
+
///
/// Permutes the given vector return a new instance with all the values set to .
///
@@ -690,7 +704,7 @@ internal static class Numerics
///
/// The span of vectors
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe void CubePowOnXYZ(Span vectors)
+ public static void CubePowOnXYZ(Span vectors)
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length);
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index ff5ea5de33..022056deb0 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -601,51 +601,6 @@ internal static partial class SimdUtils
}
}
- ///
- /// Performs a multiplication and an addition of the .
- /// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
- ///
- /// ret = (vm0 * vm1) + va
- /// The vector to add to the intermediate result.
- /// The first vector to multiply.
- /// The second vector to multiply.
- /// The .
- [MethodImpl(InliningOptions.AlwaysInline)]
- public static Vector256 MultiplyAdd(
- Vector256 va,
- Vector256 vm0,
- Vector256 vm1)
- {
- if (Fma.IsSupported)
- {
- return Fma.MultiplyAdd(vm1, vm0, va);
- }
-
- return va + (vm0 * vm1);
- }
-
- ///
- /// Performs a multiplication and a negated addition of the .
- ///
- /// ret = c - (a * b)
- /// The first vector to multiply.
- /// The second vector to multiply.
- /// The vector to add negated to the intermediate result.
- /// The .
- [MethodImpl(InliningOptions.ShortMethod)]
- public static Vector256 MultiplyAddNegated(
- Vector256 a,
- Vector256 b,
- Vector256 c)
- {
- if (Fma.IsSupported)
- {
- return Fma.MultiplyAddNegated(a, b, c);
- }
-
- return Avx.Subtract(c, Avx.Multiply(a, b));
- }
-
///
/// Blend packed 8-bit integers from and using .
/// The high bit of each corresponding byte determines the selection.
@@ -752,7 +707,7 @@ internal static partial class SimdUtils
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182
///
- internal static unsafe void ByteToNormalizedFloat(
+ internal static void ByteToNormalizedFloat(
ReadOnlySpan source,
Span destination)
{
@@ -1172,8 +1127,10 @@ internal static partial class SimdUtils
Vector256 rgb, rg, bx;
Vector256 r, g, b;
+ // Each iteration consumes 8 Rgb24 pixels (24 bytes) but starts with a 32-byte load,
+ // so we need 3 extra pixels of addressable slack beyond the vectorized chunk.
const int bytesPerRgbStride = 24;
- nuint count = (uint)source.Length / 8;
+ nuint count = source.Length > 3 ? (uint)(source.Length - 3) / 8 : 0;
for (nuint i = 0; i < count; i++)
{
rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (uint)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();
@@ -1193,10 +1150,10 @@ internal static partial class SimdUtils
}
int sliceCount = (int)(count * 8);
- redChannel = redChannel.Slice(sliceCount);
- greenChannel = greenChannel.Slice(sliceCount);
- blueChannel = blueChannel.Slice(sliceCount);
- source = source.Slice(sliceCount);
+ redChannel = redChannel[sliceCount..];
+ greenChannel = greenChannel[sliceCount..];
+ blueChannel = blueChannel[sliceCount..];
+ source = source[sliceCount..];
}
}
}
diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
index 14ac13dd8d..90e3169b37 100644
--- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
@@ -115,6 +115,28 @@ internal static class Vector256_
return va + (vm0 * vm1);
}
+ /// <summary>
+ /// Performs a multiplication and a negated addition of the given vectors.
+ /// </summary>
+ /// <remarks>ret = va - (vm0 * vm1)</remarks>
+ /// <param name="va">The vector to add to the negated intermediate result.</param>
+ /// <param name="vm0">The first vector to multiply.</param>
+ /// <param name="vm1">The second vector to multiply.</param>
+ /// <returns>The vector holding va - (vm0 * vm1).</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static Vector256 MultiplyAddNegated(
+ Vector256 va,
+ Vector256 vm0,
+ Vector256 vm1)
+ {
+ if (Fma.IsSupported)
+ {
+ return Fma.MultiplyAddNegated(vm0, vm1, va);
+ }
+ // Without FMA support the same expression is evaluated with a separate multiply and subtract.
+ return va - (vm0 * vm1);
+ }
+
///
/// Performs a multiplication and a subtraction of the .
///
diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
index 03ee4626cd..82a20158ae 100644
--- a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
@@ -87,6 +87,21 @@ internal static class Vector512_
Vector512 vm1)
=> Avx512F.FusedMultiplyAdd(vm0, vm1, va);
+ /// <summary>
+ /// Performs a multiplication and a negated addition of the given vectors.
+ /// </summary>
+ /// <remarks>ret = va - (vm0 * vm1). NOTE(review): no software fallback here; presumably callers check Avx512F.IsSupported first.</remarks>
+ /// <param name="va">The vector to add to the negated intermediate result.</param>
+ /// <param name="vm0">The first vector to multiply.</param>
+ /// <param name="vm1">The second vector to multiply.</param>
+ /// <returns>The vector holding va - (vm0 * vm1).</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector512 MultiplyAddNegated(
+ Vector512 va,
+ Vector512 vm0,
+ Vector512 vm1)
+ => Avx512F.FusedMultiplyAddNegated(vm0, vm1, va);
+
///
/// Restricts a vector between a minimum and a maximum value.
///
diff --git a/src/ImageSharp/Configuration.cs b/src/ImageSharp/Configuration.cs
index c2b02dedd9..2673927231 100644
--- a/src/ImageSharp/Configuration.cs
+++ b/src/ImageSharp/Configuration.cs
@@ -64,6 +64,7 @@ public sealed class Configuration
///
/// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms
/// configured with this instance.
+ /// Set to -1 to leave the degree of parallelism unbounded.
/// Initialized with by default.
///
public int MaxDegreeOfParallelism
diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoder.cs b/src/ImageSharp/Formats/Bmp/BmpEncoder.cs
index e255568047..210c08464a 100644
--- a/src/ImageSharp/Formats/Bmp/BmpEncoder.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpEncoder.cs
@@ -13,7 +13,7 @@ public sealed class BmpEncoder : QuantizingImageEncoder
///
/// Initializes a new instance of the class.
///
- public BmpEncoder() => this.Quantizer = KnownQuantizers.Octree;
+ public BmpEncoder() => this.Quantizer = KnownQuantizers.Hexadecatree;
///
/// Gets the number of bits per pixel.
diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
index ccc620d6c4..0bf57c5612 100644
--- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
@@ -116,7 +116,7 @@ internal sealed class BmpEncoderCore
this.bitsPerPixel = encoder.BitsPerPixel;
// TODO: Use a palette quantizer if supplied.
- this.quantizer = encoder.Quantizer ?? KnownQuantizers.Octree;
+ this.quantizer = encoder.Quantizer ?? KnownQuantizers.Hexadecatree;
this.pixelSamplingStrategy = encoder.PixelSamplingStrategy;
this.transparentColorMode = encoder.TransparentColorMode;
this.infoHeaderType = encoder.SupportTransparency ? BmpInfoHeaderType.WinVersion4 : BmpInfoHeaderType.WinVersion3;
diff --git a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
index 78ceb0b233..3d32c7cdac 100644
--- a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
@@ -468,7 +468,7 @@ internal sealed class GifDecoderCore : ImageDecoderCore
int length = this.currentLocalColorTableSize = this.imageDescriptor.LocalColorTableSize * 3;
this.currentLocalColorTable ??= this.configuration.MemoryAllocator.Allocate(768, AllocationOptions.Clean);
stream.Read(this.currentLocalColorTable.GetSpan()[..length]);
- rawColorTable = this.currentLocalColorTable!.GetSpan()[..length];
+ rawColorTable = this.currentLocalColorTable.GetSpan()[..length];
}
else if (this.globalColorTable != null)
{
diff --git a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
index 07c73dcf22..d2883e2811 100644
--- a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
@@ -117,7 +117,7 @@ internal sealed class GifEncoderCore
if (globalQuantizer is null)
{
- // Is this a gif with color information. If so use that, otherwise use octree.
+ // Is this a gif with color information. If so use that, otherwise use the adaptive hexadecatree quantizer.
if (gifMetadata.ColorTableMode == FrameColorTableMode.Global && gifMetadata.GlobalColorTable?.Length > 0)
{
int ti = GetTransparentIndex(quantized, frameMetadata);
@@ -132,12 +132,12 @@ internal sealed class GifEncoderCore
}
else
{
- globalQuantizer = new OctreeQuantizer(options);
+ globalQuantizer = new HexadecatreeQuantizer(options);
}
}
else
{
- globalQuantizer = new OctreeQuantizer(options);
+ globalQuantizer = new HexadecatreeQuantizer(options);
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs
index b60ef68f11..8662c5c49b 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/SpectralConverter{TPixel}.cs
@@ -114,9 +114,9 @@ internal class SpectralConverter : SpectralConverter, IDisposable
Span sourceRow = this.pixelBuffer.DangerousGetRowSpan(srcIndex);
PixelOperations.Instance.UnpackIntoRgbPlanes(rLane, gLane, bLane, sourceRow);
- rLane.Slice(paddingStartIndex).Fill(rLane[paddingStartIndex - 1]);
- gLane.Slice(paddingStartIndex).Fill(gLane[paddingStartIndex - 1]);
- bLane.Slice(paddingStartIndex).Fill(bLane[paddingStartIndex - 1]);
+ rLane.Slice(paddingStartIndex, paddedPixelsCount).Fill(rLane[paddingStartIndex - 1]);
+ gLane.Slice(paddingStartIndex, paddedPixelsCount).Fill(gLane[paddingStartIndex - 1]);
+ bLane.Slice(paddingStartIndex, paddedPixelsCount).Fill(bLane[paddingStartIndex - 1]);
// Convert from rgb24 to target pixel type
JpegColorConverterBase.ComponentValues values = new(this.componentProcessors, y);
diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
index 8962182679..d794c66e27 100644
--- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
@@ -214,7 +214,7 @@ internal sealed class PngDecoderCore : ImageDecoderCore
break;
case PngChunkType.FrameData:
{
- if (frameCount >= this.maxFrames)
+ if (frameCount > this.maxFrames)
{
goto EOF;
}
@@ -275,7 +275,7 @@ internal sealed class PngDecoderCore : ImageDecoderCore
previousFrameControl = currentFrameControl;
}
- if (frameCount >= this.maxFrames)
+ if (frameCount > this.maxFrames)
{
goto EOF;
}
@@ -402,7 +402,7 @@ internal sealed class PngDecoderCore : ImageDecoderCore
break;
case PngChunkType.FrameControl:
++frameCount;
- if (frameCount >= this.maxFrames)
+ if (frameCount > this.maxFrames)
{
break;
}
@@ -411,8 +411,12 @@ internal sealed class PngDecoderCore : ImageDecoderCore
break;
case PngChunkType.FrameData:
- if (frameCount >= this.maxFrames)
+ if (frameCount > this.maxFrames)
{
+ // Must skip the chunk data even when we've hit maxFrames, because TryReadChunk
+ // restores the stream position to the start of the fdAT data after CRC validation.
+ this.SkipChunkDataAndCrc(chunk);
+ this.SkipRemainingFrameDataChunks(buffer);
break;
}
@@ -428,9 +432,10 @@ internal sealed class PngDecoderCore : ImageDecoderCore
InitializeFrameMetadata(framesMetadata, currentFrameControl.Value);
- // Skip sequence number
- this.currentStream.Skip(4);
+ // Skip data for this and all remaining FrameData chunks belonging to the same frame
+ // (comparable to how Decode consumes them via ReadScanlines + ReadNextFrameDataChunk).
this.SkipChunkDataAndCrc(chunk);
+ this.SkipRemainingFrameDataChunks(buffer);
break;
case PngChunkType.Data:
@@ -2093,6 +2098,31 @@ internal sealed class PngDecoderCore : ImageDecoderCore
return 0;
}
+ /// <summary>
+ /// Skips any remaining <see cref="PngChunkType.FrameData"/> chunks belonging to the current frame.
+ /// Chunks are read one after another; each FrameData (fdAT) chunk has its data disposed and is skipped.
+ /// Reading stops at the first chunk of any other type, or when no further chunk can be read.
+ /// That other chunk is stored in the nextChunk field for the next iteration.
+ /// </summary>
+ /// <param name="buffer">Temporary buffer passed through to TryReadChunk.</param>
+ private void SkipRemainingFrameDataChunks(Span buffer)
+ {
+ while (this.TryReadChunk(buffer, out PngChunk chunk))
+ {
+ if (chunk.Type is PngChunkType.FrameData)
+ {
+ chunk.Data?.Dispose();
+ this.SkipChunkDataAndCrc(chunk);
+ }
+ else
+ {
+ // Not a FrameData chunk; store it so the next TryReadChunk call returns it.
+ this.nextChunk = chunk;
+ return;
+ }
+ }
+ }
+
///
/// Reads a chunk from the stream.
///
diff --git a/src/ImageSharp/Formats/Tiff/TiffEncoder.cs b/src/ImageSharp/Formats/Tiff/TiffEncoder.cs
index a068613bf4..7859b2c902 100644
--- a/src/ImageSharp/Formats/Tiff/TiffEncoder.cs
+++ b/src/ImageSharp/Formats/Tiff/TiffEncoder.cs
@@ -15,7 +15,7 @@ public class TiffEncoder : QuantizingImageEncoder
///
/// Initializes a new instance of the class.
///
- public TiffEncoder() => this.Quantizer = KnownQuantizers.Octree;
+ public TiffEncoder() => this.Quantizer = KnownQuantizers.Hexadecatree;
///
/// Gets the number of bits per pixel.
diff --git a/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs b/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs
index d7508b02e8..e5e47166e9 100644
--- a/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs
+++ b/src/ImageSharp/Formats/Tiff/TiffEncoderCore.cs
@@ -71,7 +71,7 @@ internal sealed class TiffEncoderCore
this.configuration = configuration;
this.memoryAllocator = configuration.MemoryAllocator;
this.PhotometricInterpretation = encoder.PhotometricInterpretation;
- this.quantizer = encoder.Quantizer ?? KnownQuantizers.Octree;
+ this.quantizer = encoder.Quantizer ?? KnownQuantizers.Hexadecatree;
this.pixelSamplingStrategy = encoder.PixelSamplingStrategy;
this.BitsPerPixel = encoder.BitsPerPixel;
this.HorizontalPredictor = encoder.HorizontalPredictor;
diff --git a/src/ImageSharp/ImageInfo.cs b/src/ImageSharp/ImageInfo.cs
index 0bbd73b63a..d27c4b9330 100644
--- a/src/ImageSharp/ImageInfo.cs
+++ b/src/ImageSharp/ImageInfo.cs
@@ -63,8 +63,12 @@ public class ImageInfo
public int Height => this.Size.Height;
///
- /// Gets the number of frames in the image.
+ /// Gets the number of frame metadata entries available for the image.
///
+ ///
+ /// This value is the same as count and may be 0 when frame
+ /// metadata was not populated by the decoder.
+ ///
public int FrameCount => this.FrameMetadataCollection.Count;
///
@@ -73,8 +77,12 @@ public class ImageInfo
public ImageMetadata Metadata { get; }
///
- /// Gets the collection of metadata associated with individual image frames.
+ /// Gets the metadata associated with the decoded image frames, if available.
///
+ ///
+ /// For multi-frame formats, decoders populate one entry per decoded frame. For single-frame formats, this
+ /// collection is typically empty.
+ ///
public IReadOnlyList FrameMetadataCollection { get; }
///
@@ -86,4 +94,24 @@ public class ImageInfo
/// Gets the bounds of the image.
///
public Rectangle Bounds => new(Point.Empty, this.Size);
+
+    /// <summary>
+    /// Gets the total number of bytes required to store the image pixels in memory.
+    /// </summary>
+    /// <remarks>
+    /// This reports the in-memory size of the pixel data represented by this <see cref="ImageInfo"/>, not the
+    /// encoded size of the image file. The value is computed from the image dimensions and
+    /// <see cref="PixelType"/>, with the bits per pixel rounded up to whole bytes. When
+    /// <see cref="FrameMetadataCollection"/> contains decoded frame metadata, the per-frame size is multiplied
+    /// by that count. Otherwise, the value is the in-memory size of the single image frame.
+    /// </remarks>
+    /// <returns>The total number of bytes required to store the image pixels in memory.</returns>
+    public long GetPixelMemorySize()
+    {
+        // An empty frame metadata collection is counted as a single frame.
+        int frames = this.FrameMetadataCollection.Count > 0 ? this.FrameMetadataCollection.Count : 1;
+        // Round up so bit depths that are not a multiple of 8 are not truncated (to zero below 8 bits).
+        int bytesPerPixel = (this.PixelType.BitsPerPixel + 7) / 8;
+        return (long)this.Size.Width * this.Size.Height * bytesPerPixel * frames;
+    }
}
diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
index 7cd9cc57ad..883693031e 100644
--- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
+++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
@@ -46,7 +46,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -85,7 +112,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -121,7 +178,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -168,7 +269,54 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -233,7 +381,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -272,7 +447,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -308,7 +513,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -355,7 +604,54 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -420,7 +716,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -459,7 +782,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -495,7 +848,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -542,7 +939,54 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 vOne = Vector512.Create(1F);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ float amount0 = amountBase;
+ float amount1 = Unsafe.Add(ref amountBase, 1);
+ float amount2 = Unsafe.Add(ref amountBase, 2);
+ float amount3 = Unsafe.Add(ref amountBase, 3);
+
+ // We need to create a Vector512 containing the current four amount values
+ // taking up each quarter of the Vector512 and then clamp them.
+ Vector512 opacity = Vector512.Create(
+ amount0, amount0, amount0, amount0,
+ amount1, amount1, amount1, amount1,
+ amount2, amount2, amount2, amount2,
+ amount3, amount3, amount3, amount3);
+ opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne);
+
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 4);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -607,7 +1051,34 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -646,7 +1117,37 @@ internal static class DefaultPixelBlenders
{
amount = Numerics.Clamp(amount, 0, 1);
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ Vector512 sourceBase = Vector512.Create(
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W,
+ source.X, source.Y, source.Z, source.W);
+ Vector512 opacity = Vector512.Create(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ }
+
+ int remainder = Numerics.Modulo4(destination.Length);
+ if (remainder != 0)
+ {
+ for (int i = destination.Length - remainder; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source, amount);
+ }
+ }
+ }
+ else if (Avx2.IsSupported && destination.Length >= 2)
{
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256
ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
@@ -682,7 +1183,51 @@ internal static class DefaultPixelBlenders
///
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- if (Avx2.IsSupported && destination.Length >= 2)
+ if (Avx512F.IsSupported && destination.Length >= 4)
+ {
+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
+
+ ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector512 sourceBase = ref Unsafe.As