From a9c43dc8007f12ff5fae38d6f37c070027a9cc8b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 10 Apr 2026 13:14:45 +1000 Subject: [PATCH 01/12] Convolution Replace parallel row iteration with sequential loops --- .../Convolution/BokehBlurProcessor{TPixel}.cs | 114 +++++++++--------- .../Convolution2DProcessor{TPixel}.cs | 16 ++- .../Convolution2PassProcessor{TPixel}.cs | 33 +++-- .../ConvolutionProcessor{TPixel}.cs | 16 ++- .../EdgeDetectorCompassProcessor{TPixel}.cs | 12 +- ...ilterProcessor_BikeGrayscale_R16_C1_G3.png | 4 +- ...ilterProcessor_BikeGrayscale_R16_C2_G3.png | 4 +- ...FilterProcessor_BikeGrayscale_R8_C1_G1.png | 4 +- ...okehBlurFilterProcessor_Bike_R16_C1_G3.png | 4 +- ...okehBlurFilterProcessor_Bike_R16_C2_G3.png | 4 +- ...BokehBlurFilterProcessor_Bike_R8_C1_G1.png | 4 +- ...rProcessor_CalliphoraPartial_R16_C1_G3.png | 4 +- ...rProcessor_CalliphoraPartial_R16_C2_G3.png | 4 +- ...erProcessor_CalliphoraPartial_R8_C1_G1.png | 4 +- ...sor_Solid50x50_(255,0,0,255)_R16_C1_G3.png | 4 +- ...sor_Solid50x50_(255,0,0,255)_R16_C2_G3.png | 4 +- ...ssor_Solid50x50_(255,0,0,255)_R8_C1_G1.png | 4 +- ...Processor_TestPattern200x100_R16_C1_G3.png | 4 +- ...Processor_TestPattern200x100_R16_C2_G3.png | 4 +- ...rProcessor_TestPattern200x100_R8_C1_G1.png | 4 +- ...erProcessor_TestPattern23x31_R16_C1_G3.png | 4 +- ...erProcessor_TestPattern23x31_R16_C2_G3.png | 4 +- ...terProcessor_TestPattern23x31_R8_C1_G1.png | 4 +- ...erProcessor_TestPattern30x20_R16_C1_G3.png | 4 +- ...erProcessor_TestPattern30x20_R16_C2_G3.png | 4 +- ...terProcessor_TestPattern30x20_R8_C1_G1.png | 4 +- ...kehBlurFilterProcessor_cross_R16_C1_G3.png | 4 +- ...kehBlurFilterProcessor_cross_R16_C2_G3.png | 4 +- ...okehBlurFilterProcessor_cross_R8_C1_G1.png | 4 +- 29 files changed, 164 insertions(+), 123 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index 
a96fa1993e..426544d692 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -1,10 +1,12 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.ColorProfiles.Companding; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing.Processors.Convolution.Parameters; @@ -77,22 +79,36 @@ internal class BokehBlurProcessor : ImageProcessor { Rectangle sourceRectangle = Rectangle.Intersect(this.SourceRectangle, source.Bounds); + MemoryAllocator allocator = this.Configuration.MemoryAllocator; + + // Convolution is memory-bandwidth-bound with low arithmetic intensity. + // Parallelization degrades performance due to cache line contention from + // overlapping source row reads. See #3111. 
+ // Preliminary gamma highlight pass if (this.gamma == 3F) { ApplyGamma3ExposureRowOperation gammaOperation = new(sourceRectangle, source.PixelBuffer, this.Configuration); - ParallelRowIterator.IterateRows( - this.Configuration, - sourceRectangle, - in gammaOperation); + + using IMemoryOwner gammaBuffer = allocator.Allocate(gammaOperation.GetRequiredBufferLength(sourceRectangle)); + Span gammaSpan = gammaBuffer.Memory.Span; + + for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) + { + gammaOperation.Invoke(y, gammaSpan); + } } else { ApplyGammaExposureRowOperation gammaOperation = new(sourceRectangle, source.PixelBuffer, this.Configuration, this.gamma); - ParallelRowIterator.IterateRows( - this.Configuration, - sourceRectangle, - in gammaOperation); + + using IMemoryOwner gammaBuffer = allocator.Allocate(gammaOperation.GetRequiredBufferLength(sourceRectangle)); + Span gammaSpan = gammaBuffer.Memory.Span; + + for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) + { + gammaOperation.Invoke(y, gammaSpan); + } } // Create a 0-filled buffer to use to store the result of the component convolutions @@ -105,18 +121,20 @@ internal class BokehBlurProcessor : ImageProcessor if (this.gamma == 3F) { ApplyInverseGamma3ExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration); - ParallelRowIterator.IterateRows( - this.Configuration, - sourceRectangle, - in operation); + + for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) + { + operation.Invoke(y); + } } else { - ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, 1 / this.gamma); - ParallelRowIterator.IterateRows( - this.Configuration, - sourceRectangle, - in operation); + ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, this.gamma); + + for (int y = sourceRectangle.Top; y < 
sourceRectangle.Bottom; y++) + { + operation.Invoke(y); + } } } @@ -169,10 +187,15 @@ internal class BokehBlurProcessor : ImageProcessor kernel, configuration); - ParallelRowIterator.IterateRows( - configuration, - sourceRectangle, - in horizontalOperation); + using (IMemoryOwner hBuffer = configuration.MemoryAllocator.Allocate(horizontalOperation.GetRequiredBufferLength(sourceRectangle))) + { + Span hSpan = hBuffer.Memory.Span; + + for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) + { + horizontalOperation.Invoke(y, hSpan); + } + } // Vertical 1D convolutions to accumulate the partial results on the target buffer BokehBlurProcessor.SecondPassConvolutionRowOperation verticalOperation = new( @@ -184,10 +207,10 @@ internal class BokehBlurProcessor : ImageProcessor parameters.Z, parameters.W); - ParallelRowIterator.IterateRows( - configuration, - sourceRectangle, - in verticalOperation); + for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) + { + verticalOperation.Invoke(y); + } } } @@ -305,15 +328,9 @@ internal class BokehBlurProcessor : ImageProcessor { Span targetRowSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..]; PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan[..span.Length], span, PixelConversionModifiers.Premultiply); - ref Vector4 baseRef = ref MemoryMarshal.GetReference(span); - for (int x = 0; x < this.bounds.Width; x++) - { - ref Vector4 v = ref Unsafe.Add(ref baseRef, (uint)x); - v.X = MathF.Pow(v.X, this.gamma); - v.Y = MathF.Pow(v.Y, this.gamma); - v.Z = MathF.Pow(v.Z, this.gamma); - } + // Input is premultiplied [0,1] so the LUT is safe here. 
+ GammaCompanding.Expand(span[..this.bounds.Width], this.gamma); PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); } @@ -367,7 +384,7 @@ internal class BokehBlurProcessor : ImageProcessor private readonly Buffer2D targetPixels; private readonly Buffer2D sourceValues; private readonly Configuration configuration; - private readonly float inverseGamma; + private readonly float gamma; [MethodImpl(InliningOptions.ShortMethod)] public ApplyInverseGammaExposureRowOperation( @@ -375,36 +392,26 @@ internal class BokehBlurProcessor : ImageProcessor Buffer2D targetPixels, Buffer2D sourceValues, Configuration configuration, - float inverseGamma) + float gamma) { this.bounds = bounds; this.targetPixels = targetPixels; this.sourceValues = sourceValues; this.configuration = configuration; - this.inverseGamma = inverseGamma; + this.gamma = gamma; } /// [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y) { - Vector4 low = Vector4.Zero; - Vector4 high = new(float.PositiveInfinity, float.PositiveInfinity, float.PositiveInfinity, float.PositiveInfinity); - Span targetPixelSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..]; - Span sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y)[this.bounds.X..]; - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceRowSpan); + Span sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y).Slice(this.bounds.X, this.bounds.Width); - for (int x = 0; x < this.bounds.Width; x++) - { - ref Vector4 v = ref Unsafe.Add(ref sourceRef, (uint)x); - Vector4 clamp = Numerics.Clamp(v, low, high); - v.X = MathF.Pow(clamp.X, this.inverseGamma); - v.Y = MathF.Pow(clamp.Y, this.inverseGamma); - v.Z = MathF.Pow(clamp.Z, this.inverseGamma); - } + Numerics.Clamp(MemoryMarshal.Cast(sourceRowSpan), 0, 1F); + GammaCompanding.Compress(sourceRowSpan, this.gamma); - PixelOperations.Instance.FromVector4Destructive(this.configuration, sourceRowSpan[..this.bounds.Width], targetPixelSpan, 
PixelConversionModifiers.Premultiply); + PixelOperations.Instance.FromVector4Destructive(this.configuration, sourceRowSpan, targetPixelSpan, PixelConversionModifiers.Premultiply); } } @@ -433,17 +440,16 @@ internal class BokehBlurProcessor : ImageProcessor /// [MethodImpl(InliningOptions.ShortMethod)] - public unsafe void Invoke(int y) + public void Invoke(int y) { Span sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y).Slice(this.bounds.X, this.bounds.Width); - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceRowSpan); - Numerics.Clamp(MemoryMarshal.Cast(sourceRowSpan), 0, float.PositiveInfinity); + Numerics.Clamp(MemoryMarshal.Cast(sourceRowSpan), 0, 1F); Numerics.CubeRootOnXYZ(sourceRowSpan); Span targetPixelSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..]; - PixelOperations.Instance.FromVector4Destructive(this.configuration, sourceRowSpan[..this.bounds.Width], targetPixelSpan, PixelConversionModifiers.Premultiply); + PixelOperations.Instance.FromVector4Destructive(this.configuration, sourceRowSpan, targetPixelSpan, PixelConversionModifiers.Premultiply); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index 02e06db494..4ed6934a20 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -1,8 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Buffers; using System.Numerics; -using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -79,10 +79,16 @@ internal class Convolution2DProcessor : ImageProcessor this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows, Vector4>( - this.Configuration, - interest, - in operation); + // Convolution is memory-bandwidth-bound with low arithmetic intensity. + // Parallelization degrades performance due to cache line contention from + // overlapping source row reads. See #3111. + using IMemoryOwner buffer = allocator.Allocate(operation.GetRequiredBufferLength(interest)); + Span span = buffer.Memory.Span; + + for (int y = interest.Top; y < interest.Bottom; y++) + { + operation.Invoke(y, span); + } } Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 1bbbdb3501..6d1d7c23c2 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -106,6 +107,12 @@ internal class Convolution2PassProcessor : ImageProcessor mapXY.BuildSamplingOffsetMap(this.KernelX.Length, this.KernelX.Length, interest, this.BorderWrapModeX, this.BorderWrapModeY); + MemoryAllocator allocator = this.Configuration.MemoryAllocator; + + // Convolution is memory-bandwidth-bound with low arithmetic intensity. + // Parallelization degrades performance due to cache line contention from + // overlapping source row reads. See #3111. 
+ // Horizontal convolution HorizontalConvolutionRowOperation horizontalOperation = new( interest, @@ -116,10 +123,15 @@ internal class Convolution2PassProcessor : ImageProcessor this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in horizontalOperation); + using (IMemoryOwner hBuffer = allocator.Allocate(horizontalOperation.GetRequiredBufferLength(interest))) + { + Span hSpan = hBuffer.Memory.Span; + + for (int y = interest.Top; y < interest.Bottom; y++) + { + horizontalOperation.Invoke(y, hSpan); + } + } // Vertical convolution VerticalConvolutionRowOperation verticalOperation = new( @@ -131,10 +143,15 @@ internal class Convolution2PassProcessor : ImageProcessor this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in verticalOperation); + using (IMemoryOwner vBuffer = allocator.Allocate(verticalOperation.GetRequiredBufferLength(interest))) + { + Span vSpan = vBuffer.Memory.Span; + + for (int y = interest.Top; y < interest.Bottom; y++) + { + verticalOperation.Invoke(y, vSpan); + } + } } /// diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index feaaf30ce0..b704f556fa 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -96,10 +97,17 @@ internal class ConvolutionProcessor : ImageProcessor map.BuildSamplingOffsetMap(this.KernelXY.Rows, this.KernelXY.Columns, interest, this.BorderWrapModeX, this.BorderWrapModeY); RowOperation operation = new(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in operation); + + // Convolution is memory-bandwidth-bound with low arithmetic intensity. + // Parallelization degrades performance due to cache line contention from + // overlapping source row reads. See #3111. + using IMemoryOwner buffer = allocator.Allocate(operation.GetRequiredBufferLength(interest)); + Span span = buffer.Memory.Span; + + for (int y = interest.Top; y < interest.Bottom; y++) + { + operation.Invoke(y, span); + } } Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); diff --git a/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs index eae7481661..f1edb75a20 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs @@ -83,11 +83,15 @@ internal class EdgeDetectorCompassProcessor : ImageProcessor processor.Apply(pass); } + // Convolution is memory-bandwidth-bound with low arithmetic intensity. + // Parallelization degrades performance due to cache line contention from + // overlapping source row reads. See #3111. 
RowOperation operation = new(source.PixelBuffer, pass.PixelBuffer, interest); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in operation); + + for (int y = interest.Top; y < interest.Bottom; y++) + { + operation.Invoke(y); + } } } diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C1_G3.png index 564a76f90c..3b4c12a24a 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3a6e47d880b7276702e6bf4b1ba1b4781abfa2cd99ee9321a56169440fc318c -size 49844 +oid sha256:0c047a274060b1bf73af9c922bff4f2ea627bd0fad023b3c3a5852f49d026c6b +size 50025 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C2_G3.png index 4f2dc77b06..7dad93fba1 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18b4837dceb9a065c5b27133b67ef257a0f050fa1342d02a7e06d3501e068f47 -size 44966 +oid sha256:aa1b0b58ddb6a5c29ccd78f1bc2e773a2005493b92efe5c0deb8056af6fc0208 +size 45605 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R8_C1_G1.png index 57ce8295ee..a9b287c6b1 100644 --- 
a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_BikeGrayscale_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e652706e1aec475c8eec08392d65be1bd888cc79eccc4019eb3826db71c6dac0 -size 54744 +oid sha256:ead63aaaf5b185342f83317c2400b7ada29a82223e35e9a7382f690101ae40f7 +size 54798 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C1_G3.png index a0e9c1b248..a4ac9718aa 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffc82427285a2bfeb3226e3e87ac316e41a2a8f853bb406b88ba2ae7bfae099d -size 164923 +oid sha256:025751d22eeeac10878e79d2bf1987e0a055cf9f54903048673189a385036744 +size 157237 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C2_G3.png index caf152f9b9..2c18a147dc 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ea08065be7271e7ff75ed49e63544701bc14a03bd44d754fb056370559dd105 -size 149483 +oid sha256:77cae565842c0c8b0314d1c10c72f4b52483d002c614a33734b534f743d06b5d +size 146574 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R8_C1_G1.png 
b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R8_C1_G1.png index 5ad60a4bf9..0e31e223b2 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Bike_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a61bca9de7e0b6bf6f88c4d01b9e3f1611f6866db10e4a1168550ab84804f42 -size 178531 +oid sha256:223027dcd3a8e5ded5f98b3aa47b0e18bc40b569e47f6bfdab1126f4420e5977 +size 171283 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C1_G3.png index d1b15eb8e3..be4b535c78 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e2f6c6c42ddd15c0730edcd402bf8fd05a1116cb72495e3156a545988ff0230 -size 85901 +oid sha256:db64cd4e0369fa2db33aa1f21a94466ca39a191b0e0adbbbfbcbca44284c61ab +size 86151 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C2_G3.png index 3acb240b32..5ddb7ce12b 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8193ce8690dbb99517cd56e5a866268c0a5c971c992058afb818393658781b91 -size 77762 +oid 
sha256:afc3a020178a68715ab0dafa30ee1db81630b760fde3df3271048acc627f178c +size 77614 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R8_C1_G1.png index 335375205e..db03e0fb43 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_CalliphoraPartial_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:412c302f92500d855658d466aa1a686386ed998f6a3d5fe7326cd148a6f829ae -size 116229 +oid sha256:5ea7e0c3e16acf40491354191278a09af2db5ac6e0e7b92ba377f8fc9d52e305 +size 117067 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C1_G3.png index ffa9624d04..4d9777bd87 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d753a7dc732d6f01f7bce8c6de50d70158639aa5e0eb18d168e417fe36492731 -size 135 +oid sha256:ba9b046556b04556632f4b651dc9d8ebc4d27699e53567179ef15a850567fee9 +size 85 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C2_G3.png index ffa9624d04..4d9777bd87 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C2_G3.png +++ 
b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d753a7dc732d6f01f7bce8c6de50d70158639aa5e0eb18d168e417fe36492731 -size 135 +oid sha256:ba9b046556b04556632f4b651dc9d8ebc4d27699e53567179ef15a850567fee9 +size 85 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R8_C1_G1.png index ffa9624d04..4d9777bd87 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_Solid50x50_(255,0,0,255)_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d753a7dc732d6f01f7bce8c6de50d70158639aa5e0eb18d168e417fe36492731 -size 135 +oid sha256:ba9b046556b04556632f4b651dc9d8ebc4d27699e53567179ef15a850567fee9 +size 85 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C1_G3.png index e155d618df..9edb930aa8 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:570cb16b4026d38cbf586592c34d0fe303f2c1d99baeb531670c4ba25956f9c3 -size 15489 +oid sha256:20bb78a7539049ced92c936c2a78fdeb3809ffe0cb1dfae034e55e326de094a8 +size 14285 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C2_G3.png 
b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C2_G3.png index 3f93014b6d..ce093b38aa 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27813f975640c7ac15e78af910efc796ada950cd7efcd9a4fe045d531de24ec8 -size 15197 +oid sha256:a55e413991f9817ce9cecaf9d3ed31f5cd7559f9d527ff2e03c8bea5be091887 +size 13943 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R8_C1_G1.png index 198baadf7d..a8d549dc66 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern200x100_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e10a91175585b3843b25b710dc45f734ba706b60fc56a21748da355f27b8dcf5 -size 12776 +oid sha256:2afdab1f7f430c3f38fa7a6cdac60fbeb03ebcdda00a1bc79b289680a6811388 +size 12056 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C1_G3.png index 9140cdb0b4..5104bad4b6 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c0aca87ea94ec94c4d9e458dd33d366e0166533ee3e3c39066ee9d8be63a74e -size 1393 +oid 
sha256:028cd8201f9e7cd4988d63cc80d9afc96d0c739c65d807f7c755947e49e65111 +size 1326 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C2_G3.png index 32778c0a7f..f96d779283 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d486512f51a57c90de6c3126791fbd3b7e7dd756932b5bd716660ee9b72f010 -size 1168 +oid sha256:213e3bd008292b807e2a2ec216ec017a31cd165f4d2a8bc86b3deea3daffc81c +size 1109 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R8_C1_G1.png index a60c7f0cbf..6656f75640 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern23x31_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79f344abd9f1cfe6e8c2f0be52fd9498ee3ee6370cea94244d90fdaba7fa5e71 -size 1488 +oid sha256:2ceeb731111403f6354c460e403a30a2269ae1b2e2a9ef9b3819bdcadb50b859 +size 1424 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C1_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C1_G3.png index ae9b64afad..da9a0c2e3c 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C1_G3.png +++ 
b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90a45e7212ce97754be0e1432fffdc62ef5d2bdc30fdff0477a1951754cc0929 -size 1113 +oid sha256:fa9d77a2f1b727a42323a8ac19cb4cabdef0e7f0257bc92bb0100cd280f87613 +size 1076 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C2_G3.png index 1e9dc61e6e..e0ccdbb5d7 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c61766f05dc48413c21baea8af1994895aa354732109fd5701a42f746f1f412 -size 1005 +oid sha256:fc9986cd6042d8cad709a5768dc464fc171a3cb5d3457406c11c46ef55388daf +size 956 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R8_C1_G1.png index 99a249b1df..4f1d40e2ce 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_TestPattern30x20_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be002593c2874eab803d0dc9b5646fb51b94ea92e17158797609219f46c2f723 -size 1518 +oid sha256:7e92efee845abd30705bb9a9cf61997092fca89b48e20d8383a0c08ab159a0af +size 1491 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C1_G3.png 
b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C1_G3.png index d4ff4e16bc..404dc83b18 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C1_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C1_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11edba36c7f73271d9575b16cc7663a1e75c3b34560df51b7430cfde3ce1ea08 -size 6576 +oid sha256:21e8dce4256aecb250e0079c8bd60deb8b2ca55848fb0fd647ed3115afcf82af +size 8401 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C2_G3.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C2_G3.png index a02c2d6702..525eedcd82 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C2_G3.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R16_C2_G3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75320c25e8977e5d56ff32f054e15773bd797e4ba745cd7bf3a2fd4c7000e3df -size 6400 +oid sha256:c4b1b7f904b23b6d7bd7b0fc4c533d3c213f38bcab689e9dac1936b05b71e9cf +size 8268 diff --git a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R8_C1_G1.png b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R8_C1_G1.png index 5fbabed0ac..2585ff479d 100644 --- a/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R8_C1_G1.png +++ b/tests/Images/External/ReferenceOutput/BokehBlurTest/BokehBlurFilterProcessor_cross_R8_C1_G1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bb334d24245359fda8b64a74cbd30267cbdd1238e0aeae674b57be5371cd7db -size 6266 +oid sha256:33ca697815ebf4c74343e0abc7a418af8ffdd8fa0830285770249a1f20929eae +size 8444 From abafe100cd30a8e9ee510b26fd194826fee6d6eb Mon Sep 
17 00:00:00 2001 From: James Jackson-South Date: Fri, 10 Apr 2026 13:18:54 +1000 Subject: [PATCH 02/12] Use sequential row processing in CropProcessor --- .../Processors/Transforms/CropProcessor{TPixel}.cs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Transforms/CropProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Transforms/CropProcessor{TPixel}.cs index 5ef2422a36..dd992df8cc 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/CropProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/CropProcessor{TPixel}.cs @@ -59,16 +59,14 @@ internal class CropProcessor : TransformProcessor Rectangle bounds = this.cropRectangle; - // Copying is cheap, we should process more pixels per task: - ParallelExecutionSettings parallelSettings = - ParallelExecutionSettings.FromConfiguration(this.Configuration).MultiplyMinimumPixelsPerTask(4); - + // Copying is too cheap to benefit from parallelization; + // the overhead exceeds the work per task. See #3111. 
RowOperation operation = new(bounds, source.PixelBuffer, destination.PixelBuffer); - ParallelRowIterator.IterateRows( - bounds, - in parallelSettings, - in operation); + for (int y = bounds.Top; y < bounds.Bottom; y++) + { + operation.Invoke(y); + } } /// From 0220108d86f7c0986257abb91029f182541bcf55 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 10 Apr 2026 13:48:58 +1000 Subject: [PATCH 03/12] Use ParallelRowIterator for BokehBlur --- .../Convolution/BokehBlurProcessor{TPixel}.cs | 70 +++++++------------ 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index 426544d692..663d54afc9 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -79,36 +78,22 @@ internal class BokehBlurProcessor : ImageProcessor { Rectangle sourceRectangle = Rectangle.Intersect(this.SourceRectangle, source.Bounds); - MemoryAllocator allocator = this.Configuration.MemoryAllocator; - - // Convolution is memory-bandwidth-bound with low arithmetic intensity. - // Parallelization degrades performance due to cache line contention from - // overlapping source row reads. See #3111. 
- // Preliminary gamma highlight pass if (this.gamma == 3F) { ApplyGamma3ExposureRowOperation gammaOperation = new(sourceRectangle, source.PixelBuffer, this.Configuration); - - using IMemoryOwner gammaBuffer = allocator.Allocate(gammaOperation.GetRequiredBufferLength(sourceRectangle)); - Span gammaSpan = gammaBuffer.Memory.Span; - - for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) - { - gammaOperation.Invoke(y, gammaSpan); - } + ParallelRowIterator.IterateRows( + this.Configuration, + sourceRectangle, + in gammaOperation); } else { ApplyGammaExposureRowOperation gammaOperation = new(sourceRectangle, source.PixelBuffer, this.Configuration, this.gamma); - - using IMemoryOwner gammaBuffer = allocator.Allocate(gammaOperation.GetRequiredBufferLength(sourceRectangle)); - Span gammaSpan = gammaBuffer.Memory.Span; - - for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) - { - gammaOperation.Invoke(y, gammaSpan); - } + ParallelRowIterator.IterateRows( + this.Configuration, + sourceRectangle, + in gammaOperation); } // Create a 0-filled buffer to use to store the result of the component convolutions @@ -121,20 +106,18 @@ internal class BokehBlurProcessor : ImageProcessor if (this.gamma == 3F) { ApplyInverseGamma3ExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration); - - for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) - { - operation.Invoke(y); - } + ParallelRowIterator.IterateRows( + this.Configuration, + sourceRectangle, + in operation); } else { ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, this.gamma); - - for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) - { - operation.Invoke(y); - } + ParallelRowIterator.IterateRows( + this.Configuration, + sourceRectangle, + in operation); } } @@ -187,15 +170,10 @@ internal class BokehBlurProcessor : ImageProcessor kernel, 
configuration); - using (IMemoryOwner hBuffer = configuration.MemoryAllocator.Allocate(horizontalOperation.GetRequiredBufferLength(sourceRectangle))) - { - Span hSpan = hBuffer.Memory.Span; - - for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) - { - horizontalOperation.Invoke(y, hSpan); - } - } + ParallelRowIterator.IterateRows( + configuration, + sourceRectangle, + in horizontalOperation); // Vertical 1D convolutions to accumulate the partial results on the target buffer BokehBlurProcessor.SecondPassConvolutionRowOperation verticalOperation = new( @@ -207,10 +185,10 @@ internal class BokehBlurProcessor : ImageProcessor parameters.Z, parameters.W); - for (int y = sourceRectangle.Top; y < sourceRectangle.Bottom; y++) - { - verticalOperation.Invoke(y); - } + ParallelRowIterator.IterateRows( + configuration, + sourceRectangle, + in verticalOperation); } } From e405df597f9cc673e14327188eaf42b018b4d696 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 15 Apr 2026 12:50:38 +1000 Subject: [PATCH 04/12] Update settings --- .../Advanced/ParallelExecutionSettings.cs | 14 ++++++-------- src/ImageSharp/Configuration.cs | 5 +++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs index ad0318297a..cf86d094df 100644 --- a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs +++ b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs @@ -20,7 +20,7 @@ public readonly struct ParallelExecutionSettings /// /// /// The value used for initializing when using TPL. - /// Set to -1 to leave the degree of parallelism unbounded. + /// If set to -1, there is no limit on the number of concurrently running operations. /// /// The value for . /// The . 
@@ -31,7 +31,7 @@ public readonly struct ParallelExecutionSettings { // Shall be compatible with ParallelOptions.MaxDegreeOfParallelism: // https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.paralleloptions.maxdegreeofparallelism - if (maxDegreeOfParallelism == 0 || maxDegreeOfParallelism < -1) + if (maxDegreeOfParallelism is 0 or < -1) { throw new ArgumentOutOfRangeException(nameof(maxDegreeOfParallelism)); } @@ -49,7 +49,7 @@ public readonly struct ParallelExecutionSettings /// /// /// The value used for initializing when using TPL. - /// Set to -1 to leave the degree of parallelism unbounded. + /// If set to -1, there is no limit on the number of concurrently running operations. /// /// The . public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator) @@ -64,7 +64,7 @@ public readonly struct ParallelExecutionSettings /// /// Gets the value used for initializing when using TPL. - /// A value of -1 leaves the degree of parallelism unbounded. + /// A value of -1 means there is no limit on the number of concurrently running operations. /// public int MaxDegreeOfParallelism { get; } @@ -93,12 +93,10 @@ public readonly struct ParallelExecutionSettings } /// - /// Get the default for a + /// Get the default for a /// /// The . /// The . public static ParallelExecutionSettings FromConfiguration(Configuration configuration) - { - return new ParallelExecutionSettings(configuration.MaxDegreeOfParallelism, configuration.MemoryAllocator); - } + => new(configuration.MaxDegreeOfParallelism, configuration.MemoryAllocator); } diff --git a/src/ImageSharp/Configuration.cs b/src/ImageSharp/Configuration.cs index 2673927231..ae6fd96612 100644 --- a/src/ImageSharp/Configuration.cs +++ b/src/ImageSharp/Configuration.cs @@ -64,8 +64,9 @@ public sealed class Configuration /// /// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms /// configured with this instance. 
- /// Set to -1 to leave the degree of parallelism unbounded. - /// Initialized with by default. + /// A positive value limits the number of concurrent operations to the set value. + /// If set to -1, there is no limit on the number of concurrently running operations. + /// Defaults to . /// public int MaxDegreeOfParallelism { From 656f53d05297bedb61902bfd9b9e2962c1c7308d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 16 Apr 2026 00:02:11 +1000 Subject: [PATCH 05/12] Remove not-needed validate method and tests --- .../Advanced/ParallelRowIterator.cs | 35 ------------------ .../Helpers/ParallelRowIteratorTests.cs | 36 ------------------- 2 files changed, 71 deletions(-) diff --git a/src/ImageSharp/Advanced/ParallelRowIterator.cs b/src/ImageSharp/Advanced/ParallelRowIterator.cs index 98c2656d11..f404326bce 100644 --- a/src/ImageSharp/Advanced/ParallelRowIterator.cs +++ b/src/ImageSharp/Advanced/ParallelRowIterator.cs @@ -44,7 +44,6 @@ public static partial class ParallelRowIterator where T : struct, IRowOperation { ValidateRectangle(rectangle); - ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; @@ -109,7 +108,6 @@ public static partial class ParallelRowIterator where TBuffer : unmanaged { ValidateRectangle(rectangle); - ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; @@ -174,7 +172,6 @@ public static partial class ParallelRowIterator where T : struct, IRowIntervalOperation { ValidateRectangle(rectangle); - ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; @@ -236,7 +233,6 @@ public static partial class ParallelRowIterator where TBuffer : unmanaged { ValidateRectangle(rectangle); - ValidateSettings(parallelSettings); int top = rectangle.Top; int bottom = rectangle.Bottom; @@ -315,35 +311,4 @@ public static partial class ParallelRowIterator 0, $"{nameof(rectangle)}.{nameof(rectangle.Height)}"); } - - /// - /// Validates the 
supplied . - /// - /// The execution settings. - /// - /// Thrown when or - /// is invalid. - /// - /// - /// Thrown when is null. - /// This also guards the public default value, which bypasses constructor validation. - /// - private static void ValidateSettings(in ParallelExecutionSettings parallelSettings) - { - // ParallelExecutionSettings is a public struct, so callers can pass default and bypass constructor validation. - if (parallelSettings.MaxDegreeOfParallelism is 0 or < -1) - { - throw new ArgumentOutOfRangeException( - $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}"); - } - - Guard.MustBeGreaterThan( - parallelSettings.MinimumPixelsProcessedPerTask, - 0, - $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}"); - - Guard.NotNull( - parallelSettings.MemoryAllocator, - $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}"); - } } diff --git a/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs b/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs index cf68f702ac..017926fc5d 100644 --- a/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs @@ -224,24 +224,6 @@ public class ParallelRowIteratorTests Assert.Equal(Enumerable.Repeat(1, rectangle.Height), actualData); } - [Fact] - public void IterateRowsWithTempBuffer_DefaultSettingsRequireInitialization() - { - ParallelExecutionSettings parallelSettings = default; - Rectangle rect = new(0, 0, 10, 10); - - void RowAction(int y, Span memory) - { - } - - TestRowOperation operation = new(RowAction); - - ArgumentOutOfRangeException ex = Assert.Throws( - () => ParallelRowIterator.IterateRows, Rgba32>(rect, in parallelSettings, in operation)); - - Assert.Contains(nameof(ParallelExecutionSettings.MaxDegreeOfParallelism), ex.Message); - } - public static TheoryData IterateRows_WithEffectiveMinimumPixelsLimit_Data = new() { @@ 
-367,24 +349,6 @@ public class ParallelRowIteratorTests Assert.Equal(Enumerable.Repeat(1, rectangle.Height), actualData); } - [Fact] - public void IterateRows_DefaultSettingsRequireInitialization() - { - ParallelExecutionSettings parallelSettings = default; - Rectangle rect = new(0, 0, 10, 10); - - void RowAction(int y) - { - } - - TestRowActionOperation operation = new(RowAction); - - ArgumentOutOfRangeException ex = Assert.Throws( - () => ParallelRowIterator.IterateRows(rect, in parallelSettings, in operation)); - - Assert.Contains(nameof(ParallelExecutionSettings.MaxDegreeOfParallelism), ex.Message); - } - public static readonly TheoryData IterateRectangularBuffer_Data = new() { From 03670ab0322d3afe091faa0253dc761c39d4c6a0 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 16 Apr 2026 11:24:08 +1000 Subject: [PATCH 06/12] Revert some convolution processors --- .../Convolution2PassProcessor{TPixel}.cs | 26 ++++++------------- .../ConvolutionProcessor{TPixel}.cs | 15 +++-------- .../EdgeDetectorCompassProcessor{TPixel}.cs | 12 +++------ 3 files changed, 16 insertions(+), 37 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 6d1d7c23c2..38b6cab3f8 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -123,15 +123,10 @@ internal class Convolution2PassProcessor : ImageProcessor this.Configuration, this.PreserveAlpha); - using (IMemoryOwner hBuffer = allocator.Allocate(horizontalOperation.GetRequiredBufferLength(interest))) - { - Span hSpan = hBuffer.Memory.Span; - - for (int y = interest.Top; y < interest.Bottom; y++) - { - horizontalOperation.Invoke(y, hSpan); - } - } + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in horizontalOperation); // 
Vertical convolution VerticalConvolutionRowOperation verticalOperation = new( @@ -143,15 +138,10 @@ internal class Convolution2PassProcessor : ImageProcessor this.Configuration, this.PreserveAlpha); - using (IMemoryOwner vBuffer = allocator.Allocate(verticalOperation.GetRequiredBufferLength(interest))) - { - Span vSpan = vBuffer.Memory.Span; - - for (int y = interest.Top; y < interest.Bottom; y++) - { - verticalOperation.Invoke(y, vSpan); - } - } + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in verticalOperation); } /// diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index b704f556fa..69d72b3cc0 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -97,17 +97,10 @@ internal class ConvolutionProcessor : ImageProcessor map.BuildSamplingOffsetMap(this.KernelXY.Rows, this.KernelXY.Columns, interest, this.BorderWrapModeX, this.BorderWrapModeY); RowOperation operation = new(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); - - // Convolution is memory-bandwidth-bound with low arithmetic intensity. - // Parallelization degrades performance due to cache line contention from - // overlapping source row reads. See #3111. 
- using IMemoryOwner buffer = allocator.Allocate(operation.GetRequiredBufferLength(interest)); - Span span = buffer.Memory.Span; - - for (int y = interest.Top; y < interest.Bottom; y++) - { - operation.Invoke(y, span); - } + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in operation); } Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); diff --git a/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs index f1edb75a20..eae7481661 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/EdgeDetectorCompassProcessor{TPixel}.cs @@ -83,15 +83,11 @@ internal class EdgeDetectorCompassProcessor : ImageProcessor processor.Apply(pass); } - // Convolution is memory-bandwidth-bound with low arithmetic intensity. - // Parallelization degrades performance due to cache line contention from - // overlapping source row reads. See #3111. 
RowOperation operation = new(source.PixelBuffer, pass.PixelBuffer, interest); - - for (int y = interest.Top; y < interest.Bottom; y++) - { - operation.Invoke(y); - } + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in operation); } } From 3a879338c6bd052fab73bb1594fba81c67215318 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 16 Apr 2026 11:26:02 +1000 Subject: [PATCH 07/12] Cleanup --- .../Convolution/Convolution2PassProcessor{TPixel}.cs | 10 +++------- .../Convolution/ConvolutionProcessor{TPixel}.cs | 1 - 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 38b6cab3f8..15ea98936e 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -109,10 +109,6 @@ internal class Convolution2PassProcessor : ImageProcessor MemoryAllocator allocator = this.Configuration.MemoryAllocator; - // Convolution is memory-bandwidth-bound with low arithmetic intensity. - // Parallelization degrades performance due to cache line contention from - // overlapping source row reads. See #3111. 
- // Horizontal convolution HorizontalConvolutionRowOperation horizontalOperation = new( interest, @@ -124,9 +120,9 @@ internal class Convolution2PassProcessor : ImageProcessor this.PreserveAlpha); ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in horizontalOperation); + this.Configuration, + interest, + in horizontalOperation); // Vertical convolution VerticalConvolutionRowOperation verticalOperation = new( diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index 69d72b3cc0..feaaf30ce0 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; From 6998d43c617d1d97aa48bef3d4ca0c39017962c8 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 16 Apr 2026 23:54:55 +1000 Subject: [PATCH 08/12] Update Convolution2PassProcessor{TPixel}.cs --- .../Convolution/Convolution2PassProcessor{TPixel}.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 15ea98936e..1bbbdb3501 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
-using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -107,8 +106,6 @@ internal class Convolution2PassProcessor : ImageProcessor mapXY.BuildSamplingOffsetMap(this.KernelX.Length, this.KernelX.Length, interest, this.BorderWrapModeX, this.BorderWrapModeY); - MemoryAllocator allocator = this.Configuration.MemoryAllocator; - // Horizontal convolution HorizontalConvolutionRowOperation horizontalOperation = new( interest, From 420ae7444c1beebd5233466ce9b3b84fcca951cf Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 17 Apr 2026 16:24:26 +1000 Subject: [PATCH 09/12] Optimize ScaledCopyTo for common scale factors --- .../DownScalingComponentProcessor2.cs | 339 +++++++++++++++++- .../DownScalingComponentProcessor4.cs | 281 ++++++++++++++- .../DownScalingComponentProcessor8.cs | 154 +++++++- 3 files changed, 733 insertions(+), 41 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs index 300a773311..23b81d2bad 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs @@ -66,32 +66,335 @@ internal sealed class DownScalingComponentProcessor2 : ComponentProcessor [MethodImpl(InliningOptions.ShortMethod)] public static void ScaledCopyTo(ref Block8x8F block, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale) { - // TODO: Optimize: implement all cases with scale-specific, loopless code! 
+ if (horizontalScale == 1 && verticalScale == 1) + { + CopyTo1x1Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 2) + { + CopyTo2x2Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 1) + { + CopyTo2x1Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 1 && verticalScale == 2) + { + CopyTo1x2Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 1) + { + CopyTo4x1Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 2) + { + CopyTo4x2Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 1 && verticalScale == 4) + { + CopyTo1x4Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 4) + { + CopyTo2x4Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 4) + { + CopyTo4x4Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + // The common 1x, 2x, and 4x integral scales are specialized above. + // Uncommon legal factor-3 scales use the generic fallback. CopyArbitraryScale(ref block, ref destRef, (uint)destStrideWidth, (uint)horizontalScale, (uint)verticalScale); + } + + /// + /// Copies a 4x4 reduced block directly into the destination buffer when no chroma expansion is needed. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x1Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while doubling only the horizontal axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x1Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while doubling only the vertical axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x2Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while doubling both axes. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x2Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride); + } - [MethodImpl(InliningOptions.ColdPath)] - static void CopyArbitraryScale(ref Block8x8F block, ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale) + /// + /// Copies a 4x4 reduced block into the destination buffer while quadrupling only the horizontal axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x1Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while quadrupling horizontally and doubling vertically. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x2Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while quadrupling only the vertical axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x4Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 
14u, areaStride); + CopyRow4(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while doubling horizontally and quadrupling vertically. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x4Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride); + WidenRow4(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride); + } + + /// + /// Copies a 4x4 reduced block into the destination buffer while quadrupling both axes. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x4Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride); + ExpandRow4(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride); + } + + /// + /// Copies one four-sample row from the reduced block to the destination row. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyRow4(ref float sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref float source = ref Unsafe.Add(ref sourceBase, sourceRow * 8u); + ref float dest = ref Unsafe.Add(ref areaOrigin, destRow * areaStride); + + Unsafe.CopyBlock( + ref Unsafe.As(ref dest), + ref Unsafe.As(ref source), + 4u * sizeof(float)); + } + + /// + /// Expands one four-sample row to eight samples by duplicating each source value horizontally. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void WidenRow4(ref float sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref float source = ref Unsafe.Add(ref sourceBase, sourceRow * 8u); + ref float dest = ref Unsafe.Add(ref areaOrigin, destRow * areaStride); + + float value0 = source; + float value1 = Unsafe.Add(ref source, 1u); + float value2 = Unsafe.Add(ref source, 2u); + float value3 = Unsafe.Add(ref source, 3u); + + dest = value0; + Unsafe.Add(ref dest, 1u) = value0; + Unsafe.Add(ref dest, 2u) = value1; + Unsafe.Add(ref dest, 3u) = value1; + Unsafe.Add(ref dest, 4u) = value2; + Unsafe.Add(ref dest, 5u) = value2; + Unsafe.Add(ref dest, 6u) = value3; + Unsafe.Add(ref dest, 7u) = value3; + } + + /// + /// Expands one four-sample row to sixteen samples by duplicating each source value four times horizontally. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void ExpandRow4(ref float sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref float source = ref Unsafe.Add(ref sourceBase, sourceRow * 8u); + ref float dest = ref Unsafe.Add(ref areaOrigin, destRow * areaStride); + + float value0 = source; + float value1 = Unsafe.Add(ref source, 1u); + float value2 = Unsafe.Add(ref source, 2u); + float value3 = Unsafe.Add(ref source, 3u); + + dest = value0; + Unsafe.Add(ref dest, 1u) = value0; + Unsafe.Add(ref dest, 2u) = value0; + Unsafe.Add(ref dest, 3u) = value0; + Unsafe.Add(ref dest, 4u) = value1; + Unsafe.Add(ref dest, 5u) = value1; + Unsafe.Add(ref dest, 6u) = value1; + Unsafe.Add(ref dest, 7u) = value1; + Unsafe.Add(ref dest, 8u) = value2; + Unsafe.Add(ref dest, 9u) = value2; + Unsafe.Add(ref dest, 10u) = value2; + Unsafe.Add(ref dest, 11u) = value2; + Unsafe.Add(ref dest, 12u) = value3; + Unsafe.Add(ref dest, 13u) = value3; + Unsafe.Add(ref dest, 14u) = value3; + Unsafe.Add(ref dest, 15u) = value3; + } + + /// + /// Replicates each reduced sample 
into an arbitrary integral expansion rectangle for uncommon subsampling ratios. + /// + [MethodImpl(InliningOptions.ColdPath)] + private static void CopyArbitraryScale(ref Block8x8F block, ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale) + { + for (nuint y = 0u; y < 4u; y++) { - for (nuint y = 0; y < 4; y++) + nuint yy = y * verticalScale; + nuint y8 = y * 8u; + + for (nuint x = 0u; x < 4u; x++) { - nuint yy = y * verticalScale; - nuint y8 = y * 8; + nuint xx = x * horizontalScale; - for (nuint x = 0; x < 4; x++) - { - nuint xx = x * horizontalScale; + float value = block[y8 + x]; - float value = block[y8 + x]; + for (nuint i = 0u; i < verticalScale; i++) + { + nuint baseIdx = ((yy + i) * areaStride) + xx; - for (nuint i = 0; i < verticalScale; i++) + for (nuint j = 0u; j < horizontalScale; j++) { - nuint baseIdx = ((yy + i) * areaStride) + xx; - - for (nuint j = 0; j < horizontalScale; j++) - { - // area[xx + j, yy + i] = value; - Unsafe.Add(ref areaOrigin, baseIdx + j) = value; - } + // area[xx + j, yy + i] = value; + Unsafe.Add(ref areaOrigin, baseIdx + j) = value; } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor4.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor4.cs index 7984169902..a645a88c90 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor4.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor4.cs @@ -66,32 +66,277 @@ internal sealed class DownScalingComponentProcessor4 : ComponentProcessor [MethodImpl(InliningOptions.ShortMethod)] public static void ScaledCopyTo(ref Block8x8F block, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale) { - // TODO: Optimize: implement all cases with scale-specific, loopless code! 
+ if (horizontalScale == 1 && verticalScale == 1) + { + CopyTo1x1Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 2) + { + CopyTo2x2Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 1) + { + CopyTo2x1Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 1 && verticalScale == 2) + { + CopyTo1x2Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 1) + { + CopyTo4x1Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 2) + { + CopyTo4x2Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 1 && verticalScale == 4) + { + CopyTo1x4Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 4) + { + CopyTo2x4Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 4) + { + CopyTo4x4Scale(ref block, ref destRef, (uint)destStrideWidth); + return; + } + + // The common 1x, 2x, and 4x integral scales are specialized above. + // Uncommon legal factor-3 scales use the generic fallback. CopyArbitraryScale(ref block, ref destRef, (uint)destStrideWidth, (uint)horizontalScale, (uint)verticalScale); + } + + /// + /// Copies a 2x2 reduced block directly into the destination buffer when no chroma expansion is needed. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x1Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while doubling only the horizontal axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x1Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while doubling only the vertical axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x2Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while doubling both axes. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x2Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + } - [MethodImpl(InliningOptions.ColdPath)] - static void CopyArbitraryScale(ref Block8x8F block, ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale) + /// + /// Copies a 2x2 reduced block into the destination buffer while quadrupling only the horizontal axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x1Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while quadrupling horizontally and doubling vertically. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x2Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while quadrupling only the vertical axis. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x4Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + CopyRow2(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while doubling horizontally and quadrupling vertically. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x4Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + WidenRow2(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + } + + /// + /// Copies a 2x2 reduced block into the destination buffer while quadrupling both axes. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x4Scale(ref Block8x8F block, ref float areaOrigin, uint areaStride) + { + ref float sourceBase = ref Unsafe.As(ref block); + + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + ExpandRow2(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + } + + /// + /// Copies one two-sample row from the reduced block to the destination row. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyRow2(ref float sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref float source = ref Unsafe.Add(ref sourceBase, sourceRow * 8u); + ref float dest = ref Unsafe.Add(ref areaOrigin, destRow * areaStride); + + Unsafe.CopyBlock( + ref Unsafe.As(ref dest), + ref Unsafe.As(ref source), + 2u * sizeof(float)); + } + + /// + /// Expands one two-sample row to four samples by duplicating each source value horizontally. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void WidenRow2(ref float sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref float source = ref Unsafe.Add(ref sourceBase, sourceRow * 8u); + ref float dest = ref Unsafe.Add(ref areaOrigin, destRow * areaStride); + + float value0 = source; + float value1 = Unsafe.Add(ref source, 1u); + + dest = value0; + Unsafe.Add(ref dest, 1u) = value0; + Unsafe.Add(ref dest, 2u) = value1; + Unsafe.Add(ref dest, 3u) = value1; + } + + /// + /// Expands one two-sample row to eight samples by duplicating each source value four times horizontally. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void ExpandRow2(ref float sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref float source = ref Unsafe.Add(ref sourceBase, sourceRow * 8u); + ref float dest = ref Unsafe.Add(ref areaOrigin, destRow * areaStride); + + float value0 = source; + float value1 = Unsafe.Add(ref source, 1u); + + dest = value0; + Unsafe.Add(ref dest, 1u) = value0; + Unsafe.Add(ref dest, 2u) = value0; + Unsafe.Add(ref dest, 3u) = value0; + Unsafe.Add(ref dest, 4u) = value1; + Unsafe.Add(ref dest, 5u) = value1; + Unsafe.Add(ref dest, 6u) = value1; + Unsafe.Add(ref dest, 7u) = value1; + } + + /// + /// Replicates each reduced sample into an arbitrary integral expansion rectangle for uncommon subsampling ratios. + /// + [MethodImpl(InliningOptions.ColdPath)] + private static void CopyArbitraryScale(ref Block8x8F block, ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale) + { + for (nuint y = 0u; y < 2u; y++) { - for (nuint y = 0; y < 2; y++) + nuint yy = y * verticalScale; + nuint y8 = y * 8u; + + for (nuint x = 0u; x < 2u; x++) { - nuint yy = y * verticalScale; - nuint y8 = y * 8; + nuint xx = x * horizontalScale; - for (nuint x = 0; x < 2; x++) - { - nuint xx = x * horizontalScale; + float value = block[y8 + x]; - float value = block[y8 + x]; + for (nuint i = 0u; i < verticalScale; i++) + { + nuint baseIdx = ((yy + i) * areaStride) + xx; - for (nuint i = 0; i < verticalScale; i++) + for (nuint j = 0u; j < horizontalScale; j++) { - nuint baseIdx = ((yy + i) * areaStride) + xx; - - for (nuint j = 0; j < horizontalScale; j++) - { - // area[xx + j, yy + i] = value; - Unsafe.Add(ref areaOrigin, baseIdx + j) = value; - } + // area[xx + j, yy + i] = value; + Unsafe.Add(ref areaOrigin, baseIdx + j) = value; } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs 
b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs index f3b09e6b49..ef17bf002c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs @@ -63,16 +63,56 @@ internal sealed class DownScalingComponentProcessor8 : ComponentProcessor return; } + if (horizontalScale == 2 && verticalScale == 1) + { + CopyTo2x1Scale(value, ref destRef); + return; + } + + if (horizontalScale == 1 && verticalScale == 2) + { + CopyTo1x2Scale(value, ref destRef, (uint)destStrideWidth); + return; + } + if (horizontalScale == 2 && verticalScale == 2) { - destRef = value; - Unsafe.Add(ref destRef, 1) = value; - Unsafe.Add(ref destRef, 0 + (uint)destStrideWidth) = value; - Unsafe.Add(ref destRef, 1 + (uint)destStrideWidth) = value; + CopyTo2x2Scale(value, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 1) + { + CopyTo4x1Scale(value, ref destRef); + return; + } + + if (horizontalScale == 4 && verticalScale == 2) + { + CopyTo4x2Scale(value, ref destRef, (uint)destStrideWidth); return; } - // TODO: Optimize: implement all cases with scale-specific, loopless code! + if (horizontalScale == 1 && verticalScale == 4) + { + CopyTo1x4Scale(value, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 2 && verticalScale == 4) + { + CopyTo2x4Scale(value, ref destRef, (uint)destStrideWidth); + return; + } + + if (horizontalScale == 4 && verticalScale == 4) + { + CopyTo4x4Scale(value, ref destRef, (uint)destStrideWidth); + return; + } + + // The common 1x, 2x, and 4x integral scales are specialized above. + // Uncommon legal factor-3 scales use the generic fallback. 
for (nuint y = 0; y < (uint)verticalScale; y++) { for (nuint x = 0; x < (uint)horizontalScale; x++) @@ -83,4 +123,108 @@ internal sealed class DownScalingComponentProcessor8 : ComponentProcessor destRef = ref Unsafe.Add(ref destRef, (uint)destStrideWidth); } } + + /// + /// Writes a single source value to two horizontally adjacent samples. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x1Scale(float value, ref float areaOrigin) + { + areaOrigin = value; + Unsafe.Add(ref areaOrigin, 1u) = value; + } + + /// + /// Writes a single source value to two vertically adjacent samples. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x2Scale(float value, ref float areaOrigin, uint areaStride) + { + areaOrigin = value; + Unsafe.Add(ref areaOrigin, areaStride) = value; + } + + /// + /// Writes a single source value to a 2x2 rectangle. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x2Scale(float value, ref float areaOrigin, uint areaStride) + { + areaOrigin = value; + Unsafe.Add(ref areaOrigin, 1u) = value; + Unsafe.Add(ref areaOrigin, areaStride) = value; + Unsafe.Add(ref areaOrigin, areaStride + 1u) = value; + } + + /// + /// Writes a single source value to four horizontally adjacent samples. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x1Scale(float value, ref float areaOrigin) + { + areaOrigin = value; + Unsafe.Add(ref areaOrigin, 1u) = value; + Unsafe.Add(ref areaOrigin, 2u) = value; + Unsafe.Add(ref areaOrigin, 3u) = value; + } + + /// + /// Writes a single source value to a 4x2 rectangle. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x2Scale(float value, ref float areaOrigin, uint areaStride) + { + CopyTo4x1Scale(value, ref areaOrigin); + + ref float nextRow = ref Unsafe.Add(ref areaOrigin, areaStride); + CopyTo4x1Scale(value, ref nextRow); + } + + /// + /// Writes a single source value to four vertically adjacent samples. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo1x4Scale(float value, ref float areaOrigin, uint areaStride) + { + areaOrigin = value; + Unsafe.Add(ref areaOrigin, areaStride) = value; + Unsafe.Add(ref areaOrigin, areaStride * 2u) = value; + Unsafe.Add(ref areaOrigin, areaStride * 3u) = value; + } + + /// + /// Writes a single source value to a 2x4 rectangle. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo2x4Scale(float value, ref float areaOrigin, uint areaStride) + { + CopyTo2x1Scale(value, ref areaOrigin); + + ref float row1 = ref Unsafe.Add(ref areaOrigin, areaStride); + CopyTo2x1Scale(value, ref row1); + + ref float row2 = ref Unsafe.Add(ref areaOrigin, areaStride * 2u); + CopyTo2x1Scale(value, ref row2); + + ref float row3 = ref Unsafe.Add(ref areaOrigin, areaStride * 3u); + CopyTo2x1Scale(value, ref row3); + } + + /// + /// Writes a single source value to a 4x4 rectangle. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyTo4x4Scale(float value, ref float areaOrigin, uint areaStride) + { + CopyTo4x1Scale(value, ref areaOrigin); + + ref float row1 = ref Unsafe.Add(ref areaOrigin, areaStride); + CopyTo4x1Scale(value, ref row1); + + ref float row2 = ref Unsafe.Add(ref areaOrigin, areaStride * 2u); + CopyTo4x1Scale(value, ref row2); + + ref float row3 = ref Unsafe.Add(ref areaOrigin, areaStride * 3u); + CopyTo4x1Scale(value, ref row3); + } } From cde366b5cd9bf5d4d202ac303d71e6f7590a7bb6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 17 Apr 2026 21:55:14 +1000 Subject: [PATCH 10/12] Remove premature rounding during normalization --- .../Jpeg/Components/Block8x8F.Vector128.cs | 40 +++++----- .../Jpeg/Components/Block8x8F.Vector256.cs | 26 +++--- .../Formats/Jpeg/Components/Block8x8F.cs | 79 +++++++------------ .../DirectComponentProcessor.cs | 8 +- .../Jpeg/Components/ScaledFloatingPointDCT.cs | 14 ++-- .../Formats/Jpg/Block8x8FTests.cs | 71 ----------------- .../ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 10 +-- .../Formats/Jpg/JpegDecoderTests.Baseline.cs | 2 +- .../Jpg/JpegDecoderTests.Progressive.cs | 2 +- .../Formats/Jpg/JpegEncoderTests.cs | 2 +- .../Formats/Tiff/TiffDecoderTests.cs | 14 ++-- ...ToReferenceOutput_Rgba32_forest_bridge.png | 4 +- ...ToReferenceOutput_Rgba32_forest_bridge.png | 4 +- .../DecodeBaselineJpeg_Calliphora.png | 4 +- ...Issue394-MultiHuffmanBaseline-Speakers.png | 4 +- ...codeBaselineJpeg_MultiScanBaselineCMYK.png | 4 +- .../DecodeBaselineJpeg_badeof.png | 4 +- .../DecodeBaselineJpeg_badrst.png | 4 +- .../DecodeBaselineJpeg_cmyk.png | 4 +- .../DecodeBaselineJpeg_jpeg420small.png | 4 +- .../DecodeBaselineJpeg_jpeg422.png | 4 +- .../DecodeBaselineJpeg_jpeg444.png | 4 +- .../DecodeBaselineJpeg_testorig.png | 4 +- .../DecodeBaselineJpeg_testorig12.png | 4 +- ...DecodeBaselineJpeg_ycck-subsample-1222.png | 4 +- .../DecodeBaselineJpeg_ycck.png | 4 +- 
...ecodeProgressiveJpeg_BadEofProgressive.png | 4 +- .../DecodeProgressiveJpeg_ExifUndefType.png | 4 +- .../DecodeProgressiveJpeg_fb.png | 4 +- .../DecodeProgressiveJpeg_progress.png | 4 +- .../Decode_CMYK_ICC_Jpeg_Rgba32_issue-129.png | 4 +- ...GB_ICC_Jpeg_Rgba32_Momiji-AdobeRGB-yes.png | 4 +- ...GB_ICC_Jpeg_Rgba32_Momiji-AppleRGB-yes.png | 4 +- ..._ICC_Jpeg_Rgba32_Momiji-ColorMatch-yes.png | 4 +- ...GB_ICC_Jpeg_Rgba32_Momiji-ProPhoto-yes.png | 4 +- ...RGB_ICC_Jpeg_Rgba32_Momiji-WideRGB-yes.png | 4 +- ...de_RGB_ICC_Jpeg_Rgba32_Momiji-sRGB-yes.png | 4 +- ...B_ICC_Jpeg_Rgba32_Perceptual-cLUT-only.png | 4 +- .../Decode_RGB_ICC_Jpeg_Rgba32_Perceptual.png | 4 +- .../Decode_RGB_ICC_Jpeg_Rgba32_sRGB_Gray.png | 4 +- ...Decode_YCCK_ICC_Jpeg_Rgba32_issue_2723.png | 4 +- ...code_Resize_Bicubic_Calliphora_150_150.png | 4 +- ...coder_Decode_Resize_Calliphora_150_150.png | 4 +- ...zed_Combined_Resize_Calliphora_150_150.png | 4 +- ...ialized_IDCT_Resize_Calliphora_150_150.png | 4 +- ...alized_Scale_Resize_Calliphora_150_150.png | 4 +- ...ecoder_CanDecode_Cmyk_Rgba32_Cmyk-jpeg.png | 4 +- ..._JpegCompressedWithIssue2679_Issue2679.png | 4 +- ..._CanDecode_YccK_ICC_Rgba32_Issue2454_A.png | 4 +- ..._CanDecode_YccK_ICC_Rgba32_Issue2454_B.png | 4 +- ...oder_CanDecode_YccK_Rgba32_Issue2454_A.png | 4 +- ...oder_CanDecode_YccK_Rgba32_Issue2454_B.png | 4 +- ...EntropyCrop_MultiScanBaselineCMYK_0.25.png | 4 +- ...EntropyCrop_MultiScanBaselineCMYK_0.75.png | 4 +- 54 files changed, 175 insertions(+), 265 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs index d4c0398d97..a80dcf86e5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs @@ -13,31 +13,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components; internal partial struct Block8x8F { /// - /// version of and . + /// version of . 
/// /// The maximum value to normalize to. [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInPlaceVector128(float maximum) + public void NormalizeColorsInPlaceVector128(float maximum) { Vector128 max = Vector128.Create(maximum); Vector128 off = Vector128.Ceiling(max * .5F); - this.V0L = NormalizeAndRoundVector128(this.V0L.AsVector128(), off, max).AsVector4(); - this.V0R = NormalizeAndRoundVector128(this.V0R.AsVector128(), off, max).AsVector4(); - this.V1L = NormalizeAndRoundVector128(this.V1L.AsVector128(), off, max).AsVector4(); - this.V1R = NormalizeAndRoundVector128(this.V1R.AsVector128(), off, max).AsVector4(); - this.V2L = NormalizeAndRoundVector128(this.V2L.AsVector128(), off, max).AsVector4(); - this.V2R = NormalizeAndRoundVector128(this.V2R.AsVector128(), off, max).AsVector4(); - this.V3L = NormalizeAndRoundVector128(this.V3L.AsVector128(), off, max).AsVector4(); - this.V3R = NormalizeAndRoundVector128(this.V3R.AsVector128(), off, max).AsVector4(); - this.V4L = NormalizeAndRoundVector128(this.V4L.AsVector128(), off, max).AsVector4(); - this.V4R = NormalizeAndRoundVector128(this.V4R.AsVector128(), off, max).AsVector4(); - this.V5L = NormalizeAndRoundVector128(this.V5L.AsVector128(), off, max).AsVector4(); - this.V5R = NormalizeAndRoundVector128(this.V5R.AsVector128(), off, max).AsVector4(); - this.V6L = NormalizeAndRoundVector128(this.V6L.AsVector128(), off, max).AsVector4(); - this.V6R = NormalizeAndRoundVector128(this.V6R.AsVector128(), off, max).AsVector4(); - this.V7L = NormalizeAndRoundVector128(this.V7L.AsVector128(), off, max).AsVector4(); - this.V7R = NormalizeAndRoundVector128(this.V7R.AsVector128(), off, max).AsVector4(); + this.V0L = NormalizeVector128(this.V0L.AsVector128(), off, max).AsVector4(); + this.V0R = NormalizeVector128(this.V0R.AsVector128(), off, max).AsVector4(); + this.V1L = NormalizeVector128(this.V1L.AsVector128(), off, max).AsVector4(); + this.V1R = NormalizeVector128(this.V1R.AsVector128(), off, 
max).AsVector4(); + this.V2L = NormalizeVector128(this.V2L.AsVector128(), off, max).AsVector4(); + this.V2R = NormalizeVector128(this.V2R.AsVector128(), off, max).AsVector4(); + this.V3L = NormalizeVector128(this.V3L.AsVector128(), off, max).AsVector4(); + this.V3R = NormalizeVector128(this.V3R.AsVector128(), off, max).AsVector4(); + this.V4L = NormalizeVector128(this.V4L.AsVector128(), off, max).AsVector4(); + this.V4R = NormalizeVector128(this.V4R.AsVector128(), off, max).AsVector4(); + this.V5L = NormalizeVector128(this.V5L.AsVector128(), off, max).AsVector4(); + this.V5R = NormalizeVector128(this.V5R.AsVector128(), off, max).AsVector4(); + this.V6L = NormalizeVector128(this.V6L.AsVector128(), off, max).AsVector4(); + this.V6R = NormalizeVector128(this.V6R.AsVector128(), off, max).AsVector4(); + this.V7L = NormalizeVector128(this.V7L.AsVector128(), off, max).AsVector4(); + this.V7R = NormalizeVector128(this.V7R.AsVector128(), off, max).AsVector4(); } /// @@ -71,8 +71,8 @@ internal partial struct Block8x8F } [MethodImpl(InliningOptions.ShortMethod)] - private static Vector128 NormalizeAndRoundVector128(Vector128 value, Vector128 off, Vector128 max) - => Vector128_.RoundToNearestInteger(Vector128_.Clamp(value + off, Vector128.Zero, max)); + private static Vector128 NormalizeVector128(Vector128 value, Vector128 off, Vector128 max) + => Vector128_.Clamp(value + off, Vector128.Zero, max); private static void MultiplyIntoInt16Vector128(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest) { diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector256.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector256.cs index 2aaf5c9431..f16452ed52 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector256.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector256.cs @@ -39,23 +39,23 @@ internal partial struct Block8x8F #pragma warning restore SA1310 // Field names should not contain underscore /// - /// version of and . + /// version of . 
/// /// The maximum value to normalize to. [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInPlaceVector256(float maximum) + public void NormalizeColorsInPlaceVector256(float maximum) { Vector256 max = Vector256.Create(maximum); Vector256 off = Vector256.Ceiling(max * .5F); - this.V256_0 = NormalizeAndRoundVector256(this.V256_0, off, max); - this.V256_1 = NormalizeAndRoundVector256(this.V256_1, off, max); - this.V256_2 = NormalizeAndRoundVector256(this.V256_2, off, max); - this.V256_3 = NormalizeAndRoundVector256(this.V256_3, off, max); - this.V256_4 = NormalizeAndRoundVector256(this.V256_4, off, max); - this.V256_5 = NormalizeAndRoundVector256(this.V256_5, off, max); - this.V256_6 = NormalizeAndRoundVector256(this.V256_6, off, max); - this.V256_7 = NormalizeAndRoundVector256(this.V256_7, off, max); + this.V256_0 = NormalizeVector256(this.V256_0, off, max); + this.V256_1 = NormalizeVector256(this.V256_1, off, max); + this.V256_2 = NormalizeVector256(this.V256_2, off, max); + this.V256_3 = NormalizeVector256(this.V256_3, off, max); + this.V256_4 = NormalizeVector256(this.V256_4, off, max); + this.V256_5 = NormalizeVector256(this.V256_5, off, max); + this.V256_6 = NormalizeVector256(this.V256_6, off, max); + this.V256_7 = NormalizeVector256(this.V256_7, off, max); } /// @@ -95,10 +95,10 @@ internal partial struct Block8x8F } [MethodImpl(InliningOptions.ShortMethod)] - private static Vector256 NormalizeAndRoundVector256(Vector256 value, Vector256 off, Vector256 max) - => Vector256_.RoundToNearestInteger(Vector256_.Clamp(value + off, Vector256.Zero, max)); + private static Vector256 NormalizeVector256(Vector256 value, Vector256 off, Vector256 max) + => Vector256_.Clamp(value + off, Vector256.Zero, max); - private static unsafe void MultiplyIntoInt16Vector256(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest) + private static void MultiplyIntoInt16Vector256(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest) { 
DebugGuard.IsTrue(Vector256.IsHardwareAccelerated, "Vector256 support is required to run this operation!"); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 49b519201f..19a695d07d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -132,7 +132,7 @@ internal partial struct Block8x8F : IEquatable /// /// Destination [MethodImpl(InliningOptions.ShortMethod)] - public unsafe void ScaledCopyTo(float[] dest) + public void ScaledCopyTo(float[] dest) { DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, Size, "dest is too small"); @@ -193,7 +193,7 @@ internal partial struct Block8x8F : IEquatable /// /// The other block. [MethodImpl(InliningOptions.ShortMethod)] - public unsafe void MultiplyInPlace(ref Block8x8F other) + public void MultiplyInPlace(ref Block8x8F other) { if (Vector256.IsHardwareAccelerated) { @@ -324,62 +324,43 @@ internal partial struct Block8x8F : IEquatable } /// - /// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block. + /// Level shift by +maximum/2, clip to [0, maximum] /// - /// The maximum value. - public void NormalizeColorsAndRoundInPlace(float maximum) + /// The maximum value to normalize to. + public void NormalizeColorsInPlace(float maximum) { if (Vector256.IsHardwareAccelerated) { - this.NormalizeColorsAndRoundInPlaceVector256(maximum); + this.NormalizeColorsInPlaceVector256(maximum); + return; } else if (Vector128.IsHardwareAccelerated) { - this.NormalizeColorsAndRoundInPlaceVector128(maximum); + this.NormalizeColorsInPlaceVector128(maximum); + return; } else { - this.NormalizeColorsInPlace(maximum); - this.RoundInPlace(); - } - } - - /// - /// Level shift by +maximum/2, clip to [0, maximum] - /// - /// The maximum value to normalize to. 
- public void NormalizeColorsInPlace(float maximum) - { - Vector4 min = Vector4.Zero; - Vector4 max = new(maximum); - Vector4 off = new(MathF.Ceiling(maximum * 0.5F)); - - this.V0L = Vector4.Clamp(this.V0L + off, min, max); - this.V0R = Vector4.Clamp(this.V0R + off, min, max); - this.V1L = Vector4.Clamp(this.V1L + off, min, max); - this.V1R = Vector4.Clamp(this.V1R + off, min, max); - this.V2L = Vector4.Clamp(this.V2L + off, min, max); - this.V2R = Vector4.Clamp(this.V2R + off, min, max); - this.V3L = Vector4.Clamp(this.V3L + off, min, max); - this.V3R = Vector4.Clamp(this.V3R + off, min, max); - this.V4L = Vector4.Clamp(this.V4L + off, min, max); - this.V4R = Vector4.Clamp(this.V4R + off, min, max); - this.V5L = Vector4.Clamp(this.V5L + off, min, max); - this.V5R = Vector4.Clamp(this.V5R + off, min, max); - this.V6L = Vector4.Clamp(this.V6L + off, min, max); - this.V6R = Vector4.Clamp(this.V6R + off, min, max); - this.V7L = Vector4.Clamp(this.V7L + off, min, max); - this.V7R = Vector4.Clamp(this.V7R + off, min, max); - } - - /// - /// Rounds all values in the block. 
- /// - public void RoundInPlace() - { - for (int i = 0; i < Size; i++) - { - this[i] = MathF.Round(this[i]); + Vector4 min = Vector4.Zero; + Vector4 max = new(maximum); + Vector4 off = new(MathF.Ceiling(maximum * 0.5F)); + + this.V0L = Vector4.Clamp(this.V0L + off, min, max); + this.V0R = Vector4.Clamp(this.V0R + off, min, max); + this.V1L = Vector4.Clamp(this.V1L + off, min, max); + this.V1R = Vector4.Clamp(this.V1R + off, min, max); + this.V2L = Vector4.Clamp(this.V2L + off, min, max); + this.V2R = Vector4.Clamp(this.V2R + off, min, max); + this.V3L = Vector4.Clamp(this.V3L + off, min, max); + this.V3R = Vector4.Clamp(this.V3R + off, min, max); + this.V4L = Vector4.Clamp(this.V4L + off, min, max); + this.V4R = Vector4.Clamp(this.V4R + off, min, max); + this.V5L = Vector4.Clamp(this.V5L + off, min, max); + this.V5R = Vector4.Clamp(this.V5R + off, min, max); + this.V6L = Vector4.Clamp(this.V6L + off, min, max); + this.V6R = Vector4.Clamp(this.V6R + off, min, max); + this.V7L = Vector4.Clamp(this.V7L + off, min, max); + this.V7R = Vector4.Clamp(this.V7R + off, min, max); } } @@ -533,7 +514,7 @@ internal partial struct Block8x8F : IEquatable } /// - public bool Equals(Block8x8F other) + public readonly bool Equals(Block8x8F other) => this.V0L == other.V0L && this.V0R == other.V0R && this.V1L == other.V1L diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs index 79e25a67a9..b4399a69cd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs @@ -53,10 +53,10 @@ internal sealed class DirectComponentProcessor : ComponentProcessor // Convert from spectral to color FloatingPointDCT.TransformIDCT(ref workspaceBlock); - // To conform better to libjpeg we actually NEED TO 
loose precision here. - // This is because they store blocks as Int16 between all the operations. - // To be "more accurate", we need to emulate this by rounding! - workspaceBlock.NormalizeColorsAndRoundInPlace(maximumValue); + // Normalize into the component sample range without quantizing away + // fractional precision. The later color conversion / final pack stage + // performs the only rounding we actually need for output samples. + workspaceBlock.NormalizeColorsInPlace(maximumValue); // Write to color buffer acording to sampling factors int xColorBufferStart = xBlock * this.BlockAreaSize.Width; diff --git a/src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs index b8234ff3e4..b4d32f7095 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs @@ -136,10 +136,10 @@ internal static class ScaledFloatingPointDCT (z4 * FP32_2_562915447); // Save results to the top left 4x4 subregion - block[(ctr * 8) + 0] = MathF.Round(Numerics.Clamp(((tmp10 + tmp2) * 0.5F) + normalizationValue, 0, maxValue)); - block[(ctr * 8) + 3] = MathF.Round(Numerics.Clamp(((tmp10 - tmp2) * 0.5F) + normalizationValue, 0, maxValue)); - block[(ctr * 8) + 1] = MathF.Round(Numerics.Clamp(((tmp12 + tmp0) * 0.5F) + normalizationValue, 0, maxValue)); - block[(ctr * 8) + 2] = MathF.Round(Numerics.Clamp(((tmp12 - tmp0) * 0.5F) + normalizationValue, 0, maxValue)); + block[(ctr * 8) + 0] = Numerics.Clamp(((tmp10 + tmp2) * 0.5F) + normalizationValue, 0, maxValue); + block[(ctr * 8) + 3] = Numerics.Clamp(((tmp10 - tmp2) * 0.5F) + normalizationValue, 0, maxValue); + block[(ctr * 8) + 1] = Numerics.Clamp(((tmp12 + tmp0) * 0.5F) + normalizationValue, 0, maxValue); + block[(ctr * 8) + 2] = Numerics.Clamp(((tmp12 - tmp0) * 0.5F) + normalizationValue, 0, maxValue); } } @@ -199,8 +199,8 @@ internal static class ScaledFloatingPointDCT (block[ctr 
+ (8 * 1) + 2] * FP32_3_624509785); // Save results to the top left 2x2 subregion - block[(ctr * 8) + 0] = MathF.Round(Numerics.Clamp(((tmp10 + tmp0) * 0.25F) + normalizationValue, 0, maxValue)); - block[(ctr * 8) + 1] = MathF.Round(Numerics.Clamp(((tmp10 - tmp0) * 0.25F) + normalizationValue, 0, maxValue)); + block[(ctr * 8) + 0] = Numerics.Clamp(((tmp10 + tmp0) * 0.25F) + normalizationValue, 0, maxValue); + block[(ctr * 8) + 1] = Numerics.Clamp(((tmp10 - tmp0) * 0.25F) + normalizationValue, 0, maxValue); } } @@ -213,6 +213,6 @@ internal static class ScaledFloatingPointDCT /// Output range normalization value, 1/2 of the . /// Maximum value of the output range. public static float TransformIDCT_1x1(float dc, float dequantizer, float normalizationValue, float maxValue) - => MathF.Round(Numerics.Clamp((dc * dequantizer) + normalizationValue, 0, maxValue)); + => Numerics.Clamp((dc * dequantizer) + normalizationValue, 0, maxValue); } #pragma warning restore IDE0078 diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 368a7b3692..544ac85a51 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -181,54 +181,6 @@ public partial class Block8x8FTests : JpegFixture } } - [Theory] - [InlineData(1)] - [InlineData(2)] - public void NormalizeColorsAndRoundVector256(int seed) - { - if (this.SkipOnNonVector256Runner()) - { - return; - } - - Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); - - Block8x8F expected = source; - expected.NormalizeColorsInPlace(255); - expected.RoundInPlace(); - - Block8x8F actual = source; - actual.NormalizeColorsAndRoundInPlaceVector256(255); - - this.Output.WriteLine(expected.ToString()); - this.Output.WriteLine(actual.ToString()); - this.CompareBlocks(expected, actual, 0); - } - - [Theory] - [InlineData(1)] - [InlineData(2)] - public void NormalizeColorsAndRoundVector128(int seed) - { - if 
(this.SkipOnNonVector128Runner()) - { - return; - } - - Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); - - Block8x8F expected = source; - expected.NormalizeColorsInPlace(255); - expected.RoundInPlace(); - - Block8x8F actual = source; - actual.NormalizeColorsAndRoundInPlaceVector128(255); - - this.Output.WriteLine(expected.ToString()); - this.Output.WriteLine(actual.ToString()); - this.CompareBlocks(expected, actual, 0); - } - [Theory] [InlineData(1, 2)] [InlineData(2, 1)] @@ -290,29 +242,6 @@ public partial class Block8x8FTests : JpegFixture } } - [Theory] - [InlineData(1)] - [InlineData(2)] - [InlineData(3)] - public void RoundInPlaceSlow(int seed) - { - Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); - - Block8x8F d = s; - d.RoundInPlace(); - - this.Output.WriteLine(s.ToString()); - this.Output.WriteLine(d.ToString()); - - for (int i = 0; i < 64; i++) - { - float expected = (float)Math.Round(s[i]); - float actual = d[i]; - - Assert.Equal(expected, actual); - } - } - [Fact] public void MultiplyInPlace_ByOtherBlock() { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 50eada4c7c..83c3a344da 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -193,7 +193,7 @@ public static class DCTTests { for (int x = 0; x < 4; x++) { - AssertScaledElementEquality(expectedSpan.Slice((y * 16) + (x * 2)), actualSpan.Slice((y * 8) + x)); + AssertScaledElementEquality(expectedSpan[((y * 16) + (x * 2))..], actualSpan[((y * 8) + x)..]); } } @@ -210,7 +210,7 @@ public static class DCTTests } } - average2x2 = MathF.Round(average2x2 / 4f); + average2x2 /= 4f; Assert.Equal((int)average2x2, (int)actual[0]); } @@ -254,7 +254,7 @@ public static class DCTTests { for (int x = 0; x < 2; x++) { - AssertScaledElementEquality(expectedSpan.Slice((y * 32) + (x * 4)), actualSpan.Slice((y * 8) + x)); + AssertScaledElementEquality(expectedSpan[((y * 32) + (x 
* 4))..], actualSpan[((y * 8) + x)..]); } } @@ -271,7 +271,7 @@ public static class DCTTests } } - average4x4 = MathF.Round(average4x4 / 16f); + average4x4 /= 16f; Assert.Equal((int)average4x4, (int)actual[0]); } @@ -311,7 +311,7 @@ public static class DCTTests NormalizationValue, MaxOutputValue); - float expected = MathF.Round(Numerics.Clamp(expectedDest[0] + NormalizationValue, 0, MaxOutputValue)); + float expected = Numerics.Clamp(expectedDest[0] + NormalizationValue, 0, MaxOutputValue); Assert.Equal((int)actual, (int)expected); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Baseline.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Baseline.cs index f147e41325..e7e6d20334 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Baseline.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Baseline.cs @@ -30,7 +30,7 @@ public partial class JpegDecoderTests } using Image image = provider.GetImage(JpegDecoder.Instance); - image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); + image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr, appendPixelTypeToFileName: false); provider.Utility.TestName = DecodeBaselineJpegOutputName; image.CompareToReferenceOutput( diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index a5472e1aef..fff1e085b3 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -21,7 +21,7 @@ public partial class JpegDecoderTests where TPixel : unmanaged, IPixel { using Image image = provider.GetImage(JpegDecoder.Instance); - image.DebugSave(provider); + image.DebugSave(provider, appendPixelTypeToFileName: false); provider.Utility.TestName = DecodeProgressiveJpegOutputName; image.CompareToReferenceOutput( diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegEncoderTests.cs 
b/tests/ImageSharp.Tests/Formats/Jpg/JpegEncoderTests.cs index ede147dbe9..5fae85cc85 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegEncoderTests.cs @@ -49,7 +49,7 @@ public partial class JpegEncoderTests public static readonly TheoryData CmykEncodingSetups = new() { - { JpegColorType.Cmyk, 100, 0.0159f / 100 }, + { JpegColorType.Cmyk, 100, 0.0164f / 100 }, { JpegColorType.Cmyk, 80, 0.3922f / 100 }, { JpegColorType.Cmyk, 40, 0.6488f / 100 }, }; diff --git a/tests/ImageSharp.Tests/Formats/Tiff/TiffDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tiff/TiffDecoderTests.cs index e432a7251b..6123d46dac 100644 --- a/tests/ImageSharp.Tests/Formats/Tiff/TiffDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tiff/TiffDecoderTests.cs @@ -707,17 +707,17 @@ public class TiffDecoderTests : TiffDecoderBaseTester [WithFile(YCbCrJpegCompressed2, PixelTypes.Rgba32)] [WithFile(RgbJpegCompressedNoJpegTable, PixelTypes.Rgba32)] [WithFile(GrayscaleJpegCompressed, PixelTypes.Rgba32)] - [WithFile(Issues2123, PixelTypes.Rgba32)] - public void TiffDecoder_CanDecode_JpegCompressed(TestImageProvider provider) - where TPixel : unmanaged, IPixel => TestTiffDecoder(provider, useExactComparer: false); + [WithFile(Issues2123, PixelTypes.Rgba32, 0.110f)] + public void TiffDecoder_CanDecode_JpegCompressed(TestImageProvider provider, float tolerance = 0.001f) + where TPixel : unmanaged, IPixel => TestTiffDecoder(provider, useExactComparer: false, compareTolerance: tolerance); [Theory] [WithFile(RgbOldJpegCompressed, PixelTypes.Rgba32)] - [WithFile(RgbOldJpegCompressed2, PixelTypes.Rgba32)] + [WithFile(RgbOldJpegCompressed2, PixelTypes.Rgba32, 0.1003f)] [WithFile(RgbOldJpegCompressed3, PixelTypes.Rgba32)] [WithFile(RgbOldJpegCompressedGray, PixelTypes.Rgba32)] [WithFile(YCbCrOldJpegCompressed, PixelTypes.Rgba32)] - public void TiffDecoder_CanDecode_OldJpegCompressed(TestImageProvider provider) + public void 
TiffDecoder_CanDecode_OldJpegCompressed(TestImageProvider provider, float tolerance = 0.001f) where TPixel : unmanaged, IPixel { DecoderOptions decoderOptions = new() @@ -728,7 +728,7 @@ public class TiffDecoderTests : TiffDecoderBaseTester image.DebugSave(provider); image.CompareToOriginal( provider, - ImageComparer.Tolerant(0.001f), + ImageComparer.Tolerant(tolerance), ReferenceDecoder, decoderOptions); } @@ -784,7 +784,7 @@ public class TiffDecoderTests : TiffDecoderBaseTester // The image is handcrafted to simulate issue 2679. ImageMagick will throw an expection here and wont decode, // so we compare to rererence output instead. - image.DebugSave(provider); + image.DebugSave(provider, appendPixelTypeToFileName: false); image.CompareToReferenceOutput( ImageComparer.Exact, provider, diff --git a/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SeparateChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png b/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SeparateChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png index de79ec729c..84c7e64ee0 100644 --- a/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SeparateChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png +++ b/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SeparateChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aada4a2ccf45de24f2a591a18d9bc0260ceb3829e104fee6982061013ed87282 -size 14107709 +oid sha256:4a4d2a3a1f320d3fcb121bd8edf716354b7e8ceb251a9eaa2087f443a3adbbc1 +size 10896218 diff --git a/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SynchronizedChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png b/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SynchronizedChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png index 
ff5b35a5f7..679615d17e 100644 --- a/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SynchronizedChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png +++ b/tests/Images/External/ReferenceOutput/HistogramEqualizationTests/AutoLevel_SynchronizedChannels_CompareToReferenceOutput_Rgba32_forest_bridge.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dca9b5b890d3a79b0002b7093d254d484ada4207e5010d1f0c6248d4dd6e22db -size 13909894 +oid sha256:0c3f30908a803ac5956dbd5959b225bceab1fc673a1b89d55d1096ba024c7bcc +size 12519351 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Calliphora.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Calliphora.png index 07c29c0976..95ea3d4721 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Calliphora.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Calliphora.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:362eddc5e06d672b4654bfe7a1ded995934a1c59719a3f909773b2e61931ffac -size 1332495 +oid sha256:9a6f484f06c0b466d16591db78fec3cd2c3f6ebc7578896075cd4af95d8ceaab +size 1411818 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Issue394-MultiHuffmanBaseline-Speakers.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Issue394-MultiHuffmanBaseline-Speakers.png index 1f38ec542f..8288801623 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Issue394-MultiHuffmanBaseline-Speakers.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_Issue394-MultiHuffmanBaseline-Speakers.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7a1e89adcd70f792d678786a177bac927a15d6065001cd76aab0bf3cc1b7b4c -size 1034853 +oid sha256:88271ffea79e42914b8c37828da65afb691c7319b8e28fdd35942ecba4087baa 
+size 1060013 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_MultiScanBaselineCMYK.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_MultiScanBaselineCMYK.png index 349ff64858..e72b1a0415 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_MultiScanBaselineCMYK.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_MultiScanBaselineCMYK.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9dbb00a4be909d4b7fd605f83f786ff85545da54272a256c100afd80d687e8f -size 93253 +oid sha256:3b160cc44490303c23c0abaa0dda827d68e11bfa67127e19da974073085abbe4 +size 96042 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badeof.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badeof.png index 830a95bd74..ccdb19e4db 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badeof.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badeof.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24f637742951c438da5acbae6a93545830ea4d0065031572a3bdd1c96be15cce -size 48990 +oid sha256:7ce1a5d71cefd6527daecb8c1d9089f63189b833f1186971ddb89c6257d076ef +size 46745 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badrst.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badrst.png index 0512251ab4..4f2aae2ed6 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badrst.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_badrst.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0501e53f97ddb4252e15073dc75f4129428befe5594a07297c21ae9f89f075b2 -size 316278 +oid sha256:522b0a2f9a97689217af461ab2a63a41553c0ae6ca28cde046b01019c87e2393 +size 348228 
diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_cmyk.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_cmyk.png index 27c22ecc6d..e24cc59690 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_cmyk.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_cmyk.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2253c83151238056caef2f3bca8800108a84360b2ff21979e4ff37fdddd2232 -size 419852 +oid sha256:5e81ac851f979c017082180a13ee4ddfe26c4151bfc082f7915252e57da44ec7 +size 416967 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg420small.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg420small.png index 4032a32afb..ae30676b36 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg420small.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg420small.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b5e1d91fb6dc1ddb696fbee63331ba9c6ef3548b619c005887e60c5b01f4981 -size 27303 +oid sha256:79291e1eddee66548156fc40bb5c8b1209c7b0048fb56f2bb7890c7d89ce655e +size 27264 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png index 018ecda7a5..8ddb748802 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:733cc46271c4402974db2536a55e6ecae3110856df73031ca48dad03745d852d -size 35375 +oid sha256:5196d5a20f639d833cf1ad979a290c144c97b41897fa5f0fa7454f5f4e8d3c11 +size 28697 diff --git 
a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg444.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg444.png index e5ce7eb3d3..acca4b44e3 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg444.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg444.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6543f546fac9d05ebdac7a534b0cc422f31bfd81067212a19cb3a52ad24560a8 -size 3978 +oid sha256:9894983085f3d3d45b8c4ea9acc680932f236a4602230046386130aa2af916f1 +size 4936 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig.png index 830a95bd74..ccdb19e4db 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24f637742951c438da5acbae6a93545830ea4d0065031572a3bdd1c96be15cce -size 48990 +oid sha256:7ce1a5d71cefd6527daecb8c1d9089f63189b833f1186971ddb89c6257d076ef +size 46745 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig12.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig12.png index 23f1941dcc..dc41461e01 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig12.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_testorig12.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad57cf87eade9ee6663f575b358eafaa869a16b3b02d0fb00ca8c683422b85a0 -size 47601 +oid sha256:26ef4244aa761c7b6c51534e414f79438753b9dbd1fbf45d2f1e2fd2dd4ec4c1 +size 46636 diff --git 
a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck-subsample-1222.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck-subsample-1222.png index f35c40ed89..66337b5c94 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck-subsample-1222.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck-subsample-1222.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a17478333c6ef0943aeb96faba1c0ea560995d0612564fe033b72b2949f4869f -size 66748 +oid sha256:91a522b32485af3be304c2d98a60b9906112637a4627d69a072976474cef7485 +size 66932 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck.png index 5f41fb5239..76183356bd 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_ycck.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63f7105e9e2d0794b3f7225af2187a5717c12b806b68b33b98e08e0a0b1ffa79 -size 71051 +oid sha256:f374012655af194ebece2e9329da416be830d1cadd24a2bd584480d45751ea78 +size 58272 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_BadEofProgressive.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_BadEofProgressive.png index 2386ae49a2..ecf22f93ca 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_BadEofProgressive.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_BadEofProgressive.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e5d84fa88ac8b552c21c042f155801924240d764da365440d28eb8133950925 -size 568177 +oid sha256:36b557cd6d55389b8f02b811ecd2b14f1f1c440c7e8dd5a5a3c55cea57547203 
+size 564682 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_ExifUndefType.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_ExifUndefType.png index 3de8b15578..c144554ca2 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_ExifUndefType.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_ExifUndefType.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb636a9f304cdef713b823adc7424108b4d44fada294c48a366278c9b9e7b4b5 -size 20163 +oid sha256:ff9554b1f421ca04e0580fbb505895f002b80298feb128a15cdc045a1625a490 +size 20111 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_fb.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_fb.png index 07a7c3573e..30bfb538f1 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_fb.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_fb.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2935d3dc66eec334eec65c1ed9fbb04046404a9c28bed413ac635878e63ea6cf -size 114688 +oid sha256:c2eb2bd4db9b67466d96fe154cd771d6780a24a798a0c051f307425f1b0e3d15 +size 115708 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_progress.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_progress.png index 8241c36ac3..e9a2a0bd86 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_progress.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeProgressiveJpeg_progress.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b48730a7a149e13dd87a24e62c0704895b08a5ed1f8ea48a6a6a7248450750d -size 301416 +oid sha256:c5c5b5711041f22e8ee1afb344137c00ee80ba51e777689513e8dfeea855befa +size 297941 diff 
--git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_CMYK_ICC_Jpeg_Rgba32_issue-129.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_CMYK_ICC_Jpeg_Rgba32_issue-129.png index 77a9d0d9cb..4e554a34d3 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_CMYK_ICC_Jpeg_Rgba32_issue-129.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_CMYK_ICC_Jpeg_Rgba32_issue-129.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:215cba73dfb0e19f75f6dc0c3fefca474bd65f57684a207a11d896e1637bb643 -size 1240827 +oid sha256:9750b54f5220020e0c3f5ce22e4f108cb70290750c613c29beeb49473d5c80a6 +size 1349665 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AdobeRGB-yes.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AdobeRGB-yes.png index 0963e90b74..37e1e37cdc 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AdobeRGB-yes.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AdobeRGB-yes.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c942b534baa51b8e46e88bd38d1ced319bccf1b55a5711ae5761697b7437fe4e -size 458321 +oid sha256:dcc32a36dcdfe3b9cfae17a81d3eb180248db4507ae93fdd4bb92e1e5e29950d +size 466215 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AppleRGB-yes.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AppleRGB-yes.png index 1e5eaca4da..dbfa40378d 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AppleRGB-yes.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-AppleRGB-yes.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:163711226bdfa2f102b314435baea9f69ad1be1b11ef5ad8348358cd09a029ae -size 482963 +oid sha256:ae9a32eba21fa0bd2039803b5af8ffdba0409c104746455d5028741f214eb1a1 +size 479922 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ColorMatch-yes.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ColorMatch-yes.png index 06bee00d7a..5427b5c954 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ColorMatch-yes.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ColorMatch-yes.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f7981f40bab5bffff3e7c9ea1676d224c173fabbaa6e7a920d7a9dabd58655f -size 464917 +oid sha256:e2bf66bb6d7eb3ff402b8564fb51bc951792ad47dc17fc9c7c886744638a59bb +size 469096 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ProPhoto-yes.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ProPhoto-yes.png index 3ae12c657a..7870440681 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ProPhoto-yes.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-ProPhoto-yes.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11b02b982c024e295915e88f56a428ad73068217a7ae625f705127ab8c35a4bf -size 477061 +oid sha256:e9c015e599b1710fd50c4ad61104fb03f27f211c13686cb162baba492ec4a935 +size 482675 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-WideRGB-yes.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-WideRGB-yes.png index 0a2eb91cc5..2ee122b8de 100644 --- 
a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-WideRGB-yes.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-WideRGB-yes.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a49967c20cccf824df4de3f105f5ddb44d7a602c072ce22caa38939f21f62505 -size 469827 +oid sha256:e25ba3afb3346633db52e91062c4266474bc9d9c0b3fcf686f1040b3a4d70d0d +size 476050 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-sRGB-yes.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-sRGB-yes.png index fa554484bb..a1e876d2b2 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-sRGB-yes.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Momiji-sRGB-yes.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b33fc8fd03142aaaf8aabc39e084acd9e82e9222292e281953d92d65edcc1a7 -size 436111 +oid sha256:b80e88a7fdb27491687202c247eafbeb9d1c024affd5dc4633bcf15602fed990 +size 464921 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual-cLUT-only.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual-cLUT-only.png index a0b73d299f..8a27087f51 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual-cLUT-only.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual-cLUT-only.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe06798b92c9b476c167407e752b4379d50f1b1ad6329eceb368c8c36097b401 -size 95103 +oid sha256:dbe96e449dfeadc71bbfb0b48d2f56e6357d5b64a225153eed60263ea3356e71 +size 119987 diff --git 
a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual.png index 99ae53f93e..586bc96479 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_Perceptual.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21f8d54d4b789b783f3020402d4c1b91bb541de6565e2960976b569f60694631 -size 99385 +oid sha256:6e9e4da17b7270565fd592f4ff710ff317e1054794d966d33f11748defc7c35b +size 124944 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_sRGB_Gray.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_sRGB_Gray.png index 759b26a60c..5164defc14 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_sRGB_Gray.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_RGB_ICC_Jpeg_Rgba32_sRGB_Gray.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18ad361f79b4ab26d452d5cc7ada4c121dfbf45d20da7c23a58f71a9497d17a2 -size 5341 +oid sha256:0eaf217570e283b6fb9a7d52ba5087d760d9a98827e507cbae299915871e1c31 +size 2595 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_YCCK_ICC_Jpeg_Rgba32_issue_2723.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_YCCK_ICC_Jpeg_Rgba32_issue_2723.png index bf95566838..a73e5f31ef 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_YCCK_ICC_Jpeg_Rgba32_issue_2723.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/Decode_YCCK_ICC_Jpeg_Rgba32_issue_2723.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cef85199e8560d6669766c094d078831024e44ac7fc537f8696f802c8e06138b -size 420141 +oid 
sha256:de73fa7a2cdd79763e40c51381f9a0cf5c7d43db2dcd7a86dad0a5b68377665f +size 421249 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Bicubic_Calliphora_150_150.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Bicubic_Calliphora_150_150.png index e982d9034d..16df99e04e 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Bicubic_Calliphora_150_150.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Bicubic_Calliphora_150_150.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bb6ed717a2af582d60ccd6c1c9c1ac92df0f8662755530b7e9063724835b23b -size 27709 +oid sha256:9be82bf220095b5650c7cd0a0eba9ee2b0fbe39881ff2bbde465ca9d2f94490f +size 27803 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Calliphora_150_150.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Calliphora_150_150.png index 65aac22e90..98aac2c55d 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Calliphora_150_150.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Resize_Calliphora_150_150.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4804948a2ba604e383dd2dcc4ca4cac91c75ac97a0ab10bd884478429fa50a5 -size 28178 +oid sha256:b7e4b589fdbad2688895317fc760a834624f76cc9a027a65ce3cd6b35563ec85 +size 28514 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Combined_Resize_Calliphora_150_150.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Combined_Resize_Calliphora_150_150.png index 65aac22e90..98aac2c55d 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Combined_Resize_Calliphora_150_150.png +++ 
b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Combined_Resize_Calliphora_150_150.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4804948a2ba604e383dd2dcc4ca4cac91c75ac97a0ab10bd884478429fa50a5 -size 28178 +oid sha256:b7e4b589fdbad2688895317fc760a834624f76cc9a027a65ce3cd6b35563ec85 +size 28514 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_IDCT_Resize_Calliphora_150_150.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_IDCT_Resize_Calliphora_150_150.png index abe31b2be7..6659673306 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_IDCT_Resize_Calliphora_150_150.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_IDCT_Resize_Calliphora_150_150.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc67170d70378ad8b8c0e1c1695b5c268341f0d26a6c788d1a8dffa8c90482a0 -size 102165 +oid sha256:d28335f6ad2421b6bc7e420c2e61ead73cd64f63ba7e1c9bd98ac1c140188ddb +size 104381 diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Scale_Resize_Calliphora_150_150.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Scale_Resize_Calliphora_150_150.png index 87087adc51..9c01562f16 100644 --- a/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Scale_Resize_Calliphora_150_150.png +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/JpegDecoder_Decode_Specialized_Scale_Resize_Calliphora_150_150.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae7f6ebfd9f2ddd85611827fda13eaf316d36d5187900458568f80b929effb9b -size 28291 +oid sha256:d0db0474f28d6302aae207b09f63e2de9e9ce6a9777a95dbac69ccb957d08daa +size 28381 diff --git 
a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_Cmyk_Rgba32_Cmyk-jpeg.png b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_Cmyk_Rgba32_Cmyk-jpeg.png index 06d60e0303..6f35ecb553 100644 --- a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_Cmyk_Rgba32_Cmyk-jpeg.png +++ b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_Cmyk_Rgba32_Cmyk-jpeg.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f68db78d765a7f36570cd7b57a1f06cfca24c3b4916d0692a4aa051209ec327 -size 616 +oid sha256:b1ff0ce13bf0521b8a6f0d77806473c837c45428391786f751186a53fd951c0c +size 394 diff --git a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_JpegCompressedWithIssue2679_Issue2679.png b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_JpegCompressedWithIssue2679_Issue2679.png index 6150aacb37..d2f11dc175 100644 --- a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_JpegCompressedWithIssue2679_Issue2679.png +++ b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_JpegCompressedWithIssue2679_Issue2679.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cd36c7e07a08e22cceecd28a056c80e5553a8c092bfc091e902d13bd5c46f4d -size 120054 +oid sha256:96729898fee87fc4305913c111a5841af205564d032b916aeb04425a20c6b22c +size 46540 diff --git a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_A.png b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_A.png index 97118c15b0..addaa27bfe 100644 --- a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_A.png +++ b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_A.png @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c4f77673028643af0ac02a8f6a1e2db14052177e3401c369391a8ff7e943770c -size 7679254 +oid sha256:13f51f69b061303d06b73fa7e626d7252a9fd14aa84c961ea894f6e7cd7218cc +size 7603818 diff --git a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_B.png b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_B.png index 52accc22dc..445cad99c8 100644 --- a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_B.png +++ b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_ICC_Rgba32_Issue2454_B.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e616895c21fd8b19a216e8a3ef4968bd413589b5875efdac29860f019a710527 -size 7517284 +oid sha256:b6a35853e44d06a6f74e3aaf558e6a4b9c3209c6c3fa2c8f3ee650bf049ac0d3 +size 7624359 diff --git a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_A.png b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_A.png index 350d1af68c..e88a98fa48 100644 --- a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_A.png +++ b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_A.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7911e059049c427229136479740fd62e2e09907549ec3e1421a6a60da6167cc -size 7840892 +oid sha256:9e91f6ae534b95a9e052158043e929e1da22e0851b4d6dfdb8b802605396758f +size 7888373 diff --git a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_B.png b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_B.png index 3dc99e604e..29134afcaa 100644 --- 
a/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_B.png +++ b/tests/Images/External/ReferenceOutput/TiffDecoderTests/TiffDecoder_CanDecode_YccK_Rgba32_Issue2454_B.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:291f2033a7b4cfc10fb3301283c167b3fbc288bc173c95b21bc726bf076865af -size 7649213 +oid sha256:81bb1f05a6f72aebee41941b2390ca0a35c4368758af2d27b3e81bbee8901c56 +size 7956102 diff --git a/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.25.png b/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.25.png index 331b8b30a6..1a052e4449 100644 --- a/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.25.png +++ b/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.25.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a68aa183691be3240e881057cb2f5785e50228c9c5dd98163ba766b5cafa8b55 -size 88601 +oid sha256:1f312445263030ea79e2d1a05c62d19ade816c9cda9f0d3494ab76fde1e03ace +size 92534 diff --git a/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.75.png b/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.75.png index c375fff78b..769e172b7d 100644 --- a/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.75.png +++ b/tests/Images/External/ReferenceOutput/Transforms/EntropyCropTest/EntropyCrop_MultiScanBaselineCMYK_0.75.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53a7bc1aa7279cce0b73568b6ed9b141377f7654b73dcf926c43c22dd908039a -size 88502 +oid sha256:672f167d514f01c625145701ea0d45ced503a6e84cae75fbdb67e6b0db179aee +size 92318 From ac0adfccac9e2d4028c565d339f686b02cf6c9aa Mon Sep 17 00:00:00 2001 From: 
James Jackson-South Date: Fri, 17 Apr 2026 22:11:53 +1000 Subject: [PATCH 11/12] Optimize Block8x8F ScaledCopy for common scales --- .../Jpeg/Components/Block8x8F.ScaledCopy.cs | 324 +++++++++++++++++- .../Formats/Jpeg/Components/Block8x8F.cs | 2 +- 2 files changed, 324 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs index 179f9aa287..c2a95989d3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs @@ -29,7 +29,50 @@ internal partial struct Block8x8F return; } - // TODO: Optimize: implement all cases with scale-specific, loopless code! + if (horizontalScale == 2 && verticalScale == 1) + { + this.CopyTo2x1Scale(ref areaOrigin, (uint)areaStride); + return; + } + + if (horizontalScale == 1 && verticalScale == 2) + { + this.CopyTo1x2Scale(ref areaOrigin, (uint)areaStride); + return; + } + + if (horizontalScale == 4 && verticalScale == 1) + { + this.CopyTo4x1Scale(ref areaOrigin, (uint)areaStride); + return; + } + + if (horizontalScale == 4 && verticalScale == 2) + { + this.CopyTo4x2Scale(ref areaOrigin, (uint)areaStride); + return; + } + + if (horizontalScale == 1 && verticalScale == 4) + { + this.CopyTo1x4Scale(ref areaOrigin, (uint)areaStride); + return; + } + + if (horizontalScale == 2 && verticalScale == 4) + { + this.CopyTo2x4Scale(ref areaOrigin, (uint)areaStride); + return; + } + + if (horizontalScale == 4 && verticalScale == 4) + { + this.CopyTo4x4Scale(ref areaOrigin, (uint)areaStride); + return; + } + + // The common 1x, 2x, and 4x integral scales are specialized above. + // Uncommon legal factor-3 scales use the generic fallback. 
this.CopyArbitraryScale(ref areaOrigin, (uint)areaStride, (uint)horizontalScale, (uint)verticalScale); } @@ -85,6 +128,285 @@ internal partial struct Block8x8F } } + /// + /// Copies the full 8x8 block into the destination buffer while doubling only the horizontal axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo2x1Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + WidenRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 4u, 4u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 5u, 5u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 6u, 6u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 7u, 7u, areaStride); + } + + /// + /// Copies the full 8x8 block into the destination buffer while doubling only the vertical axis. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo1x2Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + CopyRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 4u, 8u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 4u, 9u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 5u, 10u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 5u, 11u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 6u, 12u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 6u, 13u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 7u, 14u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 7u, 15u, areaStride); + } + + /// + /// Copies the full 8x8 block into the destination buffer while quadrupling only the horizontal axis. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo4x1Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 4u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 5u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 6u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 7u, areaStride); + } + + /// + /// Copies the full 8x8 block into the destination buffer while quadrupling horizontally and doubling vertically. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo4x2Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 8u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 9u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 10u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 11u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 12u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 13u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 14u, areaStride); + ExpandRow8(ref 
sourceBase, ref areaOrigin, 7u, 15u, areaStride); + } + + /// + /// Copies the full 8x8 block into the destination buffer while quadrupling only the vertical axis. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo1x4Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + CopyRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 6u, 25u, 
areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride); + CopyRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride); + } + + /// + /// Copies the full 8x8 block into the destination buffer while doubling horizontally and quadrupling vertically. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo2x4Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + WidenRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 4u, 19u, 
areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride); + WidenRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride); + } + + /// + /// Copies the full 8x8 block into the destination buffer while quadrupling both axes. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private void CopyTo4x4Scale(ref float areaOrigin, uint areaStride) + { + ref Vector4 sourceBase = ref this.V0L; + + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride); 
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride); + ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride); + } + + /// + /// Copies one eight-sample row from the full block to the destination row. + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void CopyRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref Vector4 source = ref Unsafe.Add(ref sourceBase, sourceRow * 2u); + ref Vector4 dest = ref Unsafe.As(ref Unsafe.Add(ref areaOrigin, destRow * areaStride)); + + dest = source; + Unsafe.Add(ref dest, 1u) = Unsafe.Add(ref source, 1u); + } + + /// + /// Expands one eight-sample row to sixteen samples by duplicating each source value horizontally. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void WidenRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref Vector4 sourceLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u); + ref Vector4 sourceRight = ref Unsafe.Add(ref sourceLeft, 1u); + ref Vector4 dest = ref Unsafe.As(ref Unsafe.Add(ref areaOrigin, destRow * areaStride)); + + Vector4 xyLeft = new(sourceLeft.X); + xyLeft.Z = sourceLeft.Y; + xyLeft.W = sourceLeft.Y; + + Vector4 zwLeft = new(sourceLeft.Z); + zwLeft.Z = sourceLeft.W; + zwLeft.W = sourceLeft.W; + + Vector4 xyRight = new(sourceRight.X); + xyRight.Z = sourceRight.Y; + xyRight.W = sourceRight.Y; + + Vector4 zwRight = new(sourceRight.Z); + zwRight.Z = sourceRight.W; + zwRight.W = sourceRight.W; + + dest = xyLeft; + Unsafe.Add(ref dest, 1u) = zwLeft; + Unsafe.Add(ref dest, 2u) = xyRight; + Unsafe.Add(ref dest, 3u) = zwRight; + } + + /// + /// Expands one eight-sample row to thirty-two samples by duplicating each source value four times horizontally. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + private static void ExpandRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride) + { + ref Vector4 sourceLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u); + ref Vector4 sourceRight = ref Unsafe.Add(ref sourceLeft, 1u); + ref Vector4 dest = ref Unsafe.As(ref Unsafe.Add(ref areaOrigin, destRow * areaStride)); + + dest = new Vector4(sourceLeft.X); + Unsafe.Add(ref dest, 1u) = new Vector4(sourceLeft.Y); + Unsafe.Add(ref dest, 2u) = new Vector4(sourceLeft.Z); + Unsafe.Add(ref dest, 3u) = new Vector4(sourceLeft.W); + Unsafe.Add(ref dest, 4u) = new Vector4(sourceRight.X); + Unsafe.Add(ref dest, 5u) = new Vector4(sourceRight.Y); + Unsafe.Add(ref dest, 6u) = new Vector4(sourceRight.Z); + Unsafe.Add(ref dest, 7u) = new Vector4(sourceRight.W); + } + [MethodImpl(InliningOptions.ColdPath)] private void CopyArbitraryScale(ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale) { diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 19a695d07d..f3767c8884 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -132,7 +132,7 @@ internal partial struct Block8x8F : IEquatable /// /// Destination [MethodImpl(InliningOptions.ShortMethod)] - public void ScaledCopyTo(float[] dest) + public readonly void ScaledCopyTo(float[] dest) { DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, Size, "dest is too small"); From 9304b57022c57bb5abbf14b29d8cf3900f8c30a9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 19 Apr 2026 13:52:37 +1000 Subject: [PATCH 12/12] Use ColdPath for Block8x8F --- .../Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs | 2 +- .../DownScalingComponentProcessor8.cs | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git 
a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
index c2a95989d3..160e6a4f9a 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
@@ -14,7 +14,7 @@ internal partial struct Block8x8F
     public void ScaledCopyFrom(ref float areaOrigin, int areaStride)
         => CopyFrom1x1Scale(ref Unsafe.As(ref areaOrigin), ref Unsafe.As(ref this), areaStride);
 
-    [MethodImpl(InliningOptions.ShortMethod)]
+    [MethodImpl(InliningOptions.ColdPath)]
     public void ScaledCopyTo(ref float areaOrigin, int areaStride, int horizontalScale, int verticalScale)
     {
         if (horizontalScale == 1 && verticalScale == 1)
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs
index ef17bf002c..0d881977f0 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs
@@ -111,6 +111,14 @@ internal sealed class DownScalingComponentProcessor8 : ComponentProcessor
             return;
         }
 
+        // The common 1x, 2x, and 4x integral scales are specialized above.
+        // Uncommon legal factor-3 scales use the generic fallback.
+        CopyArbitraryScale(value, ref destRef, destStrideWidth, horizontalScale, verticalScale);
+    }
+
+    [MethodImpl(InliningOptions.ColdPath)]
+    private static float CopyArbitraryScale(float value, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale)
+    {
         // The common 1x, 2x, and 4x integral scales are specialized above.
         // Uncommon legal factor-3 scales use the generic fallback.
         for (nuint y = 0; y < (uint)verticalScale; y++)
@@ -122,6 +130,8 @@ internal sealed class DownScalingComponentProcessor8 : ComponentProcessor
             destRef = ref Unsafe.Add(ref destRef, (uint)destStrideWidth);
         }
+
+        return value; // destRef has been advanced one stride past the last row written above, so it must not be read; hand back the fill value instead
     }
 
     ///