From 192c4aba0530e8828c80884e06541541582b85ae Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Jan 2026 14:46:56 +1000 Subject: [PATCH] Micro optimizations --- .../Transforms/Resize/ResizeWorker.cs | 72 ++++++++++++++----- tests/ImageSharp.Benchmarks/Config.cs | 12 +++- .../ImageSharp.Benchmarks.csproj | 5 +- .../Processing/Resize.cs | 2 +- 4 files changed, 68 insertions(+), 23 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs index cce27a401c..675a17ef46 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs @@ -114,30 +114,49 @@ internal sealed class ResizeWorker : IDisposable Span transposedFirstPassBufferSpan = this.transposedFirstPassBuffer.DangerousGetSingleSpan(); int left = this.targetWorkingRect.Left; - int right = this.targetWorkingRect.Right; int width = this.targetWorkingRect.Width; + nuint widthCount = (uint)width; + + // Hoist invariant calculations outside the loop + int targetOriginY = this.targetOrigin.Y; + int currentWindowMax = this.currentWindow.Max; + int currentWindowMin = this.currentWindow.Min; + nuint workerHeight = (uint)this.workerHeight; + nuint workerHeight2 = workerHeight * 2; + + ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan); + for (int y = rowInterval.Min; y < rowInterval.Max; y++) { // Ensure offsets are normalized for cropping and padding. - ResizeKernel kernel = this.verticalKernelMap.GetKernel((uint)(y - this.targetOrigin.Y)); + ResizeKernel kernel = this.verticalKernelMap.GetKernel((uint)(y - targetOriginY)); - while (kernel.StartIndex + kernel.Length > this.currentWindow.Max) + // Check if we need to slide the window before processing this row + int kernelEnd = kernel.StartIndex + kernel.Length; + while (kernelEnd > currentWindowMax) { this.Slide(); + currentWindowMax = this.currentWindow.Max; + currentWindowMin = this.currentWindow.Min; } - ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan); + int top = kernel.StartIndex - currentWindowMin; + ref Vector4 colRef0 = ref transposedFirstPassBufferSpan[top]; - int top = kernel.StartIndex - this.currentWindow.Min; + nuint i = 0; + for (; i + 1 < widthCount; i += 2) + { + ref Vector4 colRef1 = ref Unsafe.Add(ref colRef0, workerHeight); - ref Vector4 fpBase = ref transposedFirstPassBufferSpan[top]; + Unsafe.Add(ref tempRowBase, i) = kernel.ConvolveCore(ref colRef0); + Unsafe.Add(ref tempRowBase, i + 1) = kernel.ConvolveCore(ref colRef1); - for (nuint x = 0; x < (uint)(right - left); x++) - { - ref Vector4 firstPassColumnBase = ref Unsafe.Add(ref fpBase, x * (uint)this.workerHeight); + colRef0 = ref Unsafe.Add(ref colRef0, workerHeight2); + } - // Destination color components - Unsafe.Add(ref tempRowBase, x) = kernel.ConvolveCore(ref firstPassColumnBase); + if (i < widthCount) + { + Unsafe.Add(ref tempRowBase, i) = kernel.ConvolveCore(ref colRef0); } Span targetRowSpan = destination.DangerousGetRowSpan(y).Slice(left, width); @@ -172,6 +191,13 @@ internal sealed class ResizeWorker : IDisposable nuint left = (uint)this.targetWorkingRect.Left; nuint right = (uint)this.targetWorkingRect.Right; nuint targetOriginX = (uint)this.targetOrigin.X; + nuint widthCount = right - left; + nuint workerHeight = (uint)this.workerHeight; + int currentWindowMin = this.currentWindow.Min; + + // Cache the kernel map reference to reduce repeated field indirections. + ResizeKernelMap kernelMap = this.horizontalKernelMap; + for (int y = calculationInterval.Min; y < calculationInterval.Max; y++) { Span sourceRow = this.source.DangerousGetRowSpan(y); @@ -182,17 +208,25 @@ internal sealed class ResizeWorker : IDisposable tempRowSpan, this.conversionModifiers); - // optimization for: - // Span firstPassSpan = transposedFirstPassBufferSpan.Slice(y - this.currentWindow.Min); - ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - this.currentWindow.Min]; + ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - currentWindowMin]; + + // Unroll by 2 to reduce loop overhead and kernel fetch costs. + nuint x = left; + nuint z = 0; - for (nuint x = left, z = 0; x < right; x++, z++) + for (; z + 1 < widthCount; x += 2, z += 2) { - ResizeKernel kernel = this.horizontalKernelMap.GetKernel(x - targetOriginX); + ResizeKernel kernel0 = kernelMap.GetKernel(x - targetOriginX); + ResizeKernel kernel1 = kernelMap.GetKernel((x + 1) - targetOriginX); - // optimization for: - // firstPassSpan[x * this.workerHeight] = kernel.Convolve(tempRowSpan); - Unsafe.Add(ref firstPassBaseRef, z * (uint)this.workerHeight) = kernel.Convolve(tempRowSpan); + Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel0.Convolve(tempRowSpan); + Unsafe.Add(ref firstPassBaseRef, (z + 1) * workerHeight) = kernel1.Convolve(tempRowSpan); + } + + if (z < widthCount) + { + ResizeKernel kernel = kernelMap.GetKernel(x - targetOriginX); + Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel.Convolve(tempRowSpan); } } } diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index 190c245c94..c2d7abcc2e 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -10,6 +10,7 @@ using BenchmarkDotNet.Diagnosers; using BenchmarkDotNet.Environments; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Reports; +using BenchmarkDotNet.Toolchains.InProcess.Emit; namespace SixLabors.ImageSharp.Benchmarks; @@ -42,7 +43,16 @@ public partial class Config : ManualConfig .WithLaunchCount(1) .WithWarmupCount(3) .WithIterationCount(3) - .WithArguments([new MsBuildArgument("/p:DebugType=portable")])); + .WithArguments([new MsBuildArgument("/p:DebugType=portable")])) ; + } + + public class StandardInProcess : Config + { + public StandardInProcess() => this.AddJob( + Job.Default + .WithRuntime(CoreRuntime.Core80) + .WithToolchain(InProcessEmitToolchain.Instance) + .WithArguments([new MsBuildArgument("/p:DebugType=portable")])); } #if OS_WINDOWS diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index 2628623b4d..fa5fdd816d 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -57,8 +57,9 @@ - - + + + diff --git a/tests/ImageSharp.Benchmarks/Processing/Resize.cs b/tests/ImageSharp.Benchmarks/Processing/Resize.cs index 356027221c..3cc10afb72 100644 --- a/tests/ImageSharp.Benchmarks/Processing/Resize.cs +++ b/tests/ImageSharp.Benchmarks/Processing/Resize.cs @@ -12,7 +12,7 @@ using SDImage = System.Drawing.Image; namespace SixLabors.ImageSharp.Benchmarks; -[Config(typeof(Config.Standard))] +[Config(typeof(Config.StandardInProcess))] public abstract class Resize where TPixel : unmanaged, IPixel {