diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs
index c1907bb52..bd4a18c2f 100644
--- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs
+++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs
@@ -102,6 +102,12 @@ internal partial class ResizeKernelMap : IDisposable
     [MethodImpl(InliningOptions.ShortMethod)]
     internal ref ResizeKernel GetKernel(nuint destIdx) => ref this.kernels[(int)destIdx];
 
+    /// <summary>
+    /// Returns a read-only span of <see cref="ResizeKernel"/> over the underlying kernel data.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    internal ReadOnlySpan<ResizeKernel> GetKernelSpan() => this.kernels;
+
     /// <summary>
     /// Computes the weights to apply at each pixel when resizing.
     /// </summary>
diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs
index 675a17ef4..0a4d38655 100644
--- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs
+++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs
@@ -110,28 +110,36 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
     {
         Span<Vector4> tempColSpan = this.tempColumnBuffer.GetSpan();
 
-        // When creating transposedFirstPassBuffer, we made sure it's contiguous:
+        // When creating transposedFirstPassBuffer, we made sure it's contiguous.
         Span<Vector4> transposedFirstPassBufferSpan = this.transposedFirstPassBuffer.DangerousGetSingleSpan();
 
         int left = this.targetWorkingRect.Left;
         int width = this.targetWorkingRect.Width;
         nuint widthCount = (uint)width;
 
-        // Hoist invariant calculations outside the loop
+        // Normalize destination-space Y to kernel indices using uint arithmetic.
+        // This relies on the contract that processing addresses are normalized (cropping/padding handled by targetOrigin).
         int targetOriginY = this.targetOrigin.Y;
+
+        // Hoist invariant calculations outside the loop.
         int currentWindowMax = this.currentWindow.Max;
         int currentWindowMin = this.currentWindow.Min;
         nuint workerHeight = (uint)this.workerHeight;
         nuint workerHeight2 = workerHeight * 2;
 
+        // Ref-walk the kernel table to avoid bounds checks in the tight loop.
+        ReadOnlySpan<ResizeKernel> vKernels = this.verticalKernelMap.GetKernelSpan();
+        ref ResizeKernel vKernelBase = ref MemoryMarshal.GetReference(vKernels);
+
         ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan);
 
         for (int y = rowInterval.Min; y < rowInterval.Max; y++)
         {
-            // Ensure offsets are normalized for cropping and padding.
-            ResizeKernel kernel = this.verticalKernelMap.GetKernel((uint)(y - targetOriginY));
+            // Normalize destination-space Y to an unsigned kernel index.
+            uint vIdx = (uint)(y - targetOriginY);
+            ref ResizeKernel kernel = ref Unsafe.Add(ref vKernelBase, (nint)vIdx);
 
-            // Check if we need to slide the window before processing this row
+            // Slide the working window when the kernel would read beyond the current cached region.
             int kernelEnd = kernel.StartIndex + kernel.Length;
             while (kernelEnd > currentWindowMax)
             {
@@ -143,6 +151,7 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
             int top = kernel.StartIndex - currentWindowMin;
             ref Vector4 colRef0 = ref transposedFirstPassBufferSpan[top];
 
+            // Unroll by 2 and advance column refs via arithmetic to reduce inner-loop overhead.
             nuint i = 0;
             for (; i + 1 < widthCount; i += 2)
             {
@@ -190,13 +199,18 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
 
         nuint left = (uint)this.targetWorkingRect.Left;
         nuint right = (uint)this.targetWorkingRect.Right;
-        nuint targetOriginX = (uint)this.targetOrigin.X;
         nuint widthCount = right - left;
+
+        // Normalize destination-space X to kernel indices using uint arithmetic.
+        // This relies on the contract that processing addresses are normalized (cropping/padding handled by targetOrigin).
+        nuint targetOriginX = (uint)this.targetOrigin.X;
+
         nuint workerHeight = (uint)this.workerHeight;
         int currentWindowMin = this.currentWindow.Min;
 
-        // Cache the kernel map reference to reduce repeated field indirections.
-        ResizeKernelMap kernelMap = this.horizontalKernelMap;
+        // Ref-walk the kernel table to avoid bounds checks in the tight loop.
+        ReadOnlySpan<ResizeKernel> hKernels = this.horizontalKernelMap.GetKernelSpan();
+        ref ResizeKernel hKernelBase = ref MemoryMarshal.GetReference(hKernels);
 
         for (int y = calculationInterval.Min; y < calculationInterval.Max; y++)
         {
@@ -210,14 +224,17 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
 
             ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - currentWindowMin];
 
-            // Unroll by 2 to reduce loop overhead and kernel fetch costs.
+            // Unroll by 2 to reduce loop and kernel lookup overhead.
             nuint x = left;
             nuint z = 0;
             for (; z + 1 < widthCount; x += 2, z += 2)
            {
-                ResizeKernel kernel0 = kernelMap.GetKernel(x - targetOriginX);
-                ResizeKernel kernel1 = kernelMap.GetKernel((x + 1) - targetOriginX);
+                nuint hIdx0 = (uint)(x - targetOriginX);
+                nuint hIdx1 = (uint)((x + 1) - targetOriginX);
+
+                ref ResizeKernel kernel0 = ref Unsafe.Add(ref hKernelBase, (nint)hIdx0);
+                ref ResizeKernel kernel1 = ref Unsafe.Add(ref hKernelBase, (nint)hIdx1);
 
                 Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel0.Convolve(tempRowSpan);
                 Unsafe.Add(ref firstPassBaseRef, (z + 1) * workerHeight) = kernel1.Convolve(tempRowSpan);
 
@@ -225,7 +242,9 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
 
             if (z < widthCount)
            {
-                ResizeKernel kernel = kernelMap.GetKernel(x - targetOriginX);
+                nuint hIdx = (uint)(x - targetOriginX);
+                ref ResizeKernel kernel = ref Unsafe.Add(ref hKernelBase, (nint)hIdx);
+
                 Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel.Convolve(tempRowSpan);
             }
         }
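
Note for reviewers: the core trick in this patch is replacing per-element span/array indexing with a hoisted base ref plus `Unsafe.Add`. The sketch below is a minimal standalone illustration of that pattern, not ImageSharp code; `RefWalkSketch`, `SumChecked`, `SumRefWalk`, and the `int` element type are hypothetical stand-ins for the `ResizeKernel` table. The point is that the JIT cannot prove a computed index like `y - originY` is in range, so the span indexer re-validates it on every access, while the ref-walk removes the check in exchange for a caller-guaranteed contract.

```csharp
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

internal static class RefWalkSketch
{
    // Checked variant: the indexer validates the computed index on every pass.
    internal static int SumChecked(ReadOnlySpan<int> kernels, int originY, int rowStart, int rowEnd)
    {
        int sum = 0;
        for (int y = rowStart; y < rowEnd; y++)
        {
            sum += kernels[y - originY];
        }

        return sum;
    }

    // Ref-walk variant: hoist a ref to element 0 once, then offset from it.
    // No bounds check remains, so the caller must guarantee
    // 0 <= y - originY < kernels.Length, exactly as the resize loops do by contract.
    internal static int SumRefWalk(ReadOnlySpan<int> kernels, int originY, int rowStart, int rowEnd)
    {
        ref int kernelBase = ref MemoryMarshal.GetReference(kernels);
        int sum = 0;
        for (int y = rowStart; y < rowEnd; y++)
        {
            uint idx = (uint)(y - originY);
            sum += Unsafe.Add(ref kernelBase, (nint)idx);
        }

        return sum;
    }
}
```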
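Note on the `(uint)(y - targetOriginY)` casts: casting the signed difference to `uint` makes any out-of-contract negative value wrap far above `int.MaxValue`, so a single unsigned comparison would reject both "below the origin" and "past the end of the kernel table". A hedged sketch of that property (the helper name is illustrative; the diff itself skips the comparison and relies on the normalization contract, the cast simply yields a non-negative offset for `Unsafe.Add`):

```csharp
// One unsigned comparison covers both failure directions:
// y < originY wraps to a huge uint, which also fails the `< kernelCount` test.
static bool IsValidKernelIndex(int y, int originY, int kernelCount)
    => (uint)(y - originY) < (uint)kernelCount;
```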
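Note on the unroll-by-2 shape: both passes now process two destination elements per iteration with a single-element tail, which halves loop-control overhead and exposes two independent convolutions per iteration to the CPU. Below is a minimal sketch of the same `i + 1 < count` / trailing-`if` structure, using a hypothetical `Scale` helper rather than the convolution itself.

```csharp
using System;

internal static class UnrollSketch
{
    internal static void Scale(Span<float> values, float factor)
    {
        nuint count = (uint)values.Length;

        // Main loop: two independent operations per iteration.
        nuint i = 0;
        for (; i + 1 < count; i += 2)
        {
            values[(int)i] *= factor;
            values[(int)(i + 1)] *= factor;
        }

        // At most one element remains when the length is odd.
        if (i < count)
        {
            values[(int)i] *= factor;
        }
    }
}
```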