Browse Source

Micro optimizations

pull/3044/head
James Jackson-South 4 months ago
parent
commit
192c4aba05
  1. 72
      src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs
  2. 12
      tests/ImageSharp.Benchmarks/Config.cs
  3. 5
      tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj
  4. 2
      tests/ImageSharp.Benchmarks/Processing/Resize.cs

72
src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs

@ -114,30 +114,49 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
Span<Vector4> transposedFirstPassBufferSpan = this.transposedFirstPassBuffer.DangerousGetSingleSpan();
int left = this.targetWorkingRect.Left;
int right = this.targetWorkingRect.Right;
int width = this.targetWorkingRect.Width;
nuint widthCount = (uint)width;
// Hoist invariant calculations outside the loop
int targetOriginY = this.targetOrigin.Y;
int currentWindowMax = this.currentWindow.Max;
int currentWindowMin = this.currentWindow.Min;
nuint workerHeight = (uint)this.workerHeight;
nuint workerHeight2 = workerHeight * 2;
ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan);
for (int y = rowInterval.Min; y < rowInterval.Max; y++)
{
// Ensure offsets are normalized for cropping and padding.
ResizeKernel kernel = this.verticalKernelMap.GetKernel((uint)(y - this.targetOrigin.Y));
ResizeKernel kernel = this.verticalKernelMap.GetKernel((uint)(y - targetOriginY));
while (kernel.StartIndex + kernel.Length > this.currentWindow.Max)
// Check if we need to slide the window before processing this row
int kernelEnd = kernel.StartIndex + kernel.Length;
while (kernelEnd > currentWindowMax)
{
this.Slide();
currentWindowMax = this.currentWindow.Max;
currentWindowMin = this.currentWindow.Min;
}
ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan);
int top = kernel.StartIndex - currentWindowMin;
ref Vector4 colRef0 = ref transposedFirstPassBufferSpan[top];
int top = kernel.StartIndex - this.currentWindow.Min;
nuint i = 0;
for (; i + 1 < widthCount; i += 2)
{
ref Vector4 colRef1 = ref Unsafe.Add(ref colRef0, workerHeight);
ref Vector4 fpBase = ref transposedFirstPassBufferSpan[top];
Unsafe.Add(ref tempRowBase, i) = kernel.ConvolveCore(ref colRef0);
Unsafe.Add(ref tempRowBase, i + 1) = kernel.ConvolveCore(ref colRef1);
for (nuint x = 0; x < (uint)(right - left); x++)
{
ref Vector4 firstPassColumnBase = ref Unsafe.Add(ref fpBase, x * (uint)this.workerHeight);
colRef0 = ref Unsafe.Add(ref colRef0, workerHeight2);
}
// Destination color components
Unsafe.Add(ref tempRowBase, x) = kernel.ConvolveCore(ref firstPassColumnBase);
if (i < widthCount)
{
Unsafe.Add(ref tempRowBase, i) = kernel.ConvolveCore(ref colRef0);
}
Span<TPixel> targetRowSpan = destination.DangerousGetRowSpan(y).Slice(left, width);
@ -172,6 +191,13 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
nuint left = (uint)this.targetWorkingRect.Left;
nuint right = (uint)this.targetWorkingRect.Right;
nuint targetOriginX = (uint)this.targetOrigin.X;
nuint widthCount = right - left;
nuint workerHeight = (uint)this.workerHeight;
int currentWindowMin = this.currentWindow.Min;
// Cache the kernel map reference to reduce repeated field indirections.
ResizeKernelMap kernelMap = this.horizontalKernelMap;
for (int y = calculationInterval.Min; y < calculationInterval.Max; y++)
{
Span<TPixel> sourceRow = this.source.DangerousGetRowSpan(y);
@ -182,17 +208,25 @@ internal sealed class ResizeWorker<TPixel> : IDisposable
tempRowSpan,
this.conversionModifiers);
// optimization for:
// Span<Vector4> firstPassSpan = transposedFirstPassBufferSpan.Slice(y - this.currentWindow.Min);
ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - this.currentWindow.Min];
ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - currentWindowMin];
// Unroll by 2 to reduce loop overhead and kernel fetch costs.
nuint x = left;
nuint z = 0;
for (nuint x = left, z = 0; x < right; x++, z++)
for (; z + 1 < widthCount; x += 2, z += 2)
{
ResizeKernel kernel = this.horizontalKernelMap.GetKernel(x - targetOriginX);
ResizeKernel kernel0 = kernelMap.GetKernel(x - targetOriginX);
ResizeKernel kernel1 = kernelMap.GetKernel((x + 1) - targetOriginX);
// optimization for:
// firstPassSpan[x * this.workerHeight] = kernel.Convolve(tempRowSpan);
Unsafe.Add(ref firstPassBaseRef, z * (uint)this.workerHeight) = kernel.Convolve(tempRowSpan);
Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel0.Convolve(tempRowSpan);
Unsafe.Add(ref firstPassBaseRef, (z + 1) * workerHeight) = kernel1.Convolve(tempRowSpan);
}
if (z < widthCount)
{
ResizeKernel kernel = kernelMap.GetKernel(x - targetOriginX);
Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel.Convolve(tempRowSpan);
}
}
}

12
tests/ImageSharp.Benchmarks/Config.cs

@ -10,6 +10,7 @@ using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Reports;
using BenchmarkDotNet.Toolchains.InProcess.Emit;
namespace SixLabors.ImageSharp.Benchmarks;
@ -42,7 +43,16 @@ public partial class Config : ManualConfig
.WithLaunchCount(1)
.WithWarmupCount(3)
.WithIterationCount(3)
.WithArguments([new MsBuildArgument("/p:DebugType=portable")]));
.WithArguments([new MsBuildArgument("/p:DebugType=portable")])) ;
}
/// <summary>
/// Benchmark configuration that registers a single job based on <see cref="Job.Default"/>,
/// targeting <see cref="CoreRuntime.Core80"/> and executed through
/// <see cref="InProcessEmitToolchain"/>.
/// </summary>
public class StandardInProcess : Config
{
    /// <summary>
    /// Initializes a new instance of the <see cref="StandardInProcess"/> class
    /// and adds its job to the configuration.
    /// </summary>
    public StandardInProcess()
    {
        // NOTE(review): the MSBuild argument is carried over from the out-of-process
        // configuration; presumably it has no effect with an in-process toolchain,
        // since no separate benchmark project is built - confirm.
        // NOTE(review): in-process execution presumably requires the host process to
        // already be running on the requested runtime - confirm.
        Job inProcessJob = Job.Default
            .WithRuntime(CoreRuntime.Core80)
            .WithToolchain(InProcessEmitToolchain.Instance)
            .WithArguments([new MsBuildArgument("/p:DebugType=portable")]);

        this.AddJob(inProcessJob);
    }
}
#if OS_WINDOWS

5
tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj

@ -57,8 +57,9 @@
<ItemGroup>
<PackageReference Include="Magick.NET-Q16-AnyCPU" />
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" Condition="'$(IsWindows)'=='true'" />
<PackageReference Include="Microsoft.VisualStudio.DiagnosticsHub.BenchmarkDotNetDiagnosers" Version="18.3.36812.1" />
<PackageReference Include="BenchmarkDotNet" Version="0.15.8" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.15.8" Condition="'$(IsWindows)'=='true'" />
<PackageReference Include="Colourful" />
<PackageReference Include="NetVips" />
<PackageReference Include="NetVips.Native" />

2
tests/ImageSharp.Benchmarks/Processing/Resize.cs

@ -12,7 +12,7 @@ using SDImage = System.Drawing.Image;
namespace SixLabors.ImageSharp.Benchmarks;
[Config(typeof(Config.Standard))]
[Config(typeof(Config.StandardInProcess))]
public abstract class Resize<TPixel>
where TPixel : unmanaged, IPixel<TPixel>
{

Loading…
Cancel
Save