From c6cce4dfb5a4e28f5e85f504b4b001cc6cd39389 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 23 Apr 2018 23:21:43 +1000 Subject: [PATCH] Optimize transforms and reduce struct copying. --- .../Common/Extensions/Vector4Extensions.cs | 18 +++++++-------- .../Processors/Convolution2DProcessor.cs | 6 +++-- .../Processors/Convolution2PassProcessor.cs | 3 ++- .../Processors/ConvolutionProcessor.cs | 6 +++-- .../Processors/AffineTransformProcessor.cs | 22 ++++++++++--------- .../InterpolatedTransformProcessorBase.cs | 19 ++++++++-------- .../ProjectiveTransformProcessor.cs | 22 ++++++++++--------- .../Transforms/Processors/WeightsWindow.cs | 5 +++-- 8 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/ImageSharp/Common/Extensions/Vector4Extensions.cs b/src/ImageSharp/Common/Extensions/Vector4Extensions.cs index 88712a736..d91c7e0d1 100644 --- a/src/ImageSharp/Common/Extensions/Vector4Extensions.cs +++ b/src/ImageSharp/Common/Extensions/Vector4Extensions.cs @@ -20,7 +20,7 @@ namespace SixLabors.ImageSharp /// The to premultiply /// The [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Premultiply(this Vector4 source) + public static Vector4 Premultiply(this ref Vector4 source) { float w = source.W; Vector4 premultiplied = source * w; @@ -29,12 +29,12 @@ namespace SixLabors.ImageSharp } /// - /// Reverses the result of premultiplying a vector via . + /// Reverses the result of premultiplying a vector via . /// /// The to premultiply /// The [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 UnPremultiply(this Vector4 source) + public static Vector4 UnPremultiply(this ref Vector4 source) { float w = source.W; Vector4 unpremultiplied = source / w; @@ -50,10 +50,10 @@ namespace SixLabors.ImageSharp /// The whose signal to compress. /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Compress(this Vector4 linear) + public static Vector4 Compress(this ref Vector4 linear) { // TODO: Is there a faster way to do this? - return new Vector4(Compress(linear.X), Compress(linear.Y), Compress(linear.Z), linear.W); + return new Vector4(Compress(ref linear.X), Compress(ref linear.Y), Compress(ref linear.Z), linear.W); } /// @@ -64,10 +64,10 @@ namespace SixLabors.ImageSharp /// The whose signal to expand. /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 Expand(this Vector4 gamma) + public static Vector4 Expand(this ref Vector4 gamma) { // TODO: Is there a faster way to do this? - return new Vector4(Expand(gamma.X), Expand(gamma.Y), Expand(gamma.Z), gamma.W); + return new Vector4(Expand(ref gamma.X), Expand(ref gamma.Y), Expand(ref gamma.Z), gamma.W); } /// @@ -80,7 +80,7 @@ namespace SixLabors.ImageSharp /// The . /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static float Compress(float signal) + private static float Compress(ref float signal) { if (signal <= 0.0031308F) { @@ -100,7 +100,7 @@ namespace SixLabors.ImageSharp /// The . /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static float Expand(float signal) + private static float Expand(ref float signal) { if (signal <= 0.04045F) { diff --git a/src/ImageSharp/Processing/Convolution/Processors/Convolution2DProcessor.cs b/src/ImageSharp/Processing/Convolution/Processors/Convolution2DProcessor.cs index ebadd2850..e07bdcbb9 100644 --- a/src/ImageSharp/Processing/Convolution/Processors/Convolution2DProcessor.cs +++ b/src/ImageSharp/Processing/Convolution/Processors/Convolution2DProcessor.cs @@ -95,7 +95,8 @@ namespace SixLabors.ImageSharp.Processing.Convolution.Processors int offsetX = x + fxr; offsetX = offsetX.Clamp(0, maxX); - Vector4 currentColor = sourceOffsetRow[offsetX].ToVector4().Premultiply(); + var currentColor = sourceOffsetRow[offsetX].ToVector4(); + currentColor = currentColor.Premultiply(); if (fy < kernelXHeight) { @@ -120,7 +121,8 @@ namespace SixLabors.ImageSharp.Processing.Convolution.Processors float blue = MathF.Sqrt((bX * bX) + (bY * bY)); ref TPixel pixel = ref targetRow[x]; - pixel.PackFromVector4(new Vector4(red, green, blue, sourceRow[x].ToVector4().W).UnPremultiply()); + var result = new Vector4(red, green, blue, sourceRow[x].ToVector4().W); + pixel.PackFromVector4(result.UnPremultiply()); } }); diff --git a/src/ImageSharp/Processing/Convolution/Processors/Convolution2PassProcessor.cs b/src/ImageSharp/Processing/Convolution/Processors/Convolution2PassProcessor.cs index 8f96546ae..05d9198e7 100644 --- a/src/ImageSharp/Processing/Convolution/Processors/Convolution2PassProcessor.cs +++ b/src/ImageSharp/Processing/Convolution/Processors/Convolution2PassProcessor.cs @@ -110,7 +110,8 @@ namespace SixLabors.ImageSharp.Processing.Convolution.Processors offsetX = offsetX.Clamp(0, maxX); - Vector4 currentColor = row[offsetX].ToVector4().Premultiply(); + var currentColor = row[offsetX].ToVector4(); + currentColor = currentColor.Premultiply(); destination += kernel[fy, fx] * currentColor; } } diff --git a/src/ImageSharp/Processing/Convolution/Processors/ConvolutionProcessor.cs b/src/ImageSharp/Processing/Convolution/Processors/ConvolutionProcessor.cs index 8f7a1caab..a7e6c0399 100644 --- a/src/ImageSharp/Processing/Convolution/Processors/ConvolutionProcessor.cs +++ b/src/ImageSharp/Processing/Convolution/Processors/ConvolutionProcessor.cs @@ -82,7 +82,8 @@ namespace SixLabors.ImageSharp.Processing.Convolution.Processors offsetX = offsetX.Clamp(0, maxX); - Vector4 currentColor = sourceOffsetRow[offsetX].ToVector4().Premultiply(); + var currentColor = sourceOffsetRow[offsetX].ToVector4(); + currentColor = currentColor.Premultiply(); currentColor *= this.KernelXY[fy, fx]; red += currentColor.X; @@ -92,7 +93,8 @@ namespace SixLabors.ImageSharp.Processing.Convolution.Processors } ref TPixel pixel = ref targetRow[x]; - pixel.PackFromVector4(new Vector4(red, green, blue, sourceRow[x].ToVector4().W).UnPremultiply()); + var result = new Vector4(red, green, blue, sourceRow[x].ToVector4().W); + pixel.PackFromVector4(result.UnPremultiply()); } }); diff --git a/src/ImageSharp/Processing/Transforms/Processors/AffineTransformProcessor.cs b/src/ImageSharp/Processing/Transforms/Processors/AffineTransformProcessor.cs index b9f3dc4bf..2d6083e55 100644 --- a/src/ImageSharp/Processing/Transforms/Processors/AffineTransformProcessor.cs +++ b/src/ImageSharp/Processing/Transforms/Processors/AffineTransformProcessor.cs @@ -5,6 +5,8 @@ using System; using System.Collections.Generic; using System.Linq; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading.Tasks; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; @@ -120,9 +122,9 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors configuration.ParallelOptions, y => { - Span destRow = destination.GetPixelRowSpan(y); - Span ySpan = yBuffer.GetRowSpan(y); - Span xSpan = xBuffer.GetRowSpan(y); + ref TPixel destRowRef = ref MemoryMarshal.GetReference(destination.GetPixelRowSpan(y)); + ref float ySpanRef = ref MemoryMarshal.GetReference(yBuffer.GetRowSpan(y)); + ref float xSpanRef = ref MemoryMarshal.GetReference(xBuffer.GetRowSpan(y)); for (int x = 0; x < width; x++) { @@ -164,24 +166,24 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors // I've optimized where I can but am always open to suggestions. if (yScale > 1 && xScale > 1) { - CalculateWeightsDown(top, bottom, minY, maxY, point.Y, sampler, yScale, ySpan); - CalculateWeightsDown(left, right, minX, maxX, point.X, sampler, xScale, xSpan); + CalculateWeightsDown(top, bottom, minY, maxY, point.Y, sampler, yScale, ref ySpanRef, yLength); + CalculateWeightsDown(left, right, minX, maxX, point.X, sampler, xScale, ref xSpanRef, xLength); } else { - CalculateWeightsScaleUp(minY, maxY, point.Y, sampler, ySpan); - CalculateWeightsScaleUp(minX, maxX, point.X, sampler, xSpan); + CalculateWeightsScaleUp(minY, maxY, point.Y, sampler, ref ySpanRef); + CalculateWeightsScaleUp(minX, maxX, point.X, sampler, ref xSpanRef); } // Now multiply the results against the offsets Vector4 sum = Vector4.Zero; for (int yy = 0, j = minY; j <= maxY; j++, yy++) { - float yWeight = ySpan[yy]; + float yWeight = Unsafe.Add(ref ySpanRef, yy); for (int xx = 0, i = minX; i <= maxX; i++, xx++) { - float xWeight = xSpan[xx]; + float xWeight = Unsafe.Add(ref xSpanRef, xx); var vector = source[i, j].ToVector4(); // Values are first premultiplied to prevent darkening of edge pixels @@ -190,7 +192,7 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors } } - ref TPixel dest = ref destRow[x]; + ref TPixel dest = ref Unsafe.Add(ref destRowRef, x); // Reverse the premultiplication dest.PackFromVector4(sum.UnPremultiply()); diff --git a/src/ImageSharp/Processing/Transforms/Processors/InterpolatedTransformProcessorBase.cs b/src/ImageSharp/Processing/Transforms/Processors/InterpolatedTransformProcessorBase.cs index 6e663f1e1..8f57f3ba3 100644 --- a/src/ImageSharp/Processing/Transforms/Processors/InterpolatedTransformProcessorBase.cs +++ b/src/ImageSharp/Processing/Transforms/Processors/InterpolatedTransformProcessorBase.cs @@ -42,12 +42,12 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors /// The transformed point dimension /// The sampler /// The transformed image scale relative to the source - /// The collection of weights + /// The reference to the collection of weights + /// The length of the weights collection [MethodImpl(MethodImplOptions.AggressiveInlining)] - protected static void CalculateWeightsDown(int min, int max, int sourceMin, int sourceMax, float point, IResampler sampler, float scale, Span weights) + protected static void CalculateWeightsDown(int min, int max, int sourceMin, int sourceMax, float point, IResampler sampler, float scale, ref float weightsRef, int length) { float sum = 0; - ref float weightsBaseRef = ref weights[0]; // Downsampling weights requires more edge sampling plus normalization of the weights for (int x = 0, i = min; i <= max; i++, x++) @@ -65,14 +65,14 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors float weight = sampler.GetValue((index - point) / scale); sum += weight; - Unsafe.Add(ref weightsBaseRef, x) = weight; + Unsafe.Add(ref weightsRef, x) = weight; } if (sum > 0) { - for (int i = 0; i < weights.Length; i++) + for (int i = 0; i < length; i++) { - ref float wRef = ref Unsafe.Add(ref weightsBaseRef, i); + ref float wRef = ref Unsafe.Add(ref weightsRef, i); wRef = wRef / sum; } } @@ -85,15 +85,14 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors /// The maximum source bounds /// The transformed point dimension /// The sampler - /// The collection of weights + /// The reference to the collection of weights [MethodImpl(MethodImplOptions.AggressiveInlining)] - protected static void CalculateWeightsScaleUp(int sourceMin, int sourceMax, float point, IResampler sampler, Span weights) + protected static void CalculateWeightsScaleUp(int sourceMin, int sourceMax, float point, IResampler sampler, ref float weightsRef) { - ref float weightsBaseRef = ref weights[0]; for (int x = 0, i = sourceMin; i <= sourceMax; i++, x++) { float weight = sampler.GetValue(i - point); - Unsafe.Add(ref weightsBaseRef, x) = weight; + Unsafe.Add(ref weightsRef, x) = weight; } } diff --git a/src/ImageSharp/Processing/Transforms/Processors/ProjectiveTransformProcessor.cs b/src/ImageSharp/Processing/Transforms/Processors/ProjectiveTransformProcessor.cs index 0a857edd2..2ca1f2ef7 100644 --- a/src/ImageSharp/Processing/Transforms/Processors/ProjectiveTransformProcessor.cs +++ b/src/ImageSharp/Processing/Transforms/Processors/ProjectiveTransformProcessor.cs @@ -5,6 +5,8 @@ using System; using System.Collections.Generic; using System.Linq; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading.Tasks; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; @@ -119,9 +121,9 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors configuration.ParallelOptions, y => { - Span destRow = destination.GetPixelRowSpan(y); - Span ySpan = yBuffer.GetRowSpan(y); - Span xSpan = xBuffer.GetRowSpan(y); + ref TPixel destRowRef = ref MemoryMarshal.GetReference(destination.GetPixelRowSpan(y)); + ref float ySpanRef = ref MemoryMarshal.GetReference(yBuffer.GetRowSpan(y)); + ref float xSpanRef = ref MemoryMarshal.GetReference(xBuffer.GetRowSpan(y)); for (int x = 0; x < width; x++) { @@ -164,24 +166,24 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors // I've optimized where I can but am always open to suggestions. if (yScale > 1 && xScale > 1) { - CalculateWeightsDown(top, bottom, minY, maxY, point.Y, sampler, yScale, ySpan); - CalculateWeightsDown(left, right, minX, maxX, point.X, sampler, xScale, xSpan); + CalculateWeightsDown(top, bottom, minY, maxY, point.Y, sampler, yScale, ref ySpanRef, yLength); + CalculateWeightsDown(left, right, minX, maxX, point.X, sampler, xScale, ref xSpanRef, xLength); } else { - CalculateWeightsScaleUp(minY, maxY, point.Y, sampler, ySpan); - CalculateWeightsScaleUp(minX, maxX, point.X, sampler, xSpan); + CalculateWeightsScaleUp(minY, maxY, point.Y, sampler, ref ySpanRef); + CalculateWeightsScaleUp(minX, maxX, point.X, sampler, ref xSpanRef); } // Now multiply the results against the offsets Vector4 sum = Vector4.Zero; for (int yy = 0, j = minY; j <= maxY; j++, yy++) { - float yWeight = ySpan[yy]; + float yWeight = Unsafe.Add(ref ySpanRef, yy); for (int xx = 0, i = minX; i <= maxX; i++, xx++) { - float xWeight = xSpan[xx]; + float xWeight = Unsafe.Add(ref xSpanRef, xx); var vector = source[i, j].ToVector4(); // Values are first premultiplied to prevent darkening of edge pixels @@ -190,7 +192,7 @@ namespace SixLabors.ImageSharp.Processing.Transforms.Processors } } - ref TPixel dest = ref destRow[x]; + ref TPixel dest = ref Unsafe.Add(ref destRowRef, x); // Reverse the premultiplication dest.PackFromVector4(sum.UnPremultiply()); diff --git a/src/ImageSharp/Processing/Transforms/Processors/WeightsWindow.cs b/src/ImageSharp/Processing/Transforms/Processors/WeightsWindow.cs index 26aaec502..6bc04c26d 100644 --- a/src/ImageSharp/Processing/Transforms/Processors/WeightsWindow.cs +++ b/src/ImageSharp/Processing/Transforms/Processors/WeightsWindow.cs @@ -96,7 +96,7 @@ namespace SixLabors.ImageSharp.Processing.Processors /// /// Computes the sum of vectors in 'rowSpan' weighted by weight values, pointed by this instance. - /// Applies to all input vectors. + /// Applies to all input vectors. /// /// The input span of vectors /// The source row position. @@ -115,7 +115,8 @@ namespace SixLabors.ImageSharp.Processing.Processors { float weight = Unsafe.Add(ref horizontalValues, i); Vector4 v = Unsafe.Add(ref vecPtr, i); - result += v.Premultiply().Expand() * weight; + v = v.Premultiply(); + result += v.Expand() * weight; } return result.UnPremultiply();