From a5bbe95e950e48ba450d7cf9ca0f0311c6984c81 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 22:49:32 +0100 Subject: [PATCH] More codegen improvements to bokeh blur --- .../Convolution/BokehBlurProcessor.cs | 26 ++++++++++++------- .../Convolution/BokehBlurProcessor{TPixel}.cs | 21 +++++++++------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index 55cef5df5..13fe627d1 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -134,23 +134,29 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // The target buffer is zeroed initially and then it accumulates the results // of each partial convolution, so we don't have to clear it here as well ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y); - ref Complex64 kernelBase = ref this.kernel[0]; + ref Complex64 kernelStart = ref this.kernel[0]; + ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); - for (int kY = 0; kY < kernelSize; kY++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { // Get the precalculated source sample row for this kernel row and copy to our buffer - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleY); - Complex64 factor = Unsafe.Add(ref kernelBase, kY); + ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleRowBase); + ref ComplexVector4 sourceEnd = ref Unsafe.Add(ref sourceBase, boundsWidth); + ref Vector4 targetStart = ref targetBase; + Complex64 factor = kernelStart; - for (int x = 0; x < boundsWidth; x++) + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - ComplexVector4 sample = Unsafe.Add(ref sourceBase, x); - ComplexVector4 partial = factor * sample; + ComplexVector4 partial = factor * sourceBase; - target += partial.WeightedSum(this.z, this.w); + targetStart += partial.WeightedSum(this.z, this.w); + + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index a21155e10..241ff9db2 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -233,32 +233,37 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Clear the target buffer for each row run Span targetBuffer = this.targetValues.GetRowSpan(y); targetBuffer.Clear(); - ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Execute the bulk pixel format conversion for the current row Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); + ref ComplexVector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref ComplexVector4 targetEnd = ref Unsafe.Add(ref targetStart, span.Length); ref Complex64 kernelBase = ref this.kernel[0]; + ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); - for (int x = 0; x < span.Length; x++) + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) { - ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); + ref Complex64 kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; - for (int kX = 0; kX < kernelSize; kX++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - Complex64 factor = Unsafe.Add(ref kernelBase, kX); + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); - target.Sum(factor * sample); + targetStart.Sum(kernelStart * sample); + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); } // Shift the base column sampling reference by one row at the end of each outer // iteration so that the inner tight loop indexing can skip the multiplication sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); + targetStart = ref Unsafe.Add(ref targetStart, 1); } } }