From ca1a67a36cacd3c95c8a1fd91fdc994d57460b08 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Sat, 12 Dec 2020 22:10:36 +0100 Subject: [PATCH] Specialize bokeh blur operations for 1D kernels --- .../Convolution/BokehBlurProcessor.cs | 34 +++++++------- .../Convolution/BokehBlurProcessor{TPixel}.cs | 46 ++++++++++--------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index e8f7351fa..edaac45b6 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -127,39 +127,37 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; - Span targetBuffer = this.targetValues.GetRowSpan(y); + int kernelSize = this.kernel.Length; - var state = new ConvolutionState(this.kernel, this.kernel.Length, 1, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + Span rowOffsets = this.map.GetRowOffsetSpan(); + Span columnOffsets = this.map.GetColumnOffsetSpan(); + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), (y - this.bounds.Y) * kernelSize); + ref int sampleColumnBase = ref MemoryMarshal.GetReference(columnOffsets); // The target buffer is zeroed initially and then it accumulates the results - // of each partial convolution, so we don't have to clear it here as well. - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + // of each partial convolution, so we don't have to clear it here as well + Span targetBuffer = this.targetValues.GetRowSpan(y); - ReadOnlyKernel kernel = state.Kernel; + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref Complex64 kernelBase = ref this.kernel[0]; - for (int kY = 0; kY < kernel.Rows; kY++) + for (int kY = 0; kY < kernelSize; kY++) { - // Get the precalculated source sample row for this kernel row and copy to our buffer. + // Get the precalculated source sample row for this kernel row and copy to our buffer int sampleY = Unsafe.Add(ref sampleRowBase, kY); Span sourceRow = this.sourceValues.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); ref ComplexVector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); + Complex64 factor = Unsafe.Add(ref kernelBase, kY); for (int x = 0; x < boundsWidth; x++) { - ref int sampleColumnBase = ref state.GetSampleColumn(x); + int sampleX = Unsafe.Add(ref sampleColumnBase, x) - boundsX; ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - ComplexVector4 pixel4 = default; - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - ComplexVector4 sample = Unsafe.Add(ref sourceBase, sampleX); - pixel4.Sum(kernel[kY, kX] * sample); - } + ComplexVector4 sample = Unsafe.Add(ref sourceBase, sampleX); + ComplexVector4 partial = factor * sample; - target += pixel4.WeightedSum(this.z, this.w); + target += partial.WeightedSum(this.z, this.w); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index aa6160799..cdadd4dee 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -207,39 +207,41 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; + int kernelSize = this.kernel.Length; - var state = new ConvolutionState(this.kernel, 1, this.kernel.Length, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + Span rowOffsets = this.map.GetRowOffsetSpan(); + Span columnOffsets = this.map.GetColumnOffsetSpan(); + int sampleY = Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), y - this.bounds.Y); + ref int sampleColumnBase = ref MemoryMarshal.GetReference(columnOffsets); + // Clear the target buffer for each row run Span targetBuffer = this.targetValues.GetRowSpan(y); - - // Clear the target buffer targetBuffer.Clear(); ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - ReadOnlyKernel kernel = state.Kernel; + // Execute the bulk pixel format conversion for the current row + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); - for (int kY = 0; kY < kernel.Rows; kY++) - { - // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); + ref Complex64 kernelBase = ref this.kernel[0]; - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); + for (int x = 0; x < span.Length; x++) + { + ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); - for (int x = 0; x < span.Length; x++) + for (int kX = 0; kX < kernelSize; kX++) { - ref int sampleColumnBase = ref state.GetSampleColumn(x); - ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target.Sum(kernel[kY, kX] * sample); - } + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + Complex64 factor = Unsafe.Add(ref kernelBase, kX); + + target.Sum(factor * sample); } + + // Shift the base column sampling reference by one row at the end of each outer + // iteration so that the inner tight loop indexing can skip the multiplication + sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); } } }