diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index b3844ded84..d4fb27a57f 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -130,9 +130,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution int kernelSize = this.kernel.Length; Span rowOffsets = this.map.GetRowOffsetSpan(); - Span columnOffsets = this.map.GetColumnOffsetSpan(); ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), (y - this.bounds.Y) * kernelSize); - ref int sampleColumnBase = ref MemoryMarshal.GetReference(columnOffsets); // The target buffer is zeroed initially and then it accumulates the results // of each partial convolution, so we don't have to clear it here as well @@ -148,9 +146,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int x = 0; x < boundsWidth; x++) { - int sampleX = Unsafe.Add(ref sampleColumnBase, x) - boundsX; ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - ComplexVector4 sample = Unsafe.Add(ref sourceBase, sampleX); + ComplexVector4 sample = Unsafe.Add(ref sourceBase, x); ComplexVector4 partial = factor * sample; target += partial.WeightedSum(this.z, this.w); diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index 4b1d7f8f19..dda384390f 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -124,12 +124,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // This is needed because the bokeh blur operates as TPixel -> complex -> TPixel, so we cannot // convert back to standard pixels after each separate 1D convolution pass. Like in the gaussian // blur though, we preallocate and compute the kernel sampling maps before processing each complex - // component, to avoid recomputing the same sampling map once per convolution pass. - using var mapX = new KernelSamplingMap(configuration.MemoryAllocator); - using var mapY = new KernelSamplingMap(configuration.MemoryAllocator); + // component, to avoid recomputing the same sampling map once per convolution pass. Since we are + // doing two 1D convolutions with the same kernel, we can use a single kernel sampling map as if + // we were using a 2D kernel with each dimension being the same as the length of our kernel, and + // use the two sampling offset spans resulting from this same map. This saves some extra work. + using var mapXY = new KernelSamplingMap(configuration.MemoryAllocator); - mapX.BuildSamplingOffsetMap(1, this.kernelSize, sourceRectangle); - mapY.BuildSamplingOffsetMap(this.kernelSize, 1, sourceRectangle); + mapXY.BuildSamplingOffsetMap(this.kernelSize, this.kernelSize, sourceRectangle); ref Complex64[] baseRef = ref MemoryMarshal.GetReference(this.kernels.AsSpan()); ref Vector4 paramsRef = ref MemoryMarshal.GetReference(this.kernelParameters.AsSpan()); @@ -146,7 +147,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution sourceRectangle, firstPassBuffer, source.PixelBuffer, - mapX, + mapXY, kernel, configuration); @@ -160,7 +161,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution sourceRectangle, processingBuffer, firstPassBuffer, - mapY, + mapXY, kernel, parameters.Z, parameters.W); @@ -209,22 +210,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution int boundsWidth = this.bounds.Width; int kernelSize = this.kernel.Length; - Span rowOffsets = this.map.GetRowOffsetSpan(); - Span columnOffsets = this.map.GetColumnOffsetSpan(); - int sampleY = Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), y - this.bounds.Y); - ref int sampleColumnBase = ref MemoryMarshal.GetReference(columnOffsets); - // Clear the target buffer for each row run Span targetBuffer = this.targetValues.GetRowSpan(y); targetBuffer.Clear(); ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Execute the bulk pixel format conversion for the current row - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); ref Complex64 kernelBase = ref this.kernel[0]; + ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); for (int x = 0; x < span.Length; x++) {