diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index 352960f41..e8f7351fa 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -4,6 +4,7 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -91,31 +92,30 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// it is actually used, because it does not use any generic parameters internally. Defining in a non-generic class means that there will only /// ever be a single instantiation of this type for the JIT/AOT compilers to process, instead of having duplicate versions for each pixel type. /// - internal readonly struct ApplyHorizontalConvolutionRowOperation : IRowOperation + internal readonly struct SecondPassConvolutionRowOperation : IRowOperation { private readonly Rectangle bounds; private readonly Buffer2D targetValues; private readonly Buffer2D sourceValues; + private readonly KernelSamplingMap map; private readonly Complex64[] kernel; private readonly float z; private readonly float w; - private readonly int maxY; - private readonly int maxX; [MethodImpl(InliningOptions.ShortMethod)] - public ApplyHorizontalConvolutionRowOperation( + public SecondPassConvolutionRowOperation( Rectangle bounds, Buffer2D targetValues, Buffer2D sourceValues, + KernelSamplingMap map, Complex64[] kernel, float z, float w) { this.bounds = bounds; - this.maxY = this.bounds.Bottom - 1; - this.maxX = this.bounds.Right - 1; this.targetValues = targetValues; this.sourceValues = sourceValues; + this.map = map; this.kernel = kernel; this.z = z; this.w = w; @@ -125,11 +125,42 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y) { - Span targetRowSpan = this.targetValues.GetRowSpan(y).Slice(this.bounds.X); + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span targetBuffer = this.targetValues.GetRowSpan(y); - for (int x = 0; x < this.bounds.Width; x++) + var state = new ConvolutionState(this.kernel, this.kernel.Length, 1, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // The target buffer is zeroed initially and then it accumulates the results + // of each partial convolution, so we don't have to clear it here as well. + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + ReadOnlyKernel kernel = state.Kernel; + + for (int kY = 0; kY < kernel.Rows; kY++) { - Buffer2DUtils.Convolve4AndAccumulatePartials(this.kernel, this.sourceValues, targetRowSpan, y, x, this.bounds.Y, this.maxY, this.bounds.X, this.maxX, this.z, this.w); + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourceValues.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + ref ComplexVector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); + + for (int x = 0; x < boundsWidth; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + ComplexVector4 pixel4 = default; + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + ComplexVector4 sample = Unsafe.Add(ref sourceBase, sampleX); + + pixel4.Sum(kernel[kY, kX] * sample); + } + + target += pixel4.WeightedSum(this.z, this.w); + } } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index dfe54bf2e..aa6160799 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -26,6 +26,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// private readonly float gamma; + /// + /// The size of each complex convolution kernel. + /// + private readonly int kernelSize; + /// /// The kernel parameters to use for the current instance (a: X, b: Y, A: Z, B: W) /// @@ -47,11 +52,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution : base(configuration, source, sourceRectangle) { this.gamma = definition.Gamma; + this.kernelSize = (definition.Radius * 2) + 1; // Get the bokeh blur data BokehBlurKernelData data = BokehBlurKernelDataProvider.GetBokehBlurKernelData( definition.Radius, - (definition.Radius * 2) + 1, + this.kernelSize, definition.Components); this.kernelParameters = data.Parameters; @@ -108,69 +114,132 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Buffer2D processingBuffer) { // Allocate the buffer with the intermediate convolution results - using Buffer2D firstPassBuffer = this.Configuration.MemoryAllocator.Allocate2D(source.Size()); + using Buffer2D firstPassBuffer = configuration.MemoryAllocator.Allocate2D(source.Size()); + + var interest = Rectangle.Intersect(sourceRectangle, source.Bounds()); + + // Unlike in the standard 2 pass convolution processor, we use a rectangle of 1x the interest width + // to speedup the actual convolution, by applying bulk pixel conversion and clamping calculation. + // The second half of the buffer will just target the temporary buffer of complex pixel values. + // This is needed because the bokeh blur operates as TPixel -> complex -> TPixel, so we cannot + // convert back to standard pixels after each separate 1D convolution pass. Like in the gaussian + // blur though, we preallocate and compute the kernel sampling maps before processing each complex + // component, to avoid recomputing the same sampling map once per convolution pass. + using var mapX = new KernelSamplingMap(configuration.MemoryAllocator); + using var mapY = new KernelSamplingMap(configuration.MemoryAllocator); + + mapX.BuildSamplingOffsetMap(1, this.kernelSize, interest); + mapY.BuildSamplingOffsetMap(this.kernelSize, 1, interest); - // Perform two 1D convolutions for each component in the current instance ref Complex64[] baseRef = ref MemoryMarshal.GetReference(this.kernels.AsSpan()); ref Vector4 paramsRef = ref MemoryMarshal.GetReference(this.kernelParameters.AsSpan()); + + // Perform two 1D convolutions for each component in the current instance for (int i = 0; i < this.kernels.Length; i++) { // Compute the resulting complex buffer for the current component Complex64[] kernel = Unsafe.Add(ref baseRef, i); Vector4 parameters = Unsafe.Add(ref paramsRef, i); - // Compute the vertical 1D convolution - var verticalOperation = new ApplyVerticalConvolutionRowOperation(sourceRectangle, firstPassBuffer, source.PixelBuffer, kernel); - ParallelRowIterator.IterateRows( + // Horizontal convolution + var horizontalOperation = new FirstPassConvolutionRowOperation( + interest, + firstPassBuffer, + source.PixelBuffer, + mapX, + kernel, + configuration); + + ParallelRowIterator.IterateRows( configuration, - sourceRectangle, - in verticalOperation); + interest, + in horizontalOperation); + + // Vertical 1D convolutions to accumulate the partial results on the target buffer + var verticalOperation = new BokehBlurProcessor.SecondPassConvolutionRowOperation( + interest, + processingBuffer, + firstPassBuffer, + mapY, + kernel, + parameters.Z, + parameters.W); - // Compute the horizontal 1D convolutions and accumulate the partial results on the target buffer - var horizontalOperation = new BokehBlurProcessor.ApplyHorizontalConvolutionRowOperation(sourceRectangle, processingBuffer, firstPassBuffer, kernel, parameters.Z, parameters.W); ParallelRowIterator.IterateRows( configuration, - sourceRectangle, - in horizontalOperation); + interest, + in verticalOperation); } } /// /// A implementing the vertical convolution logic for . /// - private readonly struct ApplyVerticalConvolutionRowOperation : IRowOperation + private readonly struct FirstPassConvolutionRowOperation : IRowOperation { private readonly Rectangle bounds; private readonly Buffer2D targetValues; private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; private readonly Complex64[] kernel; - private readonly int maxY; - private readonly int maxX; + private readonly Configuration configuration; [MethodImpl(InliningOptions.ShortMethod)] - public ApplyVerticalConvolutionRowOperation( + public FirstPassConvolutionRowOperation( Rectangle bounds, Buffer2D targetValues, Buffer2D sourcePixels, - Complex64[] kernel) + KernelSamplingMap map, + Complex64[] kernel, + Configuration configuration) { this.bounds = bounds; - this.maxY = this.bounds.Bottom - 1; - this.maxX = this.bounds.Right - 1; this.targetValues = targetValues; this.sourcePixels = sourcePixels; + this.map = map; this.kernel = kernel; + this.configuration = configuration; } /// [MethodImpl(InliningOptions.ShortMethod)] - public void Invoke(int y) + public void Invoke(int y, Span span) { - Span targetRowSpan = this.targetValues.GetRowSpan(y).Slice(this.bounds.X); + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; - for (int x = 0; x < this.bounds.Width; x++) + var state = new ConvolutionState(this.kernel, 1, this.kernel.Length, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + Span targetBuffer = this.targetValues.GetRowSpan(y); + + // Clear the target buffer + targetBuffer.Clear(); + ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + ReadOnlyKernel kernel = state.Kernel; + + for (int kY = 0; kY < kernel.Rows; kY++) { - Buffer2DUtils.Convolve4(this.kernel, this.sourcePixels, targetRowSpan, y, x, this.bounds.Y, this.maxY, this.bounds.X, this.maxX); + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); + + for (int x = 0; x < span.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target.Sum(kernel[kY, kX] * sample); + } + } } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 151b0ffcc..16ce0fdd7 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -1,10 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats;