Browse Source

Switched bokeh blur to optimized pipeline

js/color-alpha-handling
Sergio Pedri 6 years ago
parent
commit
b3f4befe5e
  1. 49
      src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs
  2. 115
      src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs
  3. 3
      src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs

49
src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs

@ -4,6 +4,7 @@
using System; using System;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
@ -91,31 +92,30 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// it is actually used, because it does not use any generic parameters internally. Defining in a non-generic class means that there will only /// it is actually used, because it does not use any generic parameters internally. Defining in a non-generic class means that there will only
/// ever be a single instantiation of this type for the JIT/AOT compilers to process, instead of having duplicate versions for each pixel type. /// ever be a single instantiation of this type for the JIT/AOT compilers to process, instead of having duplicate versions for each pixel type.
/// </remarks> /// </remarks>
internal readonly struct ApplyHorizontalConvolutionRowOperation : IRowOperation internal readonly struct SecondPassConvolutionRowOperation : IRowOperation
{ {
private readonly Rectangle bounds; private readonly Rectangle bounds;
private readonly Buffer2D<Vector4> targetValues; private readonly Buffer2D<Vector4> targetValues;
private readonly Buffer2D<ComplexVector4> sourceValues; private readonly Buffer2D<ComplexVector4> sourceValues;
private readonly KernelSamplingMap map;
private readonly Complex64[] kernel; private readonly Complex64[] kernel;
private readonly float z; private readonly float z;
private readonly float w; private readonly float w;
private readonly int maxY;
private readonly int maxX;
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public ApplyHorizontalConvolutionRowOperation( public SecondPassConvolutionRowOperation(
Rectangle bounds, Rectangle bounds,
Buffer2D<Vector4> targetValues, Buffer2D<Vector4> targetValues,
Buffer2D<ComplexVector4> sourceValues, Buffer2D<ComplexVector4> sourceValues,
KernelSamplingMap map,
Complex64[] kernel, Complex64[] kernel,
float z, float z,
float w) float w)
{ {
this.bounds = bounds; this.bounds = bounds;
this.maxY = this.bounds.Bottom - 1;
this.maxX = this.bounds.Right - 1;
this.targetValues = targetValues; this.targetValues = targetValues;
this.sourceValues = sourceValues; this.sourceValues = sourceValues;
this.map = map;
this.kernel = kernel; this.kernel = kernel;
this.z = z; this.z = z;
this.w = w; this.w = w;
@ -125,11 +125,42 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y) public void Invoke(int y)
{ {
Span<Vector4> targetRowSpan = this.targetValues.GetRowSpan(y).Slice(this.bounds.X); int boundsX = this.bounds.X;
int boundsWidth = this.bounds.Width;
Span<Vector4> targetBuffer = this.targetValues.GetRowSpan(y);
for (int x = 0; x < this.bounds.Width; x++) var state = new ConvolutionState<Complex64>(this.kernel, this.kernel.Length, 1, this.map);
ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);
// The target buffer is zeroed initially and then it accumulates the results
// of each partial convolution, so we don't have to clear it here as well.
ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
ReadOnlyKernel<Complex64> kernel = state.Kernel;
for (int kY = 0; kY < kernel.Rows; kY++)
{ {
Buffer2DUtils.Convolve4AndAccumulatePartials(this.kernel, this.sourceValues, targetRowSpan, y, x, this.bounds.Y, this.maxY, this.bounds.X, this.maxX, this.z, this.w); // Get the precalculated source sample row for this kernel row and copy to our buffer.
int sampleY = Unsafe.Add(ref sampleRowBase, kY);
Span<ComplexVector4> sourceRow = this.sourceValues.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
ref ComplexVector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow);
for (int x = 0; x < boundsWidth; x++)
{
ref int sampleColumnBase = ref state.GetSampleColumn(x);
ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
ComplexVector4 pixel4 = default;
for (int kX = 0; kX < kernel.Columns; kX++)
{
int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
ComplexVector4 sample = Unsafe.Add(ref sourceBase, sampleX);
pixel4.Sum(kernel[kY, kX] * sample);
}
target += pixel4.WeightedSum(this.z, this.w);
}
} }
} }
} }

115
src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

@ -26,6 +26,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// </summary> /// </summary>
private readonly float gamma; private readonly float gamma;
/// <summary>
/// The size of each complex convolution kernel.
/// </summary>
private readonly int kernelSize;
/// <summary> /// <summary>
/// The kernel parameters to use for the current instance (a: X, b: Y, A: Z, B: W) /// The kernel parameters to use for the current instance (a: X, b: Y, A: Z, B: W)
/// </summary> /// </summary>
@ -47,11 +52,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
: base(configuration, source, sourceRectangle) : base(configuration, source, sourceRectangle)
{ {
this.gamma = definition.Gamma; this.gamma = definition.Gamma;
this.kernelSize = (definition.Radius * 2) + 1;
// Get the bokeh blur data // Get the bokeh blur data
BokehBlurKernelData data = BokehBlurKernelDataProvider.GetBokehBlurKernelData( BokehBlurKernelData data = BokehBlurKernelDataProvider.GetBokehBlurKernelData(
definition.Radius, definition.Radius,
(definition.Radius * 2) + 1, this.kernelSize,
definition.Components); definition.Components);
this.kernelParameters = data.Parameters; this.kernelParameters = data.Parameters;
@ -108,69 +114,132 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
Buffer2D<Vector4> processingBuffer) Buffer2D<Vector4> processingBuffer)
{ {
// Allocate the buffer with the intermediate convolution results // Allocate the buffer with the intermediate convolution results
using Buffer2D<ComplexVector4> firstPassBuffer = this.Configuration.MemoryAllocator.Allocate2D<ComplexVector4>(source.Size()); using Buffer2D<ComplexVector4> firstPassBuffer = configuration.MemoryAllocator.Allocate2D<ComplexVector4>(source.Size());
var interest = Rectangle.Intersect(sourceRectangle, source.Bounds());
// Unlike in the standard 2 pass convolution processor, we use a rectangle of 1x the interest width
// to speedup the actual convolution, by applying bulk pixel conversion and clamping calculation.
// The second half of the buffer will just target the temporary buffer of complex pixel values.
// This is needed because the bokeh blur operates as TPixel -> complex -> TPixel, so we cannot
// convert back to standard pixels after each separate 1D convolution pass. Like in the gaussian
// blur though, we preallocate and compute the kernel sampling maps before processing each complex
// component, to avoid recomputing the same sampling map once per convolution pass.
using var mapX = new KernelSamplingMap(configuration.MemoryAllocator);
using var mapY = new KernelSamplingMap(configuration.MemoryAllocator);
mapX.BuildSamplingOffsetMap(1, this.kernelSize, interest);
mapY.BuildSamplingOffsetMap(this.kernelSize, 1, interest);
// Perform two 1D convolutions for each component in the current instance
ref Complex64[] baseRef = ref MemoryMarshal.GetReference(this.kernels.AsSpan()); ref Complex64[] baseRef = ref MemoryMarshal.GetReference(this.kernels.AsSpan());
ref Vector4 paramsRef = ref MemoryMarshal.GetReference(this.kernelParameters.AsSpan()); ref Vector4 paramsRef = ref MemoryMarshal.GetReference(this.kernelParameters.AsSpan());
// Perform two 1D convolutions for each component in the current instance
for (int i = 0; i < this.kernels.Length; i++) for (int i = 0; i < this.kernels.Length; i++)
{ {
// Compute the resulting complex buffer for the current component // Compute the resulting complex buffer for the current component
Complex64[] kernel = Unsafe.Add(ref baseRef, i); Complex64[] kernel = Unsafe.Add(ref baseRef, i);
Vector4 parameters = Unsafe.Add(ref paramsRef, i); Vector4 parameters = Unsafe.Add(ref paramsRef, i);
// Compute the vertical 1D convolution // Horizontal convolution
var verticalOperation = new ApplyVerticalConvolutionRowOperation(sourceRectangle, firstPassBuffer, source.PixelBuffer, kernel); var horizontalOperation = new FirstPassConvolutionRowOperation(
ParallelRowIterator.IterateRows( interest,
firstPassBuffer,
source.PixelBuffer,
mapX,
kernel,
configuration);
ParallelRowIterator.IterateRows<FirstPassConvolutionRowOperation, Vector4>(
configuration, configuration,
sourceRectangle, interest,
in verticalOperation); in horizontalOperation);
// Vertical 1D convolutions to accumulate the partial results on the target buffer
var verticalOperation = new BokehBlurProcessor.SecondPassConvolutionRowOperation(
interest,
processingBuffer,
firstPassBuffer,
mapY,
kernel,
parameters.Z,
parameters.W);
// Compute the horizontal 1D convolutions and accumulate the partial results on the target buffer
var horizontalOperation = new BokehBlurProcessor.ApplyHorizontalConvolutionRowOperation(sourceRectangle, processingBuffer, firstPassBuffer, kernel, parameters.Z, parameters.W);
ParallelRowIterator.IterateRows( ParallelRowIterator.IterateRows(
configuration, configuration,
sourceRectangle, interest,
in horizontalOperation); in verticalOperation);
} }
} }
/// <summary> /// <summary>
/// A <see langword="struct"/> implementing the vertical convolution logic for <see cref="BokehBlurProcessor{T}"/>. /// A <see langword="struct"/> implementing the vertical convolution logic for <see cref="BokehBlurProcessor{T}"/>.
/// </summary> /// </summary>
private readonly struct ApplyVerticalConvolutionRowOperation : IRowOperation private readonly struct FirstPassConvolutionRowOperation : IRowOperation<Vector4>
{ {
private readonly Rectangle bounds; private readonly Rectangle bounds;
private readonly Buffer2D<ComplexVector4> targetValues; private readonly Buffer2D<ComplexVector4> targetValues;
private readonly Buffer2D<TPixel> sourcePixels; private readonly Buffer2D<TPixel> sourcePixels;
private readonly KernelSamplingMap map;
private readonly Complex64[] kernel; private readonly Complex64[] kernel;
private readonly int maxY; private readonly Configuration configuration;
private readonly int maxX;
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public ApplyVerticalConvolutionRowOperation( public FirstPassConvolutionRowOperation(
Rectangle bounds, Rectangle bounds,
Buffer2D<ComplexVector4> targetValues, Buffer2D<ComplexVector4> targetValues,
Buffer2D<TPixel> sourcePixels, Buffer2D<TPixel> sourcePixels,
Complex64[] kernel) KernelSamplingMap map,
Complex64[] kernel,
Configuration configuration)
{ {
this.bounds = bounds; this.bounds = bounds;
this.maxY = this.bounds.Bottom - 1;
this.maxX = this.bounds.Right - 1;
this.targetValues = targetValues; this.targetValues = targetValues;
this.sourcePixels = sourcePixels; this.sourcePixels = sourcePixels;
this.map = map;
this.kernel = kernel; this.kernel = kernel;
this.configuration = configuration;
} }
/// <inheritdoc/> /// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y) public void Invoke(int y, Span<Vector4> span)
{ {
Span<ComplexVector4> targetRowSpan = this.targetValues.GetRowSpan(y).Slice(this.bounds.X); int boundsX = this.bounds.X;
int boundsWidth = this.bounds.Width;
for (int x = 0; x < this.bounds.Width; x++) var state = new ConvolutionState<Complex64>(this.kernel, 1, this.kernel.Length, this.map);
ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);
Span<ComplexVector4> targetBuffer = this.targetValues.GetRowSpan(y);
// Clear the target buffer
targetBuffer.Clear();
ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
ReadOnlyKernel<Complex64> kernel = state.Kernel;
for (int kY = 0; kY < kernel.Rows; kY++)
{ {
Buffer2DUtils.Convolve4(this.kernel, this.sourcePixels, targetRowSpan, y, x, this.bounds.Y, this.maxY, this.bounds.X, this.maxX); // Get the precalculated source sample row for this kernel row and copy to our buffer.
int sampleY = Unsafe.Add(ref sampleRowBase, kY);
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, span);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span);
for (int x = 0; x < span.Length; x++)
{
ref int sampleColumnBase = ref state.GetSampleColumn(x);
ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x);
for (int kX = 0; kX < kernel.Columns; kX++)
{
int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
target.Sum(kernel[kY, kX] * sample);
}
}
} }
} }
} }

3
src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs

@ -1,10 +1,7 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;

Loading…
Cancel
Save