diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 16ce0fdd75..ba4e0a6ad6 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -1,7 +1,10 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -69,7 +72,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution mapX.BuildSamplingOffsetMap(this.KernelX, interest); // Horizontal convolution - var horizontalOperation = new ConvolutionRowOperation( + var horizontalOperation = new HorizontalConvolutionRowOperation( interest, firstPassPixels, source.PixelBuffer, @@ -78,7 +81,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows, Vector4>( + ParallelRowIterator.IterateRows( this.Configuration, operationBounds, in horizontalOperation); @@ -104,5 +107,147 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution in verticalOperation); } } + + /// + /// A implementing the logic for the horizontal 1D convolution. + /// + internal readonly struct HorizontalConvolutionRowOperation : IRowOperation + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrix; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public HorizontalConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrix, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrix = kernelMatrix; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + // Get the precalculated source sample row for this kernel row and copy to our buffer. + ReadOnlyKernel kernel = state.Kernel; + int sampleY = Unsafe.Add(ref sampleRowBase, 0); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target += kernel[0, kX] * sample; + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + // Get the precalculated source sample row for this kernel row and copy to our buffer. + ReadOnlyKernel kernel = state.Kernel; + int sampleY = Unsafe.Add(ref sampleRowBase, 0); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target += kernel[0, kX] * sample; + } + } + + Numerics.UnPremultiply(targetBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + } } }