From caba642513933ad57cef05687fdbcc7149194aba Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 7 Dec 2020 14:26:29 +0000 Subject: [PATCH] Working version per-row --- .../Convolution2DProcessor{TPixel}.cs | 11 +- .../Convolution2DRowOperation{TPixel}.cs | 193 ++++++++++++++++++ .../Convolution/Convolution2DState.cs | 4 +- .../Convolution2PassProcessor{TPixel}.cs | 88 +------- .../ConvolutionProcessor{TPixel}.cs | 94 +++++++-- .../ConvolutionRowOperation{TPixel}.cs | 169 +++++++++++++++ .../Convolution/ConvolutionState.cs | 4 +- .../Processors/Convolution/Convolver.cs | 96 ++++++++- 8 files changed, 552 insertions(+), 107 deletions(-) create mode 100644 src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs create mode 100644 src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index 249c73e8d6..e787b3ec78 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -66,12 +66,17 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + + // We use a rectangle 3x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 3, interest.Height); + using (var map = new KernelSamplingMap(allocator)) { // Since the kernel sizes are identical we can use a single map. 
map.BuildSamplingOffsetMap(this.KernelY, interest); - var operation = new RowOperation( + var operation = new Convolution2DRowOperation( interest, targetPixels, source.PixelBuffer, @@ -81,9 +86,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( + ParallelRowIterator.IterateRows, Vector4>( this.Configuration, - interest, + operationBounds, in operation); } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs new file mode 100644 index 0000000000..6528a2f851 --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A implementing the logic for 2D convolution. 
+ /// + internal readonly struct Convolution2DRowOperation : IRowOperation + where TPixel : unmanaged, IPixel + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrixY; + private readonly DenseMatrix kernelMatrixX; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Convolution2DRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrixY, + DenseMatrix kernelMatrixX, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrixY = kernelMatrixY; + this.kernelMatrixX = kernelMatrixX; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 3x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, boundsWidth); + Span targetYBuffer = span.Slice(boundsWidth, boundsWidth); + Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); + + var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffers for each row run. 
+ targetYBuffer.Clear(); + targetXBuffer.Clear(); + ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); + ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + + Span sourceRow; + for (int kY = 0; kY < kernelY.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x); + ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x); + + for (int kX = 0; kX < kernelY.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + targetY += kernelX[kY, kX] * sample; + targetX += kernelY[kY, kX] * sample; + } + } + } + + // Now we need to combine the values and copy the original alpha values from the source row. 
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); + Vector4 vectorY = target; + Vector4 vectorX = Unsafe.Add(ref targetBaseX, x); + + target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRowSpan); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 3x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, boundsWidth); + Span targetYBuffer = span.Slice(boundsWidth, boundsWidth); + Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); + + var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffers for each row run. + targetYBuffer.Clear(); + targetXBuffer.Clear(); + ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); + ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + + for (int kY = 0; kY < kernelY.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x); + ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x); + + for (int kX = 0; kX < kernelY.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + targetY += kernelX[kY, kX] * sample; + targetX += kernelY[kY, kX] * sample; + } + } + } + + for (int x = 0; x < targetYBuffer.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); + Vector4 vectorY = target; + Vector4 vectorX = Unsafe.Add(ref targetBaseX, x); + + target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + } + + Numerics.UnPremultiply(targetYBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow); + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs index 9d17ebab05..218093ac4e 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs @@ -44,11 +44,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetRow(int row) + public readonly ref int GetSampleRow(int row) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * 
this.kernelHeight); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetColumn(int column) + public readonly ref int GetSampleColumn(int column) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index e05892fa54..151b0ffccc 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -63,12 +63,16 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + // We use a rectangle 2x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); + using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator)) { mapX.BuildSamplingOffsetMap(this.KernelX, interest); // Horizontal convolution - var horizontalOperation = new RowOperation( + var horizontalOperation = new ConvolutionRowOperation( interest, firstPassPixels, source.PixelBuffer, @@ -77,9 +81,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( + ParallelRowIterator.IterateRows, Vector4>( this.Configuration, - interest, + operationBounds, in horizontalOperation); } @@ -88,7 +92,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution mapY.BuildSamplingOffsetMap(this.KernelY, interest); // Vertical convolution - var verticalOperation = new RowOperation( + var verticalOperation = new ConvolutionRowOperation( interest, source.PixelBuffer, firstPassPixels, @@ -97,83 +101,11 
@@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( + ParallelRowIterator.IterateRows, Vector4>( this.Configuration, - interest, + operationBounds, in verticalOperation); } } - - /// - /// A implementing the convolution logic for . - /// - private readonly struct RowOperation : IRowOperation - { - private readonly Rectangle bounds; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly KernelSamplingMap map; - private readonly DenseMatrix kernel; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(InliningOptions.ShortMethod)] - public RowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - KernelSamplingMap map, - DenseMatrix kernel, - Configuration configuration, - bool preserveAlpha) - { - this.bounds = bounds; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.map = map; - this.kernel = kernel; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void Invoke(int y, Span span) - { - ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - - var state = new ConvolutionState(in this.kernel, this.map); - int row = y - this.bounds.Y; - - if (this.preserveAlpha) - { - for (int column = 0; column < this.bounds.Width; column++) - { - Convolver.Convolve3( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); - } - } - else - { - for (int column = 0; column < this.bounds.Width; column++) - { - Convolver.Convolve4( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); - } - } - - 
PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); - } - } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index d6be1dc563..924a1125bd 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -57,6 +57,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + + // We use a rectangle 2x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); using (var map = new KernelSamplingMap(allocator)) { map.BuildSamplingOffsetMap(this.KernelXY, interest); @@ -64,7 +68,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); ParallelRowIterator.IterateRows( this.Configuration, - interest, + operationBounds, in operation); } @@ -107,39 +111,93 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { + // Span is 2x bounds. 
+ int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); var state = new ConvolutionState(in this.kernel, this.map); int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); if (this.preserveAlpha) { - for (int column = 0; column < this.bounds.Width; column++) + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + Span sourceRow; + for (int kY = 0; kY < state.Kernel.Rows; kY++) { - Convolver.Convolve3( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < state.Kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += state.Kernel[kY, kX] * sample; + } + } + } + + // Now we need to copy the original alpha values from the source row. 
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; } } else { - for (int column = 0; column < this.bounds.Width; column++) + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + for (int kY = 0; kY < state.Kernel.Rows; kY++) { - Convolver.Convolve4( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < state.Kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += state.Kernel[kY, kX] * sample; + } + } } + + Numerics.UnPremultiply(targetBuffer); } - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs new file mode 100644 index 
0000000000..82aecdaf7b --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs @@ -0,0 +1,169 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A implementing the logic for 1D convolution. + /// + internal readonly struct ConvolutionRowOperation : IRowOperation + where TPixel : unmanaged, IPixel + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrix; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrix, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrix = kernelMatrix; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. 
+ int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ReadOnlyKernel kernel = state.Kernel; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + Span sourceRow; + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += kernel[kY, kX] * sample; + } + } + } + + // Now we need to copy the original alpha values from the source row. 
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ReadOnlyKernel kernel = state.Kernel; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. 
+ int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += kernel[kY, kX] * sample; + } + } + } + + Numerics.UnPremultiply(targetBuffer); + + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs index 851eeec247..3f296c67df 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs @@ -35,11 +35,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetRow(int row) + public readonly ref int GetSampleRow(int row) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetColumn(int column) + public readonly ref int GetSampleColumn(int column) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs index c23b71b330..721f7bbad1 100644 
--- a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -100,12 +101,12 @@ namespace SixLabors.ImageSharp Vector4 vectorY = default; Vector4 vectorX = default; - ref int sampleOffsetRowBase = ref state.GetSampleOffsetRow(row); + ref int sampleOffsetRowBase = ref state.GetSampleRow(row); for (int y = 0; y < kernelHeight; y++) { int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); - ref int sampleOffsetColumnBase = ref state.GetSampleOffsetColumn(column); + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(column); for (int x = 0; x < kernelWidth; x++) { @@ -188,6 +189,93 @@ namespace SixLabors.ImageSharp target = vector; } + /// + /// Computes the sum of vectors in the source row weighted + /// by the kernel weight values. + /// Using this method the convolution filter is applied to the color channels only; + /// the alpha value is taken from the target row. + /// + /// The convolution kernel state. + /// The source row. + /// The target row. + /// The current kernel row. + /// The interest x-bounds relative to the interest image.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ConvolveRow3( + in ConvolutionState state, + Span sourceRow, + Span targetRow, + int kY, + int bX) + { + ReadOnlyKernel kernel = state.Kernel; + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetRow); + + Numerics.Premultiply(sourceRow); + + for (int x = 0; x < sourceRow.Length; x++) + { + Vector4 vector = default; + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, kX) - bX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + vector += kernel[kY, kX] * sample; + } + + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + vector.W = target.W; + Numerics.UnPremultiply(ref vector); + target = vector; + } + } + + /// + /// Computes the sum of vectors in the source row weighted + /// by the kernel weight values. + /// Using this method the convolution filter is applied to alpha in addition to the + /// color channels. + /// + /// The convolution kernel state. + /// The source row. + /// The target row. + /// The current kernel row. + /// The interest x-bounds relative to the interest image.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ConvolveRow4( + in ConvolutionState state, + Span sourceRow, + Span targetRow, + int kY, + int bX) + where TPixel : unmanaged, IPixel + { + ReadOnlyKernel kernel = state.Kernel; + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetRow); + + Numerics.Premultiply(sourceRow); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, kX) - bX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += kernel[kY, kX] * sample; + } + } + + Numerics.UnPremultiply(targetRow); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void ConvolveImpl( in ConvolutionState state, @@ -201,12 +289,12 @@ namespace SixLabors.ImageSharp int kernelHeight = kernel.Rows; int kernelWidth = kernel.Columns; - ref int sampleOffsetRowBase = ref state.GetSampleOffsetRow(row); + ref int sampleOffsetRowBase = ref state.GetSampleRow(row); for (int y = 0; y < kernelHeight; y++) { int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); - ref int sampleOffsetColumnBase = ref state.GetSampleOffsetColumn(column); + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(column); for (int x = 0; x < kernelWidth; x++) {