From d0306a2ab77558681be6609f83c82b71653858c7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 4 Dec 2020 14:54:50 +0000 Subject: [PATCH 01/24] First working no-clamp version for 2 pass convolution --- .gitattributes | 3 + .../Common/Helpers/DenseMatrixUtils.cs | 58 +++++------- .../Convolution2PassProcessor{TPixel}.cs | 86 +++++++++++------- .../ConvolutionProcessor{TPixel}.cs | 53 ++++++----- .../Convolution/Kernels/KernelOffsetMap.cs | 91 +++++++++++++++++++ 5 files changed, 198 insertions(+), 93 deletions(-) create mode 100644 src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs diff --git a/.gitattributes b/.gitattributes index c0bff6e189..7c648c0774 100644 --- a/.gitattributes +++ b/.gitattributes @@ -80,8 +80,11 @@ *.pvr binary *.snk binary *.tga binary +*.tif binary +*.tiff binary *.ttc binary *.ttf binary +*.wbmp binary *.webp binary *.woff binary *.woff2 binary diff --git a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs b/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs index f265bdd517..cf7eb1162a 100644 --- a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs +++ b/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing.Processors.Convolution; namespace SixLabors.ImageSharp { @@ -156,38 +157,32 @@ namespace SixLabors.ImageSharp /// /// The pixel format. /// The dense matrix. + /// The span containing precalculated kernel y-offsets. + /// The span containing precalculated kernel x-offsets. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve3( in DenseMatrix matrix, + Span yOffsetSpan, + Span xOffsetSpan, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) + int column) where TPixel : unmanaged, IPixel { Vector4 vector = default; ConvolveImpl( in matrix, + yOffsetSpan, + xOffsetSpan, sourcePixels, row, column, - minRow, - maxRow, - minColumn, - maxColumn, ref vector); ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); @@ -203,38 +198,32 @@ namespace SixLabors.ImageSharp /// /// The pixel format. /// The dense matrix. + /// The span containing precalculated kernel y-offsets. + /// The span containing precalculated kernel x-offsets. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve4( in DenseMatrix matrix, + Span yOffsetSpan, + Span xOffsetSpan, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) + int column) where TPixel : unmanaged, IPixel { Vector4 vector = default; ConvolveImpl( in matrix, + yOffsetSpan, + xOffsetSpan, sourcePixels, row, column, - minRow, - maxRow, - minColumn, - maxColumn, ref vector); ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); @@ -245,33 +234,28 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] private static void ConvolveImpl( in DenseMatrix matrix, + Span yOffsetSpan, + Span xOffsetSpan, Buffer2D sourcePixels, int row, int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn, - ref Vector4 vector) + ref Vector4 targetVector) where TPixel : unmanaged, IPixel { int matrixHeight = matrix.Rows; int matrixWidth = matrix.Columns; - int radiusY = matrixHeight >> 1; - int radiusX = matrixWidth >> 1; - int sourceOffsetColumnBase = column + minColumn; for (int y = 0; y < matrixHeight; y++) { - int offsetY = Numerics.Clamp(row + y - radiusY, minRow, maxRow); + int offsetY = yOffsetSpan[(row * matrixHeight) + y]; Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); for (int x = 0; x < matrixWidth; x++) { - int offsetX = Numerics.Clamp(sourceOffsetColumnBase + x - radiusX, minColumn, maxColumn); + int offsetX = xOffsetSpan[(column * matrixWidth) + x]; var currentColor = sourceRowSpan[offsetX].ToVector4(); Numerics.Premultiply(ref currentColor); - vector += matrix[y, x] * currentColor; + targetVector += matrix[y, x] * currentColor; } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index b61690415a..bc17378c88 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -63,19 +63,45 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - // Horizontal convolution - var horizontalOperation = new RowOperation(interest, firstPassPixels, source.PixelBuffer, this.KernelX, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in horizontalOperation); - - // Vertical convolution - var verticalOperation = new RowOperation(interest, source.PixelBuffer, firstPassPixels, this.KernelY, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in verticalOperation); + using (var mapX = new KernelOffsetMap(this.Configuration.MemoryAllocator)) + { + mapX.BuildOffsetMap(this.KernelX, interest); + + // Horizontal convolution + var horizontalOperation = new RowOperation( + interest, + firstPassPixels, + source.PixelBuffer, + mapX, + this.KernelX, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in horizontalOperation); + } + + using (var mapY = new KernelOffsetMap(this.Configuration.MemoryAllocator)) + { + mapY.BuildOffsetMap(this.KernelY, interest); + + // Vertical convolution + var verticalOperation = new RowOperation( + interest, + source.PixelBuffer, + firstPassPixels, + mapY, + this.KernelY, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in verticalOperation); + } } /// @@ -86,6 +112,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly Rectangle bounds; private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; + private readonly KernelOffsetMap map; private readonly DenseMatrix kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -95,6 +122,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Rectangle bounds, Buffer2D targetPixels, Buffer2D sourcePixels, + KernelOffsetMap map, DenseMatrix kernel, Configuration configuration, bool preserveAlpha) @@ -102,6 +130,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.bounds = bounds; this.targetPixels = targetPixels; this.sourcePixels = sourcePixels; + this.map = map; this.kernel = kernel; this.configuration = configuration; this.preserveAlpha = preserveAlpha; @@ -112,43 +141,38 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution public void Invoke(int y, Span span) { ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); - - int maxY = this.bounds.Bottom - 1; - int maxX = this.bounds.Right - 1; - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + Span yOffsets = this.map.GetYOffsetSpan(); + Span xOffsets = this.map.GetXOffsetSpan(); + int row = y - this.bounds.Y; if (this.preserveAlpha) { - for (int x = 0; x < this.bounds.Width; x++) + for (int column = 0; column < this.bounds.Width; column++) { DenseMatrixUtils.Convolve3( in this.kernel, + yOffsets, + xOffsets, this.sourcePixels, ref spanRef, - y, - x, - this.bounds.Y, - maxY, - this.bounds.X, - maxX); + row, + column); } } else { - for (int x = 0; x < this.bounds.Width; x++) + for (int column = 0; column < this.bounds.Width; column++) { DenseMatrixUtils.Convolve4( in this.kernel, + yOffsets, + xOffsets, this.sourcePixels, ref spanRef, - y, - x, - this.bounds.Y, - maxY, - this.bounds.X, - maxX); + row, + column); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index 95fef15f62..b2c5de396f 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -51,16 +51,22 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// protected override void OnFrameApply(ImageFrame source) { - using Buffer2D targetPixels = this.Configuration.MemoryAllocator.Allocate2D(source.Size()); + MemoryAllocator allocator = this.Configuration.MemoryAllocator; + using Buffer2D targetPixels = allocator.Allocate2D(source.Size()); source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelXY, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in operation); + using (var map = new KernelOffsetMap(allocator)) + { + map.BuildOffsetMap(this.KernelXY, interest); + + var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in operation); + } Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); } @@ -71,10 +77,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly struct RowOperation : IRowOperation { private readonly Rectangle bounds; - private readonly int maxY; - private readonly int maxX; private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; + private readonly KernelOffsetMap map; private readonly DenseMatrix kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -84,15 +89,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Rectangle bounds, Buffer2D targetPixels, Buffer2D sourcePixels, + KernelOffsetMap map, DenseMatrix kernel, Configuration configuration, bool preserveAlpha) { this.bounds = bounds; - this.maxY = this.bounds.Bottom - 1; - this.maxX = this.bounds.Right - 1; this.targetPixels = targetPixels; this.sourcePixels = sourcePixels; + this.map = map; this.kernel = kernel; this.configuration = configuration; this.preserveAlpha = preserveAlpha; @@ -103,40 +108,38 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution public void Invoke(int y, Span span) { ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + Span yOffsetSpan = this.map.GetYOffsetSpan(); + Span xOffsetSpan = this.map.GetXOffsetSpan(); + int row = y - this.bounds.Y; if (this.preserveAlpha) { - for (int x = 0; x < this.bounds.Width; x++) + for (int column = 0; column < this.bounds.Width; column++) { DenseMatrixUtils.Convolve3( in this.kernel, + yOffsetSpan, + xOffsetSpan, this.sourcePixels, ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); + row, + column); } } else { - for (int x = 0; x < this.bounds.Width; x++) + for (int column = 0; column < this.bounds.Width; column++) { DenseMatrixUtils.Convolve4( in this.kernel, + yOffsetSpan, + xOffsetSpan, this.sourcePixels, ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); + row, + column); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs b/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs new file mode 100644 index 0000000000..c1adf357ca --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs @@ -0,0 +1,91 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers; +using System.Runtime.CompilerServices; +using SixLabors.ImageSharp.Memory; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// Provides a map of the convolution kernel sampling offsets. + /// + internal sealed class KernelOffsetMap : IDisposable + { + private readonly MemoryAllocator allocator; + private bool isDisposed; + private IMemoryOwner yOffsets; + private IMemoryOwner xOffsets; + + /// + /// Initializes a new instance of the class. + /// + /// The memory allocator. + public KernelOffsetMap(MemoryAllocator allocator) => this.allocator = allocator; + + public void BuildOffsetMap(in DenseMatrix matrix, Rectangle bounds) + { + int matrixHeight = matrix.Rows; + int matrixWidth = matrix.Columns; + this.yOffsets = this.allocator.Allocate(bounds.Height * matrixHeight); + this.xOffsets = this.allocator.Allocate(bounds.Width * matrixWidth); + + int minY = bounds.Y; + int maxY = bounds.Bottom - 1; + int minX = bounds.X; + int maxX = bounds.Right - 1; + + int radiusY = matrixHeight >> 1; + int radiusX = matrixWidth >> 1; + + // Calculate the potential sampling y-offsets. + Span ySpan = this.yOffsets.GetSpan(); + for (int row = 0; row < bounds.Height; row++) + { + for (int y = 0; y < matrixHeight; y++) + { + ySpan[(row * matrixHeight) + y] = row + y + minY - radiusY; + } + } + + if (matrixHeight > 1) + { + Numerics.Clamp(ySpan, minY, maxY); + } + + // Calculate the potential sampling x-offsets. + Span xSpan = this.xOffsets.GetSpan(); + for (int column = 0; column < bounds.Width; column++) + { + for (int x = 0; x < matrixWidth; x++) + { + xSpan[(column * matrixWidth) + x] = column + x + minX - radiusX; + } + } + + if (matrixWidth > 1) + { + Numerics.Clamp(xSpan, minX, maxX); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span GetYOffsetSpan() => this.yOffsets.GetSpan(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span GetXOffsetSpan() => this.xOffsets.GetSpan(); + + /// + public void Dispose() + { + if (!this.isDisposed) + { + this.yOffsets.Dispose(); + this.xOffsets.Dispose(); + + this.isDisposed = true; + } + } + } +} From 228e2771d6c3def2278712ae0039fc7f250b5303 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 4 Dec 2020 15:05:15 +0000 Subject: [PATCH 02/24] Naming tweaks --- .../Convolution2PassProcessor{TPixel}.cs | 6 +++--- .../ConvolutionProcessor{TPixel}.cs | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index bc17378c88..63fbca98a9 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -140,7 +140,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { - ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); + ref Vector4 targetRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); Span yOffsets = this.map.GetYOffsetSpan(); @@ -156,7 +156,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution yOffsets, xOffsets, this.sourcePixels, - ref spanRef, + ref targetRef, row, column); } @@ -170,7 +170,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution yOffsets, xOffsets, this.sourcePixels, - ref spanRef, + ref targetRef, row, column); } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index b2c5de396f..ae2e8893f7 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -107,11 +107,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { - ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); + ref Vector4 targetRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - Span yOffsetSpan = this.map.GetYOffsetSpan(); - Span xOffsetSpan = this.map.GetXOffsetSpan(); + Span yOffsets = this.map.GetYOffsetSpan(); + Span xOffsets = this.map.GetXOffsetSpan(); int row = y - this.bounds.Y; if (this.preserveAlpha) @@ -120,10 +120,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { DenseMatrixUtils.Convolve3( in this.kernel, - yOffsetSpan, - xOffsetSpan, + yOffsets, + xOffsets, this.sourcePixels, - ref spanRef, + ref targetRef, row, column); } @@ -134,10 +134,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { DenseMatrixUtils.Convolve4( in this.kernel, - yOffsetSpan, - xOffsetSpan, + yOffsets, + xOffsets, this.sourcePixels, - ref spanRef, + ref targetRef, row, column); } From 6455df3c4d15fab5169451b3571953609e300980 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 4 Dec 2020 16:30:37 +0000 Subject: [PATCH 03/24] All tests pass --- .../Convolution2DProcessor{TPixel}.cs | 71 ++++--- .../Convolution2PassProcessor{TPixel}.cs | 22 +-- .../ConvolutionProcessor{TPixel}.cs | 18 +- .../Processors/Convolution/Convolver.cs} | 175 ++++++++---------- ...ernelOffsetMap.cs => KernelSamplingMap.cs} | 37 ++-- 5 files changed, 163 insertions(+), 160 deletions(-) rename src/ImageSharp/{Common/Helpers/DenseMatrixUtils.cs => Processing/Processors/Convolution/Convolver.cs} (57%) rename src/ImageSharp/Processing/Processors/Convolution/Kernels/{KernelOffsetMap.cs => KernelSamplingMap.cs} (66%) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index 3a5f35cd14..8f1d373556 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -60,17 +60,32 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// protected override void OnFrameApply(ImageFrame source) { - using Buffer2D targetPixels = this.Configuration.MemoryAllocator.Allocate2D(source.Width, source.Height); + MemoryAllocator allocator = this.Configuration.MemoryAllocator; + using Buffer2D targetPixels = allocator.Allocate2D(source.Width, source.Height); source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelY, this.KernelX, this.Configuration, this.PreserveAlpha); + using (var map = new KernelSamplingMap(allocator)) + { + // Since the kernel sizes are identical we can use a single map. + map.BuildSamplingOffsetMap(this.KernelY, interest); + + var operation = new RowOperation( + interest, + targetPixels, + source.PixelBuffer, + map, + this.KernelY, + this.KernelX, + this.Configuration, + this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in operation); + ParallelRowIterator.IterateRows( + this.Configuration, + interest, + in operation); + } Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); } @@ -81,10 +96,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly struct RowOperation : IRowOperation { private readonly Rectangle bounds; - private readonly int maxY; - private readonly int maxX; private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; private readonly DenseMatrix kernelY; private readonly DenseMatrix kernelX; private readonly Configuration configuration; @@ -95,16 +109,16 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Rectangle bounds, Buffer2D targetPixels, Buffer2D sourcePixels, + KernelSamplingMap map, DenseMatrix kernelY, DenseMatrix kernelX, Configuration configuration, bool preserveAlpha) { this.bounds = bounds; - this.maxY = this.bounds.Bottom - 1; - this.maxX = this.bounds.Right - 1; this.targetPixels = targetPixels; this.sourcePixels = sourcePixels; + this.map = map; this.kernelY = kernelY; this.kernelX = kernelX; this.configuration = configuration; @@ -115,42 +129,41 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { - ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + Span yOffsets = this.map.GetYOffsetSpan(); + Span xOffsets = this.map.GetXOffsetSpan(); + int row = y - this.bounds.Y; if (this.preserveAlpha) { - for (int x = 0; x < this.bounds.Width; x++) + for (int column = 0; column < this.bounds.Width; column++) { - DenseMatrixUtils.Convolve2D3( + Convolver.Convolve2D3( in this.kernelY, in this.kernelX, + yOffsets, + xOffsets, this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); + ref targetRowRef, + row, + column); } } else { - for (int x = 0; x < this.bounds.Width; x++) + for (int column = 0; column < this.bounds.Width; column++) { - DenseMatrixUtils.Convolve2D4( + Convolver.Convolve2D4( in this.kernelY, in this.kernelX, + yOffsets, + xOffsets, this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); + ref targetRowRef, + row, + column); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 63fbca98a9..2ea062e281 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -63,9 +63,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - using (var mapX = new KernelOffsetMap(this.Configuration.MemoryAllocator)) + using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator)) { - mapX.BuildOffsetMap(this.KernelX, interest); + mapX.BuildSamplingOffsetMap(this.KernelX, interest); // Horizontal convolution var horizontalOperation = new RowOperation( @@ -83,9 +83,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution in horizontalOperation); } - using (var mapY = new KernelOffsetMap(this.Configuration.MemoryAllocator)) + using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator)) { - mapY.BuildOffsetMap(this.KernelY, interest); + mapY.BuildSamplingOffsetMap(this.KernelY, interest); // Vertical convolution var verticalOperation = new RowOperation( @@ -112,7 +112,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly Rectangle bounds; private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; - private readonly KernelOffsetMap map; + private readonly KernelSamplingMap map; private readonly DenseMatrix kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -122,7 +122,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Rectangle bounds, Buffer2D targetPixels, Buffer2D sourcePixels, - KernelOffsetMap map, + KernelSamplingMap map, DenseMatrix kernel, Configuration configuration, bool preserveAlpha) @@ -140,7 +140,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { - ref Vector4 targetRef = ref MemoryMarshal.GetReference(span); + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); Span yOffsets = this.map.GetYOffsetSpan(); @@ -151,12 +151,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { for (int column = 0; column < this.bounds.Width; column++) { - DenseMatrixUtils.Convolve3( + Convolver.Convolve3( in this.kernel, yOffsets, xOffsets, this.sourcePixels, - ref targetRef, + ref targetRowRef, row, column); } @@ -165,12 +165,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { for (int column = 0; column < this.bounds.Width; column++) { - DenseMatrixUtils.Convolve4( + Convolver.Convolve4( in this.kernel, yOffsets, xOffsets, this.sourcePixels, - ref targetRef, + ref targetRowRef, row, column); } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index ae2e8893f7..999fba22be 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -57,9 +57,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - using (var map = new KernelOffsetMap(allocator)) + using (var map = new KernelSamplingMap(allocator)) { - map.BuildOffsetMap(this.KernelXY, interest); + map.BuildSamplingOffsetMap(this.KernelXY, interest); var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); ParallelRowIterator.IterateRows( @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly Rectangle bounds; private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; - private readonly KernelOffsetMap map; + private readonly KernelSamplingMap map; private readonly DenseMatrix kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -89,7 +89,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Rectangle bounds, Buffer2D targetPixels, Buffer2D sourcePixels, - KernelOffsetMap map, + KernelSamplingMap map, DenseMatrix kernel, Configuration configuration, bool preserveAlpha) @@ -107,7 +107,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { - ref Vector4 targetRef = ref MemoryMarshal.GetReference(span); + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); Span yOffsets = this.map.GetYOffsetSpan(); @@ -118,12 +118,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { for (int column = 0; column < this.bounds.Width; column++) { - DenseMatrixUtils.Convolve3( + Convolver.Convolve3( in this.kernel, yOffsets, xOffsets, this.sourcePixels, - ref targetRef, + ref targetRowRef, row, column); } @@ -132,12 +132,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution { for (int column = 0; column < this.bounds.Width; column++) { - DenseMatrixUtils.Convolve4( + Convolver.Convolve4( in this.kernel, yOffsets, xOffsets, this.sourcePixels, - ref targetRef, + ref targetRowRef, row, column); } diff --git a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs similarity index 57% rename from src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs rename to src/ImageSharp/Processing/Processors/Convolution/Convolver.cs index cf7eb1162a..c9e9d74148 100644 --- a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs @@ -6,56 +6,50 @@ using System.Numerics; using System.Runtime.CompilerServices; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; -using SixLabors.ImageSharp.Processing.Processors.Convolution; namespace SixLabors.ImageSharp { /// - /// Extension methods for . - /// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement. + /// Provides methods to perform convolution operations. /// - internal static class DenseMatrixUtils + internal static class Convolver { /// /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values. /// Using this method the convolution filter is not applied to alpha in addition to the color channels. /// /// The pixel format. - /// The vertical dense matrix. - /// The horizontal dense matrix. + /// The vertical convolution kernel. + /// The horizontal convolution kernel. + /// The span containing precalculated kernel y-sampling offsets. + /// The span containing precalculated kernel x-sampling offsets. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve2D3( - in DenseMatrix matrixY, - in DenseMatrix matrixX, + in DenseMatrix kernelY, + in DenseMatrix kernelX, + Span rowSampleOffsets, + Span columnSampleOffsets, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) + int column) where TPixel : unmanaged, IPixel { + Vector4 vector = default; + Convolve2DImpl( - in matrixY, - in matrixX, + in kernelY, + in kernelX, + rowSampleOffsets, + columnSampleOffsets, sourcePixels, row, column, - minRow, - maxRow, - minColumn, - maxColumn, - out Vector4 vector); + ref vector); ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); vector.W = target.W; @@ -69,41 +63,37 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is applied to alpha in addition to the color channels. /// /// The pixel format. - /// The vertical dense matrix. - /// The horizontal dense matrix. + /// The vertical convolution kernel. + /// The horizontal convolution kernel. + /// The span containing precalculated kernel y-sampling offsets. + /// The span containing precalculated kernel x-sampling offsets. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve2D4( - in DenseMatrix matrixY, - in DenseMatrix matrixX, + in DenseMatrix kernelY, + in DenseMatrix kernelX, + Span rowSampleOffsets, + Span columnSampleOffsets, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) + int column) where TPixel : unmanaged, IPixel { + Vector4 vector = default; + Convolve2DImpl( - in matrixY, - in matrixX, + in kernelY, + in kernelX, + rowSampleOffsets, + columnSampleOffsets, sourcePixels, row, column, - minRow, - maxRow, - minColumn, - maxColumn, - out Vector4 vector); + ref vector); ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); Numerics.UnPremultiply(ref vector); @@ -112,43 +102,38 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve2DImpl( - in DenseMatrix matrixY, - in DenseMatrix matrixX, + in DenseMatrix kernelY, + in DenseMatrix kernelX, + Span rowSampleOffsets, + Span columnSampleOffsets, Buffer2D sourcePixels, int row, int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn, - out Vector4 vector) + ref Vector4 targetVector) where TPixel : unmanaged, IPixel { Vector4 vectorY = default; Vector4 vectorX = default; - int matrixHeight = matrixY.Rows; - int matrixWidth = matrixY.Columns; - int radiusY = matrixHeight >> 1; - int radiusX = matrixWidth >> 1; - int sourceOffsetColumnBase = column + minColumn; + int kernelHeight = kernelY.Rows; + int kernelWidth = kernelY.Columns; - for (int y = 0; y < matrixHeight; y++) + for (int y = 0; y < kernelHeight; y++) { - int offsetY = Numerics.Clamp(row + y - radiusY, minRow, maxRow); + int offsetY = rowSampleOffsets[(row * kernelHeight) + y]; Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); - for (int x = 0; x < matrixWidth; x++) + for (int x = 0; x < kernelWidth; x++) { - int offsetX = Numerics.Clamp(sourceOffsetColumnBase + x - radiusX, minColumn, maxColumn); - var currentColor = sourceRowSpan[offsetX].ToVector4(); - Numerics.Premultiply(ref currentColor); + int offsetX = columnSampleOffsets[(column * kernelWidth) + x]; + var sample = sourceRowSpan[offsetX].ToVector4(); + Numerics.Premultiply(ref sample); - vectorX += matrixX[y, x] * currentColor; - vectorY += matrixY[y, x] * currentColor; + vectorX += kernelX[y, x] * sample; + vectorY += kernelY[y, x] * sample; } } - vector = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + targetVector = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); } /// @@ -156,18 +141,18 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is not applied to alpha in addition to the color channels. /// /// The pixel format. - /// The dense matrix. - /// The span containing precalculated kernel y-offsets. - /// The span containing precalculated kernel x-offsets. + /// The convolution kernel. + /// The span containing precalculated kernel y-sampling offsets. + /// The span containing precalculated kernel x-sampling offsets. /// The source frame. /// The target row base reference. /// The current row. /// The current column. [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve3( - in DenseMatrix matrix, - Span yOffsetSpan, - Span xOffsetSpan, + in DenseMatrix kernel, + Span rowSampleOffsets, + Span columnSampleOffsets, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, @@ -177,9 +162,9 @@ namespace SixLabors.ImageSharp Vector4 vector = default; ConvolveImpl( - in matrix, - yOffsetSpan, - xOffsetSpan, + in kernel, + rowSampleOffsets, + columnSampleOffsets, sourcePixels, row, column, @@ -197,18 +182,18 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is applied to alpha in addition to the color channels. /// /// The pixel format. - /// The dense matrix. - /// The span containing precalculated kernel y-offsets. - /// The span containing precalculated kernel x-offsets. + /// The convolution kernel. + /// The span containing precalculated kernel y-offsets. + /// The span containing precalculated kernel x-offsets. /// The source frame. /// The target row base reference. /// The current row. /// The current column. [MethodImpl(InliningOptions.ShortMethod)] public static void Convolve4( - in DenseMatrix matrix, - Span yOffsetSpan, - Span xOffsetSpan, + in DenseMatrix kernel, + Span rowSampleOffsets, + Span columnSampleOffsets, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, @@ -218,9 +203,9 @@ namespace SixLabors.ImageSharp Vector4 vector = default; ConvolveImpl( - in matrix, - yOffsetSpan, - xOffsetSpan, + in kernel, + rowSampleOffsets, + columnSampleOffsets, sourcePixels, row, column, @@ -233,29 +218,29 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] private static void ConvolveImpl( - in DenseMatrix matrix, - Span yOffsetSpan, - Span xOffsetSpan, + in DenseMatrix kernel, + Span rowSampleOffsets, + Span columnSampleOffsets, Buffer2D sourcePixels, int row, int column, ref Vector4 targetVector) where TPixel : unmanaged, IPixel { - int matrixHeight = matrix.Rows; - int matrixWidth = matrix.Columns; + int kernelHeight = kernel.Rows; + int kernelWidth = kernel.Columns; - for (int y = 0; y < matrixHeight; y++) + for (int y = 0; y < kernelHeight; y++) { - int offsetY = yOffsetSpan[(row * matrixHeight) + y]; + int offsetY = rowSampleOffsets[(row * kernelHeight) + y]; Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); - for (int x = 0; x < matrixWidth; x++) + for (int x = 0; x < kernelWidth; x++) { - int offsetX = xOffsetSpan[(column * matrixWidth) + x]; - var currentColor = sourceRowSpan[offsetX].ToVector4(); - Numerics.Premultiply(ref currentColor); - targetVector += matrix[y, x] * currentColor; + int offsetX = columnSampleOffsets[(column * kernelWidth) + x]; + var sample = sourceRowSpan[offsetX].ToVector4(); + Numerics.Premultiply(ref sample); + targetVector += kernel[y, x] * sample; } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs b/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelSamplingMap.cs similarity index 66% rename from src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs rename to src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelSamplingMap.cs index c1adf357ca..493c0d0fd2 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelOffsetMap.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelSamplingMap.cs @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// /// Provides a map of the convolution kernel sampling offsets. /// - internal sealed class KernelOffsetMap : IDisposable + internal sealed class KernelSamplingMap : IDisposable { private readonly MemoryAllocator allocator; private bool isDisposed; @@ -19,37 +19,42 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private IMemoryOwner xOffsets; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The memory allocator. - public KernelOffsetMap(MemoryAllocator allocator) => this.allocator = allocator; + public KernelSamplingMap(MemoryAllocator allocator) => this.allocator = allocator; - public void BuildOffsetMap(in DenseMatrix matrix, Rectangle bounds) + /// + /// Builds a map of the sampling offsets for the kernel clamped by the given bounds. + /// + /// The convolution kernel. + /// The source bounds. + public void BuildSamplingOffsetMap(DenseMatrix kernel, Rectangle bounds) { - int matrixHeight = matrix.Rows; - int matrixWidth = matrix.Columns; - this.yOffsets = this.allocator.Allocate(bounds.Height * matrixHeight); - this.xOffsets = this.allocator.Allocate(bounds.Width * matrixWidth); + int kernelHeight = kernel.Rows; + int kernelWidth = kernel.Columns; + this.yOffsets = this.allocator.Allocate(bounds.Height * kernelHeight); + this.xOffsets = this.allocator.Allocate(bounds.Width * kernelWidth); int minY = bounds.Y; int maxY = bounds.Bottom - 1; int minX = bounds.X; int maxX = bounds.Right - 1; - int radiusY = matrixHeight >> 1; - int radiusX = matrixWidth >> 1; + int radiusY = kernelHeight >> 1; + int radiusX = kernelWidth >> 1; // Calculate the potential sampling y-offsets. Span ySpan = this.yOffsets.GetSpan(); for (int row = 0; row < bounds.Height; row++) { - for (int y = 0; y < matrixHeight; y++) + for (int y = 0; y < kernelHeight; y++) { - ySpan[(row * matrixHeight) + y] = row + y + minY - radiusY; + ySpan[(row * kernelHeight) + y] = row + y + minY - radiusY; } } - if (matrixHeight > 1) + if (kernelHeight > 1) { Numerics.Clamp(ySpan, minY, maxY); } @@ -58,13 +63,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span xSpan = this.xOffsets.GetSpan(); for (int column = 0; column < bounds.Width; column++) { - for (int x = 0; x < matrixWidth; x++) + for (int x = 0; x < kernelWidth; x++) { - xSpan[(column * matrixWidth) + x] = column + x + minX - radiusX; + xSpan[(column * kernelWidth) + x] = column + x + minX - radiusX; } } - if (matrixWidth > 1) + if (kernelWidth > 1) { Numerics.Clamp(xSpan, minX, maxX); } From b273648420fd3ac44520e9aec140d0a7e154bf24 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 00:48:27 +0100 Subject: [PATCH 04/24] hack --- src/ImageSharp/ImageSharp.csproj | 3 ++- tests/ImageSharp.Tests/ImageSharp.Tests.csproj | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index 1d7fb2958b..d2d07af54c 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -12,7 +12,8 @@ $(RepositoryUrl) Image Resize Crop Gif Jpg Jpeg Bitmap Png Tga NetCore A new, fully featured, fully managed, cross-platform, 2D graphics API for .NET - netcoreapp3.1;netcoreapp2.1;netstandard2.1;netstandard2.0;netstandard1.3;net472 + + netcoreapp3.1 diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index 5426144401..ae920775a2 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -2,7 +2,8 @@ - netcoreapp3.1;netcoreapp2.1;net472 + + netcoreapp3.1 True SixLabors.ImageSharp.Tests AnyCPU;x64;x86 From ccdf9c26a132f4f374a85b9a936d6d47a94a955b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 5 Dec 2020 01:30:47 +0000 Subject: [PATCH 05/24] Refactor 2D and cleanup --- src/ImageSharp/Primitives/DenseMatrix{T}.cs | 16 +-- .../Convolution2DProcessor{TPixel}.cs | 18 +-- .../Convolution/Convolution2DState.cs | 54 +++++++++ .../Convolution2PassProcessor{TPixel}.cs | 16 +-- .../ConvolutionProcessor{TPixel}.cs | 14 +-- .../Convolution/ConvolutionState.cs | 45 ++++++++ .../Processors/Convolution/Convolver.cs | 103 ++++++------------ .../{Kernels => }/KernelSamplingMap.cs | 4 +- .../Processors/Convolution/ReadOnlyKernel.cs | 63 +++++++++++ 9 files changed, 225 insertions(+), 108 deletions(-) create mode 100644 src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs create mode 100644 src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs rename src/ImageSharp/Processing/Processors/Convolution/{Kernels => }/KernelSamplingMap.cs (95%) create mode 100644 src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs diff --git a/src/ImageSharp/Primitives/DenseMatrix{T}.cs b/src/ImageSharp/Primitives/DenseMatrix{T}.cs index e312703368..60dadb617b 100644 --- a/src/ImageSharp/Primitives/DenseMatrix{T}.cs +++ b/src/ImageSharp/Primitives/DenseMatrix{T}.cs @@ -109,7 +109,7 @@ namespace SixLabors.ImageSharp /// The at the specified position. public ref T this[int row, int column] { - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] get { this.CheckCoordinates(row, column); @@ -124,7 +124,7 @@ namespace SixLabors.ImageSharp /// /// The representation on the source data. /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static implicit operator DenseMatrix(T[,] data) => new DenseMatrix(data); /// @@ -134,7 +134,7 @@ namespace SixLabors.ImageSharp /// /// The representation on the source data. /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] #pragma warning disable SA1008 // Opening parenthesis should be spaced correctly public static implicit operator T[,](in DenseMatrix data) #pragma warning restore SA1008 // Opening parenthesis should be spaced correctly @@ -175,7 +175,7 @@ namespace SixLabors.ImageSharp /// Transposes the rows and columns of the dense matrix. /// /// The . - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public DenseMatrix Transpose() { var result = new DenseMatrix(this.Rows, this.Columns); @@ -196,13 +196,13 @@ namespace SixLabors.ImageSharp /// Fills the matrix with the given value /// /// The value to fill each item with - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Fill(T value) => this.Span.Fill(value); /// /// Clears the matrix setting each value to the default value for the element type /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Clear() => this.Span.Clear(); /// @@ -232,14 +232,14 @@ namespace SixLabors.ImageSharp => obj is DenseMatrix other && this.Equals(other); /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Equals(DenseMatrix other) => this.Columns == other.Columns && this.Rows == other.Rows && this.Span.SequenceEqual(other.Span); /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public override int GetHashCode() { HashCode code = default; diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index 8f1d373556..249c73e8d6 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -43,12 +43,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } /// - /// Gets the horizontal gradient operator. + /// Gets the horizontal convolution kernel. /// public DenseMatrix KernelX { get; } /// - /// Gets the vertical gradient operator. + /// Gets the vertical convolution kernel. /// public DenseMatrix KernelY { get; } @@ -132,8 +132,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - Span yOffsets = this.map.GetYOffsetSpan(); - Span xOffsets = this.map.GetXOffsetSpan(); + + var state = new Convolution2DState(this.kernelY, this.kernelX, this.map); int row = y - this.bounds.Y; if (this.preserveAlpha) @@ -141,10 +141,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int column = 0; column < this.bounds.Width; column++) { Convolver.Convolve2D3( - in this.kernelY, - in this.kernelX, - yOffsets, - xOffsets, + in state, this.sourcePixels, ref targetRowRef, row, @@ -156,10 +153,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int column = 0; column < this.bounds.Width; column++) { Convolver.Convolve2D4( - in this.kernelY, - in this.kernelX, - yOffsets, - xOffsets, + in state, this.sourcePixels, ref targetRowRef, row, diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs new file mode 100644 index 0000000000..e36d458a4a --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs @@ -0,0 +1,54 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A stack only struct used for reducing reference indirection during 2D convolution operations. + /// + internal readonly ref struct Convolution2DState + { + private readonly Span rowOffsetMap; + private readonly Span columnOffsetMap; + private readonly int kernelHeight; + private readonly int kernelWidth; + + public Convolution2DState( + in DenseMatrix kernelY, + in DenseMatrix kernelX, + KernelSamplingMap map) + { + // We check the kernels are the same size upstream. + this.KernelY = new ReadOnlyKernel(kernelY); + this.KernelX = new ReadOnlyKernel(kernelX); + this.kernelHeight = kernelY.Rows; + this.kernelWidth = kernelY.Columns; + this.rowOffsetMap = map.GetRowOffsetSpan(); + this.columnOffsetMap = map.GetColumnOffsetSpan(); + } + + public ReadOnlyKernel KernelY + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + public ReadOnlyKernel KernelX + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetRowSampleOffset(int row, int kernelRow) + => Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), (row * this.kernelHeight) + kernelRow); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetColumnSampleOffset(int column, int kernelColumn) + => Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), (column * this.kernelWidth) + kernelColumn); + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 2ea062e281..95fd3b83cc 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -42,12 +42,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } /// - /// Gets the horizontal gradient operator. + /// Gets the horizontal convolution kernel. /// public DenseMatrix KernelX { get; } /// - /// Gets the vertical gradient operator. + /// Gets the vertical convolution kernel. /// public DenseMatrix KernelY { get; } @@ -143,8 +143,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - Span yOffsets = this.map.GetYOffsetSpan(); - Span xOffsets = this.map.GetXOffsetSpan(); + + var state = new ConvolutionState(this.kernel, this.map); int row = y - this.bounds.Y; if (this.preserveAlpha) @@ -152,9 +152,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int column = 0; column < this.bounds.Width; column++) { Convolver.Convolve3( - in this.kernel, - yOffsets, - xOffsets, + in state, this.sourcePixels, ref targetRowRef, row, @@ -166,9 +164,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int column = 0; column < this.bounds.Width; column++) { Convolver.Convolve4( - in this.kernel, - yOffsets, - xOffsets, + in state, this.sourcePixels, ref targetRowRef, row, diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index 999fba22be..191460f40b 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } /// - /// Gets the 2d gradient operator. + /// Gets the 2d convolution kernel. /// public DenseMatrix KernelXY { get; } @@ -110,8 +110,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - Span yOffsets = this.map.GetYOffsetSpan(); - Span xOffsets = this.map.GetXOffsetSpan(); + + var state = new ConvolutionState(this.kernel, this.map); int row = y - this.bounds.Y; if (this.preserveAlpha) @@ -119,9 +119,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int column = 0; column < this.bounds.Width; column++) { Convolver.Convolve3( - in this.kernel, - yOffsets, - xOffsets, + in state, this.sourcePixels, ref targetRowRef, row, @@ -133,9 +131,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int column = 0; column < this.bounds.Width; column++) { Convolver.Convolve4( - in this.kernel, - yOffsets, - xOffsets, + in state, this.sourcePixels, ref targetRowRef, row, diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs new file mode 100644 index 0000000000..97a3af342e --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs @@ -0,0 +1,45 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A stack only struct used for reducing reference indirection during convolution operations. + /// + internal readonly ref struct ConvolutionState + { + private readonly Span rowOffsetMap; + private readonly Span columnOffsetMap; + private readonly int kernelHeight; + private readonly int kernelWidth; + + public ConvolutionState( + in DenseMatrix kernel, + KernelSamplingMap map) + { + this.Kernel = new ReadOnlyKernel(kernel); + this.kernelHeight = kernel.Rows; + this.kernelWidth = kernel.Columns; + this.rowOffsetMap = map.GetRowOffsetSpan(); + this.columnOffsetMap = map.GetColumnOffsetSpan(); + } + + public ReadOnlyKernel Kernel + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetRowSampleOffset(int row, int kernelRow) + => Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), (row * this.kernelHeight) + kernelRow); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetColumnSampleOffset(int column, int kernelColumn) + => Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), (column * this.kernelWidth) + kernelColumn); + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs index c9e9d74148..5ddc8e85c6 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs @@ -1,11 +1,12 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing.Processors.Convolution; namespace SixLabors.ImageSharp { @@ -19,20 +20,14 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is not applied to alpha in addition to the color channels. /// /// The pixel format. - /// The vertical convolution kernel. - /// The horizontal convolution kernel. - /// The span containing precalculated kernel y-sampling offsets. - /// The span containing precalculated kernel x-sampling offsets. + /// The 2D convolution kernels state. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Convolve2D3( - in DenseMatrix kernelY, - in DenseMatrix kernelX, - Span rowSampleOffsets, - Span columnSampleOffsets, + in Convolution2DState state, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, @@ -42,10 +37,7 @@ namespace SixLabors.ImageSharp Vector4 vector = default; Convolve2DImpl( - in kernelY, - in kernelX, - rowSampleOffsets, - columnSampleOffsets, + in state, sourcePixels, row, column, @@ -63,20 +55,14 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is applied to alpha in addition to the color channels. /// /// The pixel format. - /// The vertical convolution kernel. - /// The horizontal convolution kernel. - /// The span containing precalculated kernel y-sampling offsets. - /// The span containing precalculated kernel x-sampling offsets. + /// The 2D convolution kernels state. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Convolve2D4( - in DenseMatrix kernelY, - in DenseMatrix kernelX, - Span rowSampleOffsets, - Span columnSampleOffsets, + in Convolution2DState state, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, @@ -86,10 +72,7 @@ namespace SixLabors.ImageSharp Vector4 vector = default; Convolve2DImpl( - in kernelY, - in kernelX, - rowSampleOffsets, - columnSampleOffsets, + in state, sourcePixels, row, column, @@ -100,34 +83,33 @@ namespace SixLabors.ImageSharp target = vector; } - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Convolve2DImpl( - in DenseMatrix kernelY, - in DenseMatrix kernelX, - Span rowSampleOffsets, - Span columnSampleOffsets, + in Convolution2DState state, Buffer2D sourcePixels, int row, int column, ref Vector4 targetVector) where TPixel : unmanaged, IPixel { - Vector4 vectorY = default; - Vector4 vectorX = default; + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; int kernelHeight = kernelY.Rows; int kernelWidth = kernelY.Columns; + Vector4 vectorY = default; + Vector4 vectorX = default; + for (int y = 0; y < kernelHeight; y++) { - int offsetY = rowSampleOffsets[(row * kernelHeight) + y]; - Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); + int offsetY = state.GetRowSampleOffset(row, y); + ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); for (int x = 0; x < kernelWidth; x++) { - int offsetX = columnSampleOffsets[(column * kernelWidth) + x]; - var sample = sourceRowSpan[offsetX].ToVector4(); + int offsetX = state.GetColumnSampleOffset(column, x); + var sample = Unsafe.Add(ref sourceRowBase, offsetX).ToVector4(); Numerics.Premultiply(ref sample); - vectorX += kernelX[y, x] * sample; vectorY += kernelY[y, x] * sample; } @@ -141,18 +123,14 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is not applied to alpha in addition to the color channels. /// /// The pixel format. - /// The convolution kernel. - /// The span containing precalculated kernel y-sampling offsets. - /// The span containing precalculated kernel x-sampling offsets. + /// The convolution kernel state. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Convolve3( - in DenseMatrix kernel, - Span rowSampleOffsets, - Span columnSampleOffsets, + in ConvolutionState state, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, @@ -162,9 +140,7 @@ namespace SixLabors.ImageSharp Vector4 vector = default; ConvolveImpl( - in kernel, - rowSampleOffsets, - columnSampleOffsets, + state, sourcePixels, row, column, @@ -182,18 +158,14 @@ namespace SixLabors.ImageSharp /// Using this method the convolution filter is applied to alpha in addition to the color channels. /// /// The pixel format. - /// The convolution kernel. - /// The span containing precalculated kernel y-offsets. - /// The span containing precalculated kernel x-offsets. + /// The convolution kernel state. /// The source frame. /// The target row base reference. /// The current row. /// The current column. - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Convolve4( - in DenseMatrix kernel, - Span rowSampleOffsets, - Span columnSampleOffsets, + in ConvolutionState state, Buffer2D sourcePixels, ref Vector4 targetRowRef, int row, @@ -203,9 +175,7 @@ namespace SixLabors.ImageSharp Vector4 vector = default; ConvolveImpl( - in kernel, - rowSampleOffsets, - columnSampleOffsets, + state, sourcePixels, row, column, @@ -216,29 +186,28 @@ namespace SixLabors.ImageSharp target = vector; } - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void ConvolveImpl( - in DenseMatrix kernel, - Span rowSampleOffsets, - Span columnSampleOffsets, + in ConvolutionState state, Buffer2D sourcePixels, int row, int column, ref Vector4 targetVector) where TPixel : unmanaged, IPixel { + ReadOnlyKernel kernel = state.Kernel; int kernelHeight = kernel.Rows; int kernelWidth = kernel.Columns; for (int y = 0; y < kernelHeight; y++) { - int offsetY = rowSampleOffsets[(row * kernelHeight) + y]; - Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); + int offsetY = state.GetRowSampleOffset(row, y); + ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); for (int x = 0; x < kernelWidth; x++) { - int offsetX = columnSampleOffsets[(column * kernelWidth) + x]; - var sample = sourceRowSpan[offsetX].ToVector4(); + int offsetX = state.GetColumnSampleOffset(column, x); + var sample = Unsafe.Add(ref sourceRowBase, offsetX).ToVector4(); Numerics.Premultiply(ref sample); targetVector += kernel[y, x] * sample; } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelSamplingMap.cs b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs similarity index 95% rename from src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelSamplingMap.cs rename to src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs index 493c0d0fd2..73a4fa4004 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Kernels/KernelSamplingMap.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs @@ -76,10 +76,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Span GetYOffsetSpan() => this.yOffsets.GetSpan(); + public Span GetRowOffsetSpan() => this.yOffsets.GetSpan(); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Span GetXOffsetSpan() => this.xOffsets.GetSpan(); + public Span GetColumnOffsetSpan() => this.xOffsets.GetSpan(); /// public void Dispose() diff --git a/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs b/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs new file mode 100644 index 0000000000..37e0060054 --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs @@ -0,0 +1,63 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A stack only, readonly, kernel matrix that can be indexed without + /// bounds checks when compiled in release mode. + /// + internal readonly ref struct ReadOnlyKernel + { + private readonly ReadOnlySpan values; + + public ReadOnlyKernel(DenseMatrix matrix) + { + this.Columns = matrix.Columns; + this.Rows = matrix.Rows; + this.values = matrix.Span; + } + + public int Columns + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + public int Rows + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + public float this[int row, int column] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get + { + this.CheckCoordinates(row, column); + ref float vBase = ref MemoryMarshal.GetReference(this.values); + return Unsafe.Add(ref vBase, (row * this.Columns) + column); + } + } + + [Conditional("DEBUG")] + private void CheckCoordinates(int row, int column) + { + if (row < 0 || row >= this.Rows) + { + throw new ArgumentOutOfRangeException(nameof(row), row, $"{row} is outwith the matrix bounds."); + } + + if (column < 0 || column >= this.Columns) + { + throw new ArgumentOutOfRangeException(nameof(column), column, $"{column} is outwith the matrix bounds."); + } + } + } +} From 5059597bbfa5a05b7b07de34872d9eca39319801 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 5 Dec 2020 13:55:49 +0000 Subject: [PATCH 06/24] Update KernelSamplingMap.cs --- .../Processors/Convolution/KernelSamplingMap.cs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs index 73a4fa4004..144d356c6e 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs @@ -4,6 +4,7 @@ using System; using System.Buffers; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Processing.Processors.Convolution @@ -44,13 +45,16 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution int radiusY = kernelHeight >> 1; int radiusX = kernelWidth >> 1; - // Calculate the potential sampling y-offsets. + // Calculate the y and x sampling offsets clamped to the given rectangle. + // While this isn't a hotpath we still dip into unsafe to avoid the span bounds + // checks as the can potentially be looping over large arrays. Span ySpan = this.yOffsets.GetSpan(); + ref int ySpanBase = ref MemoryMarshal.GetReference(ySpan); for (int row = 0; row < bounds.Height; row++) { for (int y = 0; y < kernelHeight; y++) { - ySpan[(row * kernelHeight) + y] = row + y + minY - radiusY; + Unsafe.Add(ref ySpanBase, (row * kernelHeight) + y) = row + y + minY - radiusY; } } @@ -59,13 +63,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Numerics.Clamp(ySpan, minY, maxY); } - // Calculate the potential sampling x-offsets. Span xSpan = this.xOffsets.GetSpan(); + ref int xSpanBase = ref MemoryMarshal.GetReference(xSpan); for (int column = 0; column < bounds.Width; column++) { for (int x = 0; x < kernelWidth; x++) { - xSpan[(column * kernelWidth) + x] = column + x + minX - radiusX; + Unsafe.Add(ref xSpanBase, (column * kernelWidth) + x) = column + x + minX - radiusX; } } From dd0447ef4cb26d12c7e7438eaecde588b156aacc Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 16:30:31 +0100 Subject: [PATCH 07/24] entry API & tests --- src/ImageSharp/Common/Helpers/SimdUtils.cs | 21 ++++ .../PixelFormats/PixelOperations{TPixel}.cs | 26 +++++ .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 105 +++++++----------- .../PixelOperations/PixelOperationsTests.cs | 18 ++- 4 files changed, 103 insertions(+), 67 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index aaf6d405cf..f37226a1a3 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using SixLabors.ImageSharp.PixelFormats; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -147,6 +148,26 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + } + + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + } + [MethodImpl(InliningOptions.ColdPath)] private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan source, Span dest) { diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs index dbe06702d9..e562f333c6 100644 --- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs +++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs @@ -159,5 +159,31 @@ namespace SixLabors.ImageSharp.PixelFormats PixelOperations.Instance.From(configuration, sourcePixels, destinationPixels); } + + /// + /// Bulk operation that converts 3 seperate RGB channels to + /// + /// A to configure internal operations. + /// A to the red values. + /// A to the green values. + /// A to the blue values. + /// A to the destination pixels. + public virtual void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); + + for (int i = 0; i < destination.Length; i++) + { + var rgb24 = new Rgb24(redChannel[i], greenChannel[i], blueChannel[i]); + + destination[i].FromRgb24(rgb24); + } + } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index ec09e43e57..38b4e2e8c6 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -7,6 +7,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Tuples; +using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -336,90 +337,64 @@ namespace SixLabors.ImageSharp.Tests.Common } } - private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( - int count, - Action, - Memory> convert, - int seed = -1) + [Theory] + [MemberData(nameof(ArbitraryArraySizes))] + public void PackFromRgbPlanes_Rgb24(int count) { - seed = seed > 0 ? seed : count; - float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f); - byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); - var actual = new byte[count]; - - convert(source, actual); - - Assert.Equal(expected, actual); + TestPackFromRgbPlanes( + count, + (r, g, b, actual) => + SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } - private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f)); - [Theory] - [InlineData(0)] - [InlineData(7)] - [InlineData(42)] - [InlineData(255)] - [InlineData(256)] - [InlineData(257)] - private void MagicConvertToByte(float value) + [MemberData(nameof(ArbitraryArraySizes))] + public void PackFromRgbPlanes_Rgba32(int count) { - byte actual = MagicConvert(value / 256f); - var expected = (byte)value; - - Assert.Equal(expected, actual); + TestPackFromRgbPlanes( + count, + (r, g, b, actual) => + SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } - [Fact] - private void BulkConvertNormalizedFloatToByte_Step() + internal static void TestPackFromRgbPlanes(int count, Action packMethod) + where TPixel : unmanaged, IPixel { - if (this.SkipOnNonAvx2()) + Random rnd = new Random(42); + byte[] r = rnd.GenerateRandomByteArray(count); + byte[] g = rnd.GenerateRandomByteArray(count); + byte[] b = rnd.GenerateRandomByteArray(count); + + TPixel[] expected = new TPixel[count]; + for (int i = 0; i < count; i++) { - return; + expected[i].FromRgb24(new Rgb24(r[i], g[i], b[i])); } - float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f }; - - byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray(); + TPixel[] actual = new TPixel[count]; + packMethod(r, g, b, actual); - source = source.Select(f => f / 255f).ToArray(); - - Span dest = stackalloc byte[8]; - - this.MagicConvert(source, dest); - - Assert.True(dest.SequenceEqual(expected)); - } - - private static byte MagicConvert(float x) - { - float f = 32768.0f + x; - uint i = Unsafe.As(ref f); - return (byte)i; + Assert.Equal(expected, actual); } - private void MagicConvert(Span source, Span dest) + private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + int count, + Action, + Memory> convert, + int seed = -1) { - var magick = new Vector(32768.0f); - - var scale = new Vector(255f) / new Vector(256f); - - Vector x = MemoryMarshal.Cast>(source)[0]; - - x = (x * scale) + magick; - - Tuple8.OfUInt32 ii = default; - - ref Vector iiRef = ref Unsafe.As>(ref ii); - - iiRef = x; + seed = seed > 0 ? seed : count; + float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f); + byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); + var actual = new byte[count]; - ref Tuple8.OfByte d = ref MemoryMarshal.Cast(dest)[0]; - d.LoadFrom(ref ii); + convert(source, actual); - this.Output.WriteLine(ii.ToString()); - this.Output.WriteLine(d.ToString()); + Assert.Equal(expected, actual); } + private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f)); + private static void AssertEvenRoundIsCorrect(Vector r, Vector v) { for (int i = 0; i < Vector.Count; i++) diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs index 8d74ccec40..39786a2177 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs @@ -10,6 +10,7 @@ using System.Runtime.InteropServices; using SixLabors.ImageSharp.ColorSpaces.Companding; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Tests.Common; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -1002,6 +1003,19 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations (s, d) => this.Operations.ToRgba64Bytes(this.Configuration, s, d.GetSpan(), count)); } + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromRgbPlanes(int count) + { + SimdUtilsTests.TestPackFromRgbPlanes( + count, + ( + r, + g, + b, + actual) => PixelOperations.Instance.PackFromRgbPlanes(this.Configuration, r, g, b, actual)); + } + public delegate void RefAction(ref T1 arg1); internal static Vector4[] CreateExpectedVector4Data(TPixel[] source, RefAction vectorModifier = null) @@ -1102,10 +1116,10 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations return result; } - internal static byte[] CreateByteTestData(int length) + internal static byte[] CreateByteTestData(int length, int seed = 42) { byte[] result = new byte[length]; - var rnd = new Random(42); // Deterministic random values + var rnd = new Random(seed); // Deterministic random values for (int i = 0; i < result.Length; i++) { From e402700a445ea18be25851a24a8f22de68462833 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 18:21:56 +0100 Subject: [PATCH 08/24] benchmarks & scalar implementation --- .gitattributes | 3 + .../Common/Helpers/SimdUtils.Pack.cs | 99 +++++++ src/ImageSharp/Common/Helpers/SimdUtils.cs | 28 +- src/ImageSharp/ImageSharp.csproj | 10 +- .../PixelConversion_PackFromRgbPlanes.cs | 252 ++++++++++++++++++ 5 files changed, 367 insertions(+), 25 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs create mode 100644 tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs diff --git a/.gitattributes b/.gitattributes index c0bff6e189..7c648c0774 100644 --- a/.gitattributes +++ b/.gitattributes @@ -80,8 +80,11 @@ *.pvr binary *.snk binary *.tga binary +*.tif binary +*.tiff binary *.ttc binary *.ttf binary +*.wbmp binary *.webp binary *.woff binary *.woff2 binary diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs new file mode 100644 index 0000000000..4f3d732b49 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -0,0 +1,99 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); + } + + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + } + + private static void PackFromRgbPlanesScalarBatchedReduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); + ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); + ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + + int count = destination.Length / 4; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + + ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); + ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); + ref ByteTuple4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + + int finished = count * 4; + redChannel = redChannel.Slice(finished); + greenChannel = greenChannel.Slice(finished); + blueChannel = blueChannel.Slice(finished); + destination = destination.Slice(finished); + } + + private static void PackFromRgbPlanesRemainder( + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < destination.Length; i++) + { + ref Rgb24 d = ref Unsafe.Add(ref rgb, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index f37226a1a3..6d82cfad01 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -148,26 +148,6 @@ namespace SixLabors.ImageSharp } } - [MethodImpl(InliningOptions.ShortMethod)] - internal static void PackFromRgbPlanes( - Configuration configuration, - ReadOnlySpan redChannel, - ReadOnlySpan greenChannel, - ReadOnlySpan blueChannel, - Span destination) - { - } - - [MethodImpl(InliningOptions.ShortMethod)] - internal static void PackFromRgbPlanes( - Configuration configuration, - ReadOnlySpan redChannel, - ReadOnlySpan greenChannel, - ReadOnlySpan blueChannel, - Span destination) - { - } - [MethodImpl(InliningOptions.ColdPath)] private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan source, Span dest) { @@ -241,5 +221,13 @@ namespace SixLabors.ImageSharp nameof(source), $"length should be divisible by {shouldBeDivisibleBy}!"); } + + private struct ByteTuple4 + { + public byte V0; + public byte V1; + public byte V2; + public byte V3; + } } } diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index d2d07af54c..66b88489fe 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -25,16 +25,16 @@ - + - + - + - - + + diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs new file mode 100644 index 0000000000..db66ae941d --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -0,0 +1,252 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion +{ + public unsafe class PixelConversion_PackFromRgbPlanes + { + private byte[] rBuf; + private byte[] gBuf; + private byte[] bBuf; + private Rgb24[] rgbBuf; + private Rgba32[] rgbaBuf; + + private float[] rFloat; + private float[] gFloat; + private float[] bFloat; + + private float[] rgbaFloat; + + [Params(512)] + public int Count { get; set; } + + [GlobalSetup] + public void Setup() + { + this.rBuf = new byte[this.Count]; + this.gBuf = new byte[this.Count]; + this.bBuf = new byte[this.Count]; + this.rgbBuf = new Rgb24[this.Count]; + this.rgbaBuf = new Rgba32[this.Count]; + + this.rFloat = new float[this.Count]; + this.gFloat = new float[this.Count]; + this.bFloat = new float[this.Count]; + + this.rgbaFloat = new float[this.Count * 4]; + } + + // [Benchmark(Baseline = true)] + public void Rgb24_Scalar_PerElement_Pinned() + { + fixed (byte* r = &this.rBuf[0]) + fixed (byte* g = &this.gBuf[0]) + fixed (byte* b = &this.bBuf[0]) + fixed (Rgb24* rgb = &this.rgbBuf[0]) + { + for (int i = 0; i < this.Count; i++) + { + Rgb24* d = rgb + i; + d->R = r[i]; + d->G = g[i]; + d->B = b[i]; + } + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Span() + { + Span r = this.rBuf; + Span g = this.rBuf; + Span b = this.rBuf; + Span rgb = this.rgbBuf; + + for (int i = 0; i < rgb.Length; i++) + { + ref Rgb24 d = ref rgb[i]; + d.R = r[i]; + d.G = g[i]; + d.B = b[i]; + } + } + + [Benchmark(Baseline = true)] + public void Rgb24_Scalar_PerElement_Unsafe() + { + ref byte r = ref this.rBuf[0]; + ref byte g = ref this.rBuf[0]; + ref byte b = ref this.rBuf[0]; + ref Rgb24 rgb = ref this.rgbBuf[0]; + + for (int i = 0; i < this.Count; i++) + { + ref Rgb24 d = ref Unsafe.Add(ref rgb, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Batched8() + { + ref Byte8 r = ref Unsafe.As(ref this.rBuf[0]); + ref Byte8 g = ref Unsafe.As(ref this.rBuf[0]); + ref Byte8 b = ref Unsafe.As(ref this.rBuf[0]); + ref Rgb24 rgb = ref this.rgbBuf[0]; + + int count = this.Count / 8; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4); + ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5); + ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6); + ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7); + + ref Byte8 rr = ref Unsafe.Add(ref r, i); + ref Byte8 gg = ref Unsafe.Add(ref g, i); + ref Byte8 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + + d4.R = rr.V4; + d4.G = gg.V4; + d4.B = bb.V4; + + d5.R = rr.V5; + d5.G = gg.V5; + d5.B = bb.V5; + + d6.R = rr.V6; + d6.G = gg.V6; + d6.B = bb.V6; + + d7.R = rr.V7; + d7.G = gg.V7; + d7.B = bb.V7; + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Batched4() + { + ref Byte4 r = ref Unsafe.As(ref this.rBuf[0]); + ref Byte4 g = ref Unsafe.As(ref this.rBuf[0]); + ref Byte4 b = ref Unsafe.As(ref this.rBuf[0]); + ref Rgb24 rgb = ref this.rgbBuf[0]; + + int count = this.Count / 4; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + + ref Byte4 rr = ref Unsafe.Add(ref r, i); + ref Byte4 gg = ref Unsafe.Add(ref g, i); + ref Byte4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void Rgba32_Vector_Float() + { + ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); + ref Vector256 gBase = ref Unsafe.As>(ref this.gFloat[0]); + ref Vector256 bBase = ref Unsafe.As>(ref this.bFloat[0]); + ref Vector256 resultBase = ref Unsafe.As>(ref this.rgbaFloat[0]); + + int count = this.Count / Vector256.Count; + + ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + var va = Vector256.Create(1F); + + for (int i = 0; i < count; i++) + { + Vector256 r = Unsafe.Add(ref rBase, i); + Vector256 g = Unsafe.Add(ref gBase, i); + Vector256 b = Unsafe.Add(ref bBase, i); + + r = Avx2.PermuteVar8x32(r, vcontrol); + g = Avx2.PermuteVar8x32(g, vcontrol); + b = Avx2.PermuteVar8x32(b, vcontrol); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } + } +#endif + +#pragma warning disable SA1132 + private struct Byte8 + { + public byte V0, V1, V2, V3, V4, V5, V6, V7; + } + + private struct Byte4 + { + public byte V0, V1, V2, V3; + } +#pragma warning restore + } +} \ No newline at end of file From 7e33e3fbdba9d03b412f183d19d3a62ffc47b5c6 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 20:07:07 +0100 Subject: [PATCH 09/24] AVX2 implemetation seems to work --- .../Common/Helpers/SimdUtils.Pack.cs | 106 +++++++++++++++++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 39 ++++++- 2 files changed, 143 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index 4f3d732b49..2810a212c6 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -3,10 +3,29 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.PixelFormats; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + namespace SixLabors.ImageSharp { internal static partial class SimdUtils { + private static ReadOnlySpan ShuffleMaskShiftAlpha => + new byte[] + { + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 + }; + + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + new byte[] + { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, + 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 + }; + [MethodImpl(InliningOptions.ShortMethod)] internal static void PackFromRgbPlanes( Configuration configuration, @@ -15,7 +34,17 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { - PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + else +#endif + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); } @@ -29,6 +58,81 @@ namespace SixLabors.ImageSharp { } +#if SUPPORTS_RUNTIME_INTRINSICS + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); + rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); + rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); + rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + + rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); + rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); + rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); + rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + + ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); + ref byte d2 = ref Unsafe.Add(ref d1, 24); + ref byte d3 = ref Unsafe.Add(ref d2, 24); + ref byte d4 = ref Unsafe.Add(ref d3, 24); + + Unsafe.As>(ref d1) = rgb1; + Unsafe.As>(ref d2) = rgb2; + Unsafe.As>(ref d3) = rgb3; + Unsafe.As>(ref d4) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } +#endif + private static void PackFromRgbPlanesScalarBatchedReduce( ref ReadOnlySpan redChannel, ref ReadOnlySpan greenChannel, diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 38b4e2e8c6..878e55c873 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -6,6 +6,7 @@ using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; using SixLabors.ImageSharp.Common.Tuples; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -170,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArbitraryArraySizes = new TheoryData { - 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 520, + 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, }; [Theory] @@ -357,6 +358,42 @@ namespace SixLabors.ImageSharp.Tests.Common SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void PackFromRgbPlanesAvx2Reduce_Rgb24() + { + if (!Avx2.IsSupported) + { + return; + } + + byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray(); + byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray(); + byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray(); + const int padding = 4; + Rgb24[] d = new Rgb24[32 + padding]; + + ReadOnlySpan rr = r.AsSpan(); + ReadOnlySpan gg = g.AsSpan(); + ReadOnlySpan bb = b.AsSpan(); + Span dd = d.AsSpan(); + + SimdUtils.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + + for (int i = 0; i < 32; i++) + { + Assert.Equal(i, d[i].R); + Assert.Equal(i + 100, d[i].G); + Assert.Equal(i + 200, d[i].B); + } + + Assert.Equal(0, rr.Length); + Assert.Equal(0, gg.Length); + Assert.Equal(0, bb.Length); + Assert.Equal(padding, dd.Length); + } +#endif + internal static void TestPackFromRgbPlanes(int count, Action packMethod) where TPixel : unmanaged, IPixel { From 9e0b7fc87464cc7a031ae6ba61f2312efc69e275 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 20:38:47 +0100 Subject: [PATCH 10/24] started Rgba32 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 88 +++++++++++ .../Common/Helpers/SimdUtils.Pack.cs | 147 ++++++++---------- .../PixelFormats/PixelOperations{TPixel}.cs | 5 +- .../PixelConversion_PackFromRgbPlanes.cs | 22 ++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 +- 5 files changed, 176 insertions(+), 88 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index b760301167..13effce3e0 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -7,6 +7,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp { @@ -22,6 +23,20 @@ namespace SixLabors.ImageSharp private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + private static ReadOnlySpan ShuffleMaskShiftAlpha => + new byte[] + { + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 + }; + + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + new byte[] + { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, + 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 + }; + /// /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . @@ -789,6 +804,79 @@ namespace SixLabors.ImageSharp } } } + + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); + rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); + rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); + rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + + rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); + rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); + rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); + rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + + ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); + ref byte d2 = ref Unsafe.Add(ref d1, 24); + ref byte d3 = ref Unsafe.Add(ref d2, 24); + ref byte d4 = ref Unsafe.Add(ref d3, 24); + + Unsafe.As>(ref d1) = rgb1; + Unsafe.As>(ref d2) = rgb2; + Unsafe.As>(ref d3) = rgb3; + Unsafe.As>(ref d4) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index 2810a212c6..db88ef3d91 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -12,20 +12,6 @@ namespace SixLabors.ImageSharp { internal static partial class SimdUtils { - private static ReadOnlySpan ShuffleMaskShiftAlpha => - new byte[] - { - 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, - 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 - }; - - public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => - new byte[] - { - 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, - 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 - }; - [MethodImpl(InliningOptions.ShortMethod)] internal static void PackFromRgbPlanes( Configuration configuration, @@ -34,10 +20,17 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { + int count = redChannel.Length; + DebugGuard.IsTrue(greenChannel.Length == count, "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == count, "Channels must be of same size!"); + + // To avoid overflows, this check is not debug-only: + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); } else #endif @@ -56,101 +49,76 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); } -#if SUPPORTS_RUNTIME_INTRINSICS - internal static void PackFromRgbPlanesAvx2Reduce( + private static void PackFromRgbPlanesScalarBatchedReduce( ref ReadOnlySpan redChannel, ref ReadOnlySpan greenChannel, ref ReadOnlySpan blueChannel, ref Span destination) { - ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); - ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); - ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); - ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - - int count = redChannel.Length / Vector256.Count; - - ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 control1 = Unsafe.As>(ref control1Bytes); - - ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); - Vector256 control2 = Unsafe.As>(ref control2Bytes); - - Vector256 a = Vector256.Create((byte)255); - - Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); + ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); + ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + int count = destination.Length / 4; for (int i = 0; i < count; i++) { - Vector256 r0 = Unsafe.Add(ref rBase, i); - Vector256 g0 = Unsafe.Add(ref gBase, i); - Vector256 b0 = Unsafe.Add(ref bBase, i); - - r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); - g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); - b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); - - Vector256 rg = Avx2.UnpackLow(r0, g0); - Vector256 b1 = Avx2.UnpackLow(b0, a); - - Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); - Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); - - rg = Avx2.UnpackHigh(r0, g0); - b1 = Avx2.UnpackHigh(b0, a); + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); - Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); - Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); + ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); + ref ByteTuple4 bb = ref Unsafe.Add(ref b, i); - rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); - rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); - rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); - rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; - rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); - rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); - rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); - rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; - ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); - ref byte d2 = ref Unsafe.Add(ref d1, 24); - ref byte d3 = ref Unsafe.Add(ref d2, 24); - ref byte d4 = ref Unsafe.Add(ref d3, 24); + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; - Unsafe.As>(ref d1) = rgb1; - Unsafe.As>(ref d2) = rgb2; - Unsafe.As>(ref d3) = rgb3; - Unsafe.As>(ref d4) = rgb4; + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; } - int slice = count * Vector256.Count; - redChannel = redChannel.Slice(slice); - greenChannel = greenChannel.Slice(slice); - blueChannel = blueChannel.Slice(slice); - destination = destination.Slice(slice); + int finished = count * 4; + redChannel = redChannel.Slice(finished); + greenChannel = greenChannel.Slice(finished); + blueChannel = blueChannel.Slice(finished); + destination = destination.Slice(finished); } -#endif private static void PackFromRgbPlanesScalarBatchedReduce( ref ReadOnlySpan redChannel, ref ReadOnlySpan greenChannel, ref ReadOnlySpan blueChannel, - ref Span destination) + ref Span destination) { ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); - ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + ref Rgba32 rgb = ref MemoryMarshal.GetReference(destination); int count = destination.Length / 4; + destination.Fill(new Rgba32(0, 0, 0, 255)); for (int i = 0; i < count; i++) { - ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); - ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); - ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); - ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + ref Rgba32 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgba32 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgba32 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgba32 d3 = ref Unsafe.Add(ref d0, 3); ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); @@ -199,5 +167,26 @@ namespace SixLabors.ImageSharp d.B = Unsafe.Add(ref b, i); } } + + private static void PackFromRgbPlanesRemainder( + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < destination.Length; i++) + { + ref Rgba32 d = ref Unsafe.Add(ref rgba, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + d.A = 255; + } + } } } \ No newline at end of file diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs index e562f333c6..57e5e85828 100644 --- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs +++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs @@ -161,14 +161,15 @@ namespace SixLabors.ImageSharp.PixelFormats } /// - /// Bulk operation that converts 3 seperate RGB channels to + /// Bulk operation that packs 3 seperate RGB channels to . + /// The destination must have a padding of 3. /// /// A to configure internal operations. /// A to the red values. /// A to the green values. /// A to the blue values. /// A to the destination pixels. - public virtual void PackFromRgbPlanes( + internal virtual void PackFromRgbPlanes( Configuration configuration, ReadOnlySpan redChannel, ReadOnlySpan greenChannel, diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index db66ae941d..6a41c4bf44 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion private float[] rgbaFloat; - [Params(512)] + [Params(1024)] public int Count { get; set; } [GlobalSetup] @@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion this.rBuf = new byte[this.Count]; this.gBuf = new byte[this.Count]; this.bBuf = new byte[this.Count]; - this.rgbBuf = new Rgb24[this.Count]; + this.rgbBuf = new Rgb24[this.Count + 3]; // padded this.rgbaBuf = new Rgba32[this.Count]; this.rFloat = new float[this.Count]; @@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion this.rgbaFloat = new float[this.Count * 4]; } - // [Benchmark(Baseline = true)] + // [Benchmark] public void Rgb24_Scalar_PerElement_Pinned() { fixed (byte* r = &this.rBuf[0]) @@ -72,7 +72,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion Span b = this.rBuf; Span rgb = this.rgbBuf; - for (int i = 0; i < rgb.Length; i++) + for (int i = 0; i < r.Length; i++) { ref Rgb24 d = ref rgb[i]; d.R = r[i]; @@ -81,7 +81,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } } - [Benchmark(Baseline = true)] + [Benchmark] public void Rgb24_Scalar_PerElement_Unsafe() { ref byte r = ref this.rBuf[0]; @@ -195,7 +195,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } #if SUPPORTS_RUNTIME_INTRINSICS - [Benchmark] + [Benchmark(Baseline = true)] public void Rgba32_Vector_Float() { ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); @@ -235,6 +235,16 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); } } + + [Benchmark] + public void Rgba32_Vector_Bytes() + { + ReadOnlySpan r = this.rBuf; + ReadOnlySpan g = this.rBuf; + ReadOnlySpan b = this.rBuf; + Span rgb = this.rgbBuf; + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); + } #endif #pragma warning disable SA1132 diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 878e55c873..ae1b5c9e32 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -378,7 +378,7 @@ namespace SixLabors.ImageSharp.Tests.Common ReadOnlySpan bb = b.AsSpan(); Span dd = d.AsSpan(); - SimdUtils.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); for (int i = 0; i < 32; i++) { From 2ff0cb93dce87a52bca81d1b877b7eb5dd84feb6 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:02:28 +0100 Subject: [PATCH 11/24] polishing --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 61 ++++++++++++++++++- .../Common/Helpers/SimdUtils.Pack.cs | 28 ++++++--- .../PixelOperations/Rgb24.PixelOperations.cs | 16 +++++ .../PixelOperations/Rgba32.PixelOperations.cs | 16 +++++ .../PixelConversion_PackFromRgbPlanes.cs | 14 ++++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 35 +++++++++++ 6 files changed, 160 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 13effce3e0..475d64bc4f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -30,7 +30,7 @@ namespace SixLabors.ImageSharp 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 }; - public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, @@ -877,6 +877,65 @@ namespace SixLabors.ImageSharp blueChannel = blueChannel.Slice(slice); destination = destination.Slice(slice); } + + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref Vector256 dBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + ref Vector256 d0 = ref Unsafe.Add(ref dBase, i * 4); + d0 = rgb1; + Unsafe.Add(ref d0, 1) = rgb2; + Unsafe.Add(ref d0, 2) = rgb3; + Unsafe.Add(ref d0, 3) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index db88ef3d91..8cd15e01bc 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -1,3 +1,6 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -20,12 +23,9 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { - int count = redChannel.Length; - DebugGuard.IsTrue(greenChannel.Length == count, "Channels must be of same size!"); - DebugGuard.IsTrue(blueChannel.Length == count, "Channels must be of same size!"); - - // To avoid overflows, this check is not debug-only: - Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(destination.Length > redChannel.Length + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) @@ -49,7 +49,21 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { - PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(destination.Length > redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!"); + +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + else +#endif + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs index 73b656f363..ebed7aadd6 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs @@ -21,6 +21,22 @@ namespace SixLabors.ImageSharp.PixelFormats /// public override PixelTypeInfo GetPixelTypeInfo() => LazyInfo.Value; + + /// + internal override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + + SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs index d8322e37d4..0ddc2f8b1e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs @@ -56,6 +56,22 @@ namespace SixLabors.ImageSharp.PixelFormats MemoryMarshal.Cast(sourceVectors), MemoryMarshal.Cast(destinationPixels)); } + + /// + internal override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count, nameof(destination), "'destination' span should not be shorter than the source channels!"); + + SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination); + } } } } diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index 6a41c4bf44..11714027a5 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -196,7 +196,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion #if SUPPORTS_RUNTIME_INTRINSICS [Benchmark(Baseline = true)] - public void Rgba32_Vector_Float() + public void Rgba32_Avx2_Float() { ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); ref Vector256 gBase = ref Unsafe.As>(ref this.gFloat[0]); @@ -237,7 +237,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } [Benchmark] - public void Rgba32_Vector_Bytes() + public void Rgba24_Avx2_Bytes() { ReadOnlySpan r = this.rBuf; ReadOnlySpan g = this.rBuf; @@ -245,6 +245,16 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion Span rgb = this.rgbBuf; SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); } + + [Benchmark] + public void Rgba32_Avx2_Bytes() + { + ReadOnlySpan r = this.rBuf; + ReadOnlySpan g = this.rBuf; + ReadOnlySpan b = this.rBuf; + Span rgb = this.rgbaBuf; + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); + } #endif #pragma warning disable SA1132 diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index ae1b5c9e32..565ea5f6da 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -392,6 +392,41 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(0, bb.Length); Assert.Equal(padding, dd.Length); } + + [Fact] + public void PackFromRgbPlanesAvx2Reduce_Rgba32() + { + if (!Avx2.IsSupported) + { + return; + } + + byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray(); + byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray(); + byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray(); + + Rgba32[] d = new Rgba32[32]; + + ReadOnlySpan rr = r.AsSpan(); + ReadOnlySpan gg = g.AsSpan(); + ReadOnlySpan bb = b.AsSpan(); + Span dd = d.AsSpan(); + + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + + for (int i = 0; i < 32; i++) + { + Assert.Equal(i, d[i].R); + Assert.Equal(i + 100, d[i].G); + Assert.Equal(i + 200, d[i].B); + Assert.Equal(255, d[i].A); + } + + Assert.Equal(0, rr.Length); + Assert.Equal(0, gg.Length); + Assert.Equal(0, bb.Length); + Assert.Equal(0, dd.Length); + } #endif internal static void TestPackFromRgbPlanes(int count, Action packMethod) From 6252f65408c8cd0f481fc528162b5d727b56c98e Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:10:44 +0100 Subject: [PATCH 12/24] unhack --- src/ImageSharp/ImageSharp.csproj | 3 +-- tests/ImageSharp.Tests/ImageSharp.Tests.csproj | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index 66b88489fe..a90aaf715a 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -12,8 +12,7 @@ $(RepositoryUrl) Image Resize Crop Gif Jpg Jpeg Bitmap Png Tga NetCore A new, fully featured, fully managed, cross-platform, 2D graphics API for .NET - - netcoreapp3.1 + netcoreapp3.1;netcoreapp2.1;netstandard2.1;netstandard2.0;netstandard1.3;net472 diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index ae920775a2..5426144401 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -2,8 +2,7 @@ - - netcoreapp3.1 + netcoreapp3.1;netcoreapp2.1;net472 True SixLabors.ImageSharp.Tests AnyCPU;x64;x86 From 02ac45971b4a56e34772758aecaa4407dbcae3f1 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:20:10 +0100 Subject: [PATCH 13/24] add benchmark results and fix PixelOperations --- .../PixelOperations/Rgb24.PixelOperations.cs | 1 + .../PixelOperations/Rgba32.PixelOperations.cs | 1 + .../PixelFormats/PixelOperations{TPixel}.cs | 23 +++++++++++++++---- .../PixelConversion_PackFromRgbPlanes.cs | 14 +++++++++++ .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 4 ++-- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs index ebed7aadd6..f345f58bcd 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs @@ -30,6 +30,7 @@ namespace SixLabors.ImageSharp.PixelFormats ReadOnlySpan blueChannel, Span destination) { + Guard.NotNull(configuration, nameof(configuration)); int count = redChannel.Length; Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs index 0ddc2f8b1e..9633059774 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs @@ -65,6 +65,7 @@ namespace SixLabors.ImageSharp.PixelFormats ReadOnlySpan blueChannel, Span destination) { + Guard.NotNull(configuration, nameof(configuration)); int count = redChannel.Length; Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs index 57e5e85828..c5450538e4 100644 --- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs +++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs @@ -4,6 +4,8 @@ using System; using System.Buffers; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; @@ -177,13 +179,26 @@ namespace SixLabors.ImageSharp.PixelFormats Span destination) { Guard.NotNull(configuration, nameof(configuration)); + + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); - for (int i = 0; i < destination.Length; i++) - { - var rgb24 = new Rgb24(redChannel[i], greenChannel[i], blueChannel[i]); + Rgb24 rgb24 = default; + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref TPixel d = ref MemoryMarshal.GetReference(destination); - destination[i].FromRgb24(rgb24); + for (int i = 0; i < count; i++) + { + rgb24.R = Unsafe.Add(ref r, i); + rgb24.G = Unsafe.Add(ref g, i); + rgb24.B = Unsafe.Add(ref b, i); + Unsafe.Add(ref d, i).FromRgb24(rgb24); } } } diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index 11714027a5..6b2ff90f76 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -268,5 +268,19 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion public byte V0, V1, V2, V3; } #pragma warning restore + + // Results @ Anton's PC, 2020 Dec 05 + // .NET Core 3.1.1 + // Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores + // + // | Method | Count | Mean | Error | StdDev | Ratio | RatioSD | + // |--------------------------------- |------ |-----------:|---------:|---------:|------:|--------:| + // | Rgb24_Scalar_PerElement_Span | 1024 | 1,634.6 ns | 26.56 ns | 24.84 ns | 3.12 | 0.05 | + // | Rgb24_Scalar_PerElement_Unsafe | 1024 | 1,284.7 ns | 4.70 ns | 4.16 ns | 2.46 | 0.01 | + // | Rgb24_Scalar_PerElement_Batched8 | 1024 | 1,182.3 ns | 5.12 ns | 4.27 ns | 2.26 | 0.01 | + // | Rgb24_Scalar_PerElement_Batched4 | 1024 | 1,146.2 ns | 16.38 ns | 14.52 ns | 2.19 | 0.02 | + // | Rgba32_Avx2_Float | 1024 | 522.7 ns | 1.78 ns | 1.39 ns | 1.00 | 0.00 | + // | Rgba24_Avx2_Bytes | 1024 | 243.3 ns | 1.56 ns | 1.30 ns | 0.47 | 0.00 | + // | Rgba32_Avx2_Bytes | 1024 | 146.0 ns | 2.48 ns | 2.32 ns | 0.28 | 0.01 | } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 565ea5f6da..0ee43d74ba 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -443,10 +443,10 @@ namespace SixLabors.ImageSharp.Tests.Common expected[i].FromRgb24(new Rgb24(r[i], g[i], b[i])); } - TPixel[] actual = new TPixel[count]; + TPixel[] actual = new TPixel[count + 3]; // padding for Rgb24 AVX2 packMethod(r, g, b, actual); - Assert.Equal(expected, actual); + Assert.True(expected.AsSpan().SequenceEqual(actual.AsSpan().Slice(0, count))); } private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( From 681a89c9b1c726796d6db93791bed10df9344313 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:31:32 +0100 Subject: [PATCH 14/24] fix typo --- .../PixelConversion/PixelConversion_PackFromRgbPlanes.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index 6b2ff90f76..eade8e0c43 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -237,7 +237,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } [Benchmark] - public void Rgba24_Avx2_Bytes() + public void Rgb24_Avx2_Bytes() { ReadOnlySpan r = this.rBuf; ReadOnlySpan g = this.rBuf; @@ -280,7 +280,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion // | Rgb24_Scalar_PerElement_Batched8 | 1024 | 1,182.3 ns | 5.12 ns | 4.27 ns | 2.26 | 0.01 | // | Rgb24_Scalar_PerElement_Batched4 | 1024 | 1,146.2 ns | 16.38 ns | 14.52 ns | 2.19 | 0.02 | // | Rgba32_Avx2_Float | 1024 | 522.7 ns | 1.78 ns | 1.39 ns | 1.00 | 0.00 | - // | Rgba24_Avx2_Bytes | 1024 | 243.3 ns | 1.56 ns | 1.30 ns | 0.47 | 0.00 | + // | Rgb24_Avx2_Bytes | 1024 | 243.3 ns | 1.56 ns | 1.30 ns | 0.47 | 0.00 | // | Rgba32_Avx2_Bytes | 1024 | 146.0 ns | 2.48 ns | 2.32 ns | 0.28 | 0.01 | } } \ No newline at end of file From 88f9e53fe7f14ea9f181e4e326a648d3a2cc3382 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:32:45 +0100 Subject: [PATCH 15/24] revert .gitattributes --- .gitattributes | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 7c648c0774..c0bff6e189 100644 --- a/.gitattributes +++ b/.gitattributes @@ -80,11 +80,8 @@ *.pvr binary *.snk binary *.tga binary -*.tif binary -*.tiff binary *.ttc binary *.ttf binary -*.wbmp binary *.webp binary *.woff binary *.woff2 binary From 2cc71f4101c8ad94b5cbe42ed6b4a7a08200f9c2 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:48:22 +0100 Subject: [PATCH 16/24] fix build --- tests/ImageSharp.Tests/Common/SimdUtilsTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 0ee43d74ba..1f680aa6cc 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -5,9 +5,9 @@ using System; using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics.X86; -using SixLabors.ImageSharp.Common.Tuples; +#endif using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; From 8fb339305538f0de2213089bb89dd9f5b91120ff Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 22:02:27 +0100 Subject: [PATCH 17/24] fix scalar code --- src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index 8cd15e01bc..fe02bd0072 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -78,7 +78,7 @@ namespace SixLabors.ImageSharp ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); - int count = destination.Length / 4; + int count = redChannel.Length / 4; for (int i = 0; i < count; i++) { ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); ref Rgba32 rgb = ref MemoryMarshal.GetReference(destination); - int count = destination.Length / 4; + int count = redChannel.Length / 4; destination.Fill(new Rgba32(0, 0, 0, 255)); for (int i = 0; i < count; i++) { From 25d3d817cd792879430bff85e0be47f8cdeb1923 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 5 Dec 2020 23:51:12 +0000 Subject: [PATCH 18/24] Explicit in --- .../Processors/Convolution/Convolution2PassProcessor{TPixel}.cs | 2 +- .../Processors/Convolution/ConvolutionProcessor{TPixel}.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 95fd3b83cc..e05892fa54 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -144,7 +144,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - var state = new ConvolutionState(this.kernel, this.map); + var state = new ConvolutionState(in this.kernel, this.map); int row = y - this.bounds.Y; if (this.preserveAlpha) diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index 191460f40b..d6be1dc563 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -111,7 +111,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - var state = new ConvolutionState(this.kernel, this.map); + var state = new ConvolutionState(in this.kernel, this.map); int row = y - this.bounds.Y; if (this.preserveAlpha) From 277751c038f3c669ac4abbd9791f088b98e009fb Mon Sep 17 00:00:00 2001 From: Max Eskin Date: Sat, 5 Dec 2020 19:05:41 -0500 Subject: [PATCH 19/24] Filter processor should use scaled vectors. --- .../Processing/Processors/Filters/FilterProcessor{TPixel}.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs index 4dc9e41960..d0c8ff40d7 100644 --- a/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs @@ -72,11 +72,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Filters public void Invoke(int y, Span span) { Span rowSpan = this.source.GetPixelRowSpan(y).Slice(this.startX, span.Length); - PixelOperations.Instance.ToVector4(this.configuration, rowSpan, span); + PixelOperations.Instance.ToVector4(this.configuration, rowSpan, span, PixelConversionModifiers.Scale); ColorNumerics.Transform(span, ref Unsafe.AsRef(this.matrix)); - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, rowSpan); + PixelOperations.Instance.FromVector4Destructive(this.configuration, span, rowSpan, PixelConversionModifiers.Scale); } } } From e321a5054c5cf682ddafc5f215f6959a0d2aa73b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 6 Dec 2020 00:27:14 +0000 Subject: [PATCH 20/24] Use faster GetSpan() --- .../Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs index a7a51f77dd..0c35c88286 100644 --- a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs +++ b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs @@ -53,8 +53,13 @@ namespace SixLabors.ImageSharp.Memory { ThrowObjectDisposedException(); } - +#if SUPPORTS_CREATESPAN + ref byte r0 = ref MemoryMarshal.GetReference(this.Data); + return MemoryMarshal.CreateSpan(ref Unsafe.As(ref r0), this.length); +#else return MemoryMarshal.Cast(this.Data.AsSpan()).Slice(0, this.length); +#endif + } /// From 0f94c5ed41c32ef7409762dca1d34714af1c82a3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 6 Dec 2020 13:54:23 +0000 Subject: [PATCH 21/24] Avoid per-index multiply. --- .../Processors/Convolution/Convolution2DState.cs | 12 ++++++------ .../Processors/Convolution/ConvolutionState.cs | 10 +++++----- .../Processing/Processors/Convolution/Convolver.cs | 12 ++++++++---- .../Processors/Convolution/KernelSamplingMap.cs | 6 ++++-- tests/ImageSharp.Benchmarks/Config.cs | 8 ++++++++ tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs | 2 +- 6 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs index e36d458a4a..9d17ebab05 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs @@ -31,24 +31,24 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.columnOffsetMap = map.GetColumnOffsetSpan(); } - public ReadOnlyKernel KernelY + public readonly ReadOnlyKernel KernelY { [MethodImpl(MethodImplOptions.AggressiveInlining)] get; } - public ReadOnlyKernel KernelX + public readonly ReadOnlyKernel KernelX { [MethodImpl(MethodImplOptions.AggressiveInlining)] get; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetRowSampleOffset(int row, int kernelRow) - => Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), (row * this.kernelHeight) + kernelRow); + public readonly ref int GetSampleOffsetRow(int row) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetColumnSampleOffset(int column, int kernelColumn) - => Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), (column * this.kernelWidth) + kernelColumn); + public readonly ref int GetSampleOffsetColumn(int column) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs index 97a3af342e..851eeec247 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs @@ -28,18 +28,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.columnOffsetMap = map.GetColumnOffsetSpan(); } - public ReadOnlyKernel Kernel + public readonly ReadOnlyKernel Kernel { [MethodImpl(MethodImplOptions.AggressiveInlining)] get; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetRowSampleOffset(int row, int kernelRow) - => Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), (row * this.kernelHeight) + kernelRow); + public readonly ref int GetSampleOffsetRow(int row) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetColumnSampleOffset(int column, int kernelColumn) - => Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), (column * this.kernelWidth) + kernelColumn); + public readonly ref int GetSampleOffsetColumn(int column) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs index 5ddc8e85c6..c23b71b330 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs @@ -100,14 +100,16 @@ namespace SixLabors.ImageSharp Vector4 vectorY = default; Vector4 vectorX = default; + ref int sampleOffsetRowBase = ref state.GetSampleOffsetRow(row); for (int y = 0; y < kernelHeight; y++) { - int offsetY = state.GetRowSampleOffset(row, y); + int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); + ref int sampleOffsetColumnBase = ref state.GetSampleOffsetColumn(column); for (int x = 0; x < kernelWidth; x++) { - int offsetX = state.GetColumnSampleOffset(column, x); + int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, x); var sample = Unsafe.Add(ref sourceRowBase, offsetX).ToVector4(); Numerics.Premultiply(ref sample); vectorX += kernelX[y, x] * sample; @@ -199,14 +201,16 @@ namespace SixLabors.ImageSharp int kernelHeight = kernel.Rows; int kernelWidth = kernel.Columns; + ref int sampleOffsetRowBase = ref state.GetSampleOffsetRow(row); for (int y = 0; y < kernelHeight; y++) { - int offsetY = state.GetRowSampleOffset(row, y); + int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); + ref int sampleOffsetColumnBase = ref state.GetSampleOffsetColumn(column); for (int x = 0; x < kernelWidth; x++) { - int offsetX = state.GetColumnSampleOffset(column, x); + int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, x); var sample = Unsafe.Add(ref sourceRowBase, offsetX).ToVector4(); Numerics.Premultiply(ref sample); targetVector += kernel[y, x] * sample; diff --git a/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs index 144d356c6e..e4b7dbea09 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs @@ -52,9 +52,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref int ySpanBase = ref MemoryMarshal.GetReference(ySpan); for (int row = 0; row < bounds.Height; row++) { + int rowBase = row * kernelHeight; for (int y = 0; y < kernelHeight; y++) { - Unsafe.Add(ref ySpanBase, (row * kernelHeight) + y) = row + y + minY - radiusY; + Unsafe.Add(ref ySpanBase, rowBase + y) = row + y + minY - radiusY; } } @@ -67,9 +68,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref int xSpanBase = ref MemoryMarshal.GetReference(xSpan); for (int column = 0; column < bounds.Width; column++) { + int columnBase = column * kernelWidth; for (int x = 0; x < kernelWidth; x++) { - Unsafe.Add(ref xSpanBase, (column * kernelWidth) + x) = column + x + minX - radiusX; + Unsafe.Add(ref xSpanBase, columnBase + x) = column + x + minX - radiusX; } } diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index 4c9f6c06db..d08e2f2d66 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -27,6 +27,14 @@ namespace SixLabors.ImageSharp.Benchmarks } + public class MultiFramework : Config + { + public MultiFramework() => this.AddJob( + Job.Default.WithRuntime(ClrRuntime.Net472), + Job.Default.WithRuntime(CoreRuntime.Core21), + Job.Default.WithRuntime(CoreRuntime.Core31)); + } + public class ShortClr : Config { public ShortClr() => this.AddJob( diff --git a/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs b/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs index 62d5806037..8f009e58f1 100644 --- a/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs +++ b/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs @@ -7,7 +7,7 @@ using SixLabors.ImageSharp.Processing; namespace SixLabors.ImageSharp.Benchmarks.Samplers { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.MultiFramework))] public class GaussianBlur { [Benchmark] From caba642513933ad57cef05687fdbcc7149194aba Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 7 Dec 2020 14:26:29 +0000 Subject: [PATCH 22/24] Working version per-row --- .../Convolution2DProcessor{TPixel}.cs | 11 +- .../Convolution2DRowOperation{TPixel}.cs | 193 ++++++++++++++++++ .../Convolution/Convolution2DState.cs | 4 +- .../Convolution2PassProcessor{TPixel}.cs | 88 +------- .../ConvolutionProcessor{TPixel}.cs | 94 +++++++-- .../ConvolutionRowOperation{TPixel}.cs | 169 +++++++++++++++ .../Convolution/ConvolutionState.cs | 4 +- .../Processors/Convolution/Convolver.cs | 96 ++++++++- 8 files changed, 552 insertions(+), 107 deletions(-) create mode 100644 src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs create mode 100644 src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index 249c73e8d6..e787b3ec78 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -66,12 +66,17 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + + // We use a rectangle 3x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 3, interest.Height); + using (var map = new KernelSamplingMap(allocator)) { // Since the kernel sizes are identical we can use a single map. map.BuildSamplingOffsetMap(this.KernelY, interest); - var operation = new RowOperation( + var operation = new Convolution2DRowOperation( interest, targetPixels, source.PixelBuffer, @@ -81,9 +86,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( + ParallelRowIterator.IterateRows, Vector4>( this.Configuration, - interest, + operationBounds, in operation); } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs new file mode 100644 index 0000000000..6528a2f851 --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A implementing the logic for 2D convolution. + /// + internal readonly struct Convolution2DRowOperation : IRowOperation + where TPixel : unmanaged, IPixel + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrixY; + private readonly DenseMatrix kernelMatrixX; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Convolution2DRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrixY, + DenseMatrix kernelMatrixX, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrixY = kernelMatrixY; + this.kernelMatrixX = kernelMatrixX; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 3x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, boundsWidth); + Span targetYBuffer = span.Slice(boundsWidth, boundsWidth); + Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); + + var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffers for each row run. + targetYBuffer.Clear(); + targetXBuffer.Clear(); + ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); + ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + + Span sourceRow; + for (int kY = 0; kY < kernelY.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x); + ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x); + + for (int kX = 0; kX < kernelY.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + targetY += kernelX[kY, kX] * sample; + targetX += kernelY[kY, kX] * sample; + } + } + } + + // Now we need to combine the values and copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); + Vector4 vectorY = target; + Vector4 vectorX = Unsafe.Add(ref targetBaseX, x); + + target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRowSpan); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, boundsWidth); + Span targetYBuffer = span.Slice(boundsWidth, boundsWidth); + Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); + + var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffers for each row run. + targetYBuffer.Clear(); + targetXBuffer.Clear(); + ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); + ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + + for (int kY = 0; kY < kernelY.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x); + ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x); + + for (int kX = 0; kX < kernelY.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + targetY += kernelX[kY, kX] * sample; + targetX += kernelY[kY, kX] * sample; + } + } + } + + for (int x = 0; x < targetYBuffer.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); + Vector4 vectorY = target; + Vector4 vectorX = Unsafe.Add(ref targetBaseX, x); + + target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + } + + Numerics.UnPremultiply(targetYBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow); + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs index 9d17ebab05..218093ac4e 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs @@ -44,11 +44,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetRow(int row) + public readonly ref int GetSampleRow(int row) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetColumn(int column) + public readonly ref int GetSampleColumn(int column) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index e05892fa54..151b0ffccc 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -63,12 +63,16 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + // We use a rectangle 2x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); + using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator)) { mapX.BuildSamplingOffsetMap(this.KernelX, interest); // Horizontal convolution - var horizontalOperation = new RowOperation( + var horizontalOperation = new ConvolutionRowOperation( interest, firstPassPixels, source.PixelBuffer, @@ -77,9 +81,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( + ParallelRowIterator.IterateRows, Vector4>( this.Configuration, - interest, + operationBounds, in horizontalOperation); } @@ -88,7 +92,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution mapY.BuildSamplingOffsetMap(this.KernelY, interest); // Vertical convolution - var verticalOperation = new RowOperation( + var verticalOperation = new ConvolutionRowOperation( interest, source.PixelBuffer, firstPassPixels, @@ -97,83 +101,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( + ParallelRowIterator.IterateRows, Vector4>( this.Configuration, - interest, + operationBounds, in verticalOperation); } } - - /// - /// A implementing the convolution logic for . - /// - private readonly struct RowOperation : IRowOperation - { - private readonly Rectangle bounds; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly KernelSamplingMap map; - private readonly DenseMatrix kernel; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(InliningOptions.ShortMethod)] - public RowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - KernelSamplingMap map, - DenseMatrix kernel, - Configuration configuration, - bool preserveAlpha) - { - this.bounds = bounds; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.map = map; - this.kernel = kernel; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void Invoke(int y, Span span) - { - ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - - var state = new ConvolutionState(in this.kernel, this.map); - int row = y - this.bounds.Y; - - if (this.preserveAlpha) - { - for (int column = 0; column < this.bounds.Width; column++) - { - Convolver.Convolve3( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); - } - } - else - { - for (int column = 0; column < this.bounds.Width; column++) - { - Convolver.Convolve4( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); - } - } - - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); - } - } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index d6be1dc563..924a1125bd 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -57,6 +57,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + + // We use a rectangle 2x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); using (var map = new KernelSamplingMap(allocator)) { map.BuildSamplingOffsetMap(this.KernelXY, interest); @@ -64,7 +68,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); ParallelRowIterator.IterateRows( this.Configuration, - interest, + operationBounds, in operation); } @@ -107,39 +111,93 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); var state = new ConvolutionState(in this.kernel, this.map); int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); if (this.preserveAlpha) { - for (int column = 0; column < this.bounds.Width; column++) + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + Span sourceRow; + for (int kY = 0; kY < state.Kernel.Rows; kY++) { - Convolver.Convolve3( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < state.Kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += state.Kernel[kY, kX] * sample; + } + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; } } else { - for (int column = 0; column < this.bounds.Width; column++) + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + for (int kY = 0; kY < state.Kernel.Rows; kY++) { - Convolver.Convolve4( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < state.Kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += state.Kernel[kY, kX] * sample; + } + } } + + Numerics.UnPremultiply(targetBuffer); } - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs new file mode 100644 index 0000000000..82aecdaf7b --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs @@ -0,0 +1,169 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A implementing the logic for 1D convolution. + /// + internal readonly struct ConvolutionRowOperation : IRowOperation + where TPixel : unmanaged, IPixel + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrix; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrix, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrix = kernelMatrix; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ReadOnlyKernel kernel = state.Kernel; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + Span sourceRow; + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += kernel[kY, kX] * sample; + } + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ReadOnlyKernel kernel = state.Kernel; + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += kernel[kY, kX] * sample; + } + } + } + + Numerics.UnPremultiply(targetBuffer); + + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs index 851eeec247..3f296c67df 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs @@ -35,11 +35,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetRow(int row) + public readonly ref int GetSampleRow(int row) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly ref int GetSampleOffsetColumn(int column) + public readonly ref int GetSampleColumn(int column) => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs index c23b71b330..721f7bbad1 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -100,12 +101,12 @@ namespace SixLabors.ImageSharp Vector4 vectorY = default; Vector4 vectorX = default; - ref int sampleOffsetRowBase = ref state.GetSampleOffsetRow(row); + ref int sampleOffsetRowBase = ref state.GetSampleRow(row); for (int y = 0; y < kernelHeight; y++) { int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); - ref int sampleOffsetColumnBase = ref state.GetSampleOffsetColumn(column); + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(column); for (int x = 0; x < kernelWidth; x++) { @@ -188,6 +189,93 @@ namespace SixLabors.ImageSharp target = vector; } + /// + /// Computes the sum of vectors in the span referenced by weighted + /// by the kernel weight values. + /// Using this method the convolution filter is not applied to alpha in addition + /// to the color channels. + /// + /// The convolution kernel state. + /// The source row. + /// The target row. + /// The current kernel row. + /// The interest x-bounds relative to the interest image. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ConvolveRow3( + in ConvolutionState state, + Span sourceRow, + Span targetRow, + int kY, + int bX) + { + ReadOnlyKernel kernel = state.Kernel; + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetRow); + + Numerics.Premultiply(sourceRow); + + for (int x = 0; x < sourceRow.Length; x++) + { + Vector4 vector = default; + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, kX) - bX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + vector += kernel[kY, kX] * sample; + } + + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + vector.W = target.W; + Numerics.UnPremultiply(ref vector); + target = vector; + } + } + + /// + /// Computes the sum of vectors in the span referenced by weighted + /// by the kernel weight values. + /// Using this method the convolution filter is applied to alpha in addition to the + /// color channels. + /// + /// The convolution kernel state. + /// The source row. + /// The target row. + /// The current kernel row. + /// The interest x-bounds relative to the interest image. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ConvolveRow4( + in ConvolutionState state, + Span sourceRow, + Span targetRow, + int kY, + int bX) + where TPixel : unmanaged, IPixel + { + ReadOnlyKernel kernel = state.Kernel; + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetRow); + + Numerics.Premultiply(sourceRow); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, kX) - bX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += kernel[kY, kX] * sample; + } + } + + Numerics.UnPremultiply(targetRow); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void ConvolveImpl( in ConvolutionState state, @@ -201,12 +289,12 @@ namespace SixLabors.ImageSharp int kernelHeight = kernel.Rows; int kernelWidth = kernel.Columns; - ref int sampleOffsetRowBase = ref state.GetSampleOffsetRow(row); + ref int sampleOffsetRowBase = ref state.GetSampleRow(row); for (int y = 0; y < kernelHeight; y++) { int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); - ref int sampleOffsetColumnBase = ref state.GetSampleOffsetColumn(column); + ref int sampleOffsetColumnBase = ref state.GetSampleColumn(column); for (int x = 0; x < kernelWidth; x++) { From a4ff07edd2b02ea194584772e56df7d7fde73d34 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 7 Dec 2020 14:53:47 +0000 Subject: [PATCH 23/24] Cleanup --- .../Convolution2DProcessor{TPixel}.cs | 78 ----- .../Convolution2DRowOperation{TPixel}.cs | 36 +- .../ConvolutionRowOperation{TPixel}.cs | 38 +-- .../Processors/Convolution/Convolver.cs | 309 ------------------ 4 files changed, 34 insertions(+), 427 deletions(-) delete mode 100644 src/ImageSharp/Processing/Processors/Convolution/Convolver.cs diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index e787b3ec78..bb559019b7 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -1,10 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -94,80 +91,5 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); } - - /// - /// A implementing the convolution logic for . - /// - private readonly struct RowOperation : IRowOperation - { - private readonly Rectangle bounds; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly KernelSamplingMap map; - private readonly DenseMatrix kernelY; - private readonly DenseMatrix kernelX; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(InliningOptions.ShortMethod)] - public RowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - KernelSamplingMap map, - DenseMatrix kernelY, - DenseMatrix kernelX, - Configuration configuration, - bool preserveAlpha) - { - this.bounds = bounds; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.map = map; - this.kernelY = kernelY; - this.kernelX = kernelX; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void Invoke(int y, Span span) - { - ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - - var state = new Convolution2DState(this.kernelY, this.kernelX, this.map); - int row = y - this.bounds.Y; - - if (this.preserveAlpha) - { - for (int column = 0; column < this.bounds.Width; column++) - { - Convolver.Convolve2D3( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); - } - } - else - { - for (int column = 0; column < this.bounds.Width; column++) - { - Convolver.Convolve2D4( - in state, - this.sourcePixels, - ref targetRowRef, - row, - column); - } - } - - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); - } - } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs index 6528a2f851..802d1809f2 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs @@ -72,10 +72,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); - ReadOnlyKernel kernelY = state.KernelY; - ReadOnlyKernel kernelX = state.KernelX; - int row = y - this.bounds.Y; - ref int sampleRowBase = ref state.GetSampleRow(row); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); // Clear the target buffers for each row run. targetYBuffer.Clear(); @@ -83,12 +80,14 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; Span sourceRow; for (int kY = 0; kY < kernelY.Rows; kY++) { // Get the precalculated source sample row for this kernel row and copy to our buffer. - int offsetY = Unsafe.Add(ref sampleRowBase, kY); - sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); @@ -101,15 +100,16 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int kX = 0; kX < kernelY.Columns; kX++) { - int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); targetY += kernelX[kY, kX] * sample; targetX += kernelY[kY, kX] * sample; } } } - // Now we need to combine the values and copy the original alpha values from the source row. + // Now we need to combine the values and copy the original alpha values + // from the source row. sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); @@ -130,7 +130,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(MethodImplOptions.AggressiveInlining)] private void Convolve4(int y, Span span) { - // Span is 2x bounds. + // Span is 3x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; Span sourceBuffer = span.Slice(0, boundsWidth); @@ -138,10 +138,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); - ReadOnlyKernel kernelY = state.KernelY; - ReadOnlyKernel kernelX = state.KernelX; - int row = y - this.bounds.Y; - ref int sampleRowBase = ref state.GetSampleRow(row); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); // Clear the target buffers for each row run. targetYBuffer.Clear(); @@ -149,11 +146,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; for (int kY = 0; kY < kernelY.Rows; kY++) { // Get the precalculated source sample row for this kernel row and copy to our buffer. - int offsetY = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); Numerics.Premultiply(sourceBuffer); @@ -167,14 +166,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int kX = 0; kX < kernelY.Columns; kX++) { - int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); targetY += kernelX[kY, kX] * sample; targetX += kernelY[kY, kX] * sample; } } } + // Now we need to combine the values for (int x = 0; x < targetYBuffer.Length; x++) { ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs index 82aecdaf7b..9876b2885b 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs @@ -67,24 +67,20 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); - ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ReadOnlyKernel kernel = state.Kernel; - int row = y - this.bounds.Y; - ref int sampleRowBase = ref state.GetSampleRow(row); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); // Clear the target buffer for each row run. targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ReadOnlyKernel kernel = state.Kernel; Span sourceRow; for (int kY = 0; kY < kernel.Rows; kY++) { // Get the precalculated source sample row for this kernel row and copy to our buffer. - int offsetY = Unsafe.Add(ref sampleRowBase, kY); - sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); @@ -96,8 +92,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int kX = 0; kX < kernel.Columns; kX++) { - int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); target += kernel[kY, kX] * sample; } } @@ -113,7 +109,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; } - PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -125,23 +122,19 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); - ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ReadOnlyKernel kernel = state.Kernel; - int row = y - this.bounds.Y; - ref int sampleRowBase = ref state.GetSampleRow(row); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); // Clear the target buffer for each row run. targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ReadOnlyKernel kernel = state.Kernel; for (int kY = 0; kY < kernel.Rows; kY++) { // Get the precalculated source sample row for this kernel row and copy to our buffer. - int offsetY = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); Numerics.Premultiply(sourceBuffer); @@ -154,8 +147,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int kX = 0; kX < kernel.Columns; kX++) { - int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); target += kernel[kY, kX] * sample; } } @@ -163,7 +156,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Numerics.UnPremultiply(targetBuffer); - PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs deleted file mode 100644 index 721f7bbad1..0000000000 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolver.cs +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; -using SixLabors.ImageSharp.Processing.Processors.Convolution; - -namespace SixLabors.ImageSharp -{ - /// - /// Provides methods to perform convolution operations. - /// - internal static class Convolver - { - /// - /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values. - /// Using this method the convolution filter is not applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The 2D convolution kernels state. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Convolve2D3( - in Convolution2DState state, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column) - where TPixel : unmanaged, IPixel - { - Vector4 vector = default; - - Convolve2DImpl( - in state, - sourcePixels, - row, - column, - ref vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - vector.W = target.W; - - Numerics.UnPremultiply(ref vector); - target = vector; - } - - /// - /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values. - /// Using this method the convolution filter is applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The 2D convolution kernels state. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Convolve2D4( - in Convolution2DState state, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column) - where TPixel : unmanaged, IPixel - { - Vector4 vector = default; - - Convolve2DImpl( - in state, - sourcePixels, - row, - column, - ref vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - Numerics.UnPremultiply(ref vector); - target = vector; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Convolve2DImpl( - in Convolution2DState state, - Buffer2D sourcePixels, - int row, - int column, - ref Vector4 targetVector) - where TPixel : unmanaged, IPixel - { - ReadOnlyKernel kernelY = state.KernelY; - ReadOnlyKernel kernelX = state.KernelX; - int kernelHeight = kernelY.Rows; - int kernelWidth = kernelY.Columns; - - Vector4 vectorY = default; - Vector4 vectorX = default; - - ref int sampleOffsetRowBase = ref state.GetSampleRow(row); - for (int y = 0; y < kernelHeight; y++) - { - int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); - ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); - ref int sampleOffsetColumnBase = ref state.GetSampleColumn(column); - - for (int x = 0; x < kernelWidth; x++) - { - int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, x); - var sample = Unsafe.Add(ref sourceRowBase, offsetX).ToVector4(); - Numerics.Premultiply(ref sample); - vectorX += kernelX[y, x] * sample; - vectorY += kernelY[y, x] * sample; - } - } - - targetVector = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); - } - - /// - /// Computes the sum of vectors in the span referenced by weighted by the kernel weight values. - /// Using this method the convolution filter is not applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The convolution kernel state. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Convolve3( - in ConvolutionState state, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column) - where TPixel : unmanaged, IPixel - { - Vector4 vector = default; - - ConvolveImpl( - state, - sourcePixels, - row, - column, - ref vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - vector.W = target.W; - - Numerics.UnPremultiply(ref vector); - target = vector; - } - - /// - /// Computes the sum of vectors in the span referenced by weighted by the kernel weight values. - /// Using this method the convolution filter is applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The convolution kernel state. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Convolve4( - in ConvolutionState state, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column) - where TPixel : unmanaged, IPixel - { - Vector4 vector = default; - - ConvolveImpl( - state, - sourcePixels, - row, - column, - ref vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - Numerics.UnPremultiply(ref vector); - target = vector; - } - - /// - /// Computes the sum of vectors in the span referenced by weighted - /// by the kernel weight values. - /// Using this method the convolution filter is not applied to alpha in addition - /// to the color channels. - /// - /// The convolution kernel state. - /// The source row. - /// The target row. - /// The current kernel row. - /// The interest x-bounds relative to the interest image. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void ConvolveRow3( - in ConvolutionState state, - Span sourceRow, - Span targetRow, - int kY, - int bX) - { - ReadOnlyKernel kernel = state.Kernel; - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetRow); - - Numerics.Premultiply(sourceRow); - - for (int x = 0; x < sourceRow.Length; x++) - { - Vector4 vector = default; - ref int sampleOffsetColumnBase = ref state.GetSampleColumn(x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, kX) - bX; - Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); - vector += kernel[kY, kX] * sample; - } - - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - vector.W = target.W; - Numerics.UnPremultiply(ref vector); - target = vector; - } - } - - /// - /// Computes the sum of vectors in the span referenced by weighted - /// by the kernel weight values. - /// Using this method the convolution filter is applied to alpha in addition to the - /// color channels. - /// - /// The convolution kernel state. - /// The source row. - /// The target row. - /// The current kernel row. - /// The interest x-bounds relative to the interest image. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void ConvolveRow4( - in ConvolutionState state, - Span sourceRow, - Span targetRow, - int kY, - int bX) - where TPixel : unmanaged, IPixel - { - ReadOnlyKernel kernel = state.Kernel; - - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceRow); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetRow); - - Numerics.Premultiply(sourceRow); - - for (int x = 0; x < sourceRow.Length; x++) - { - ref int sampleOffsetColumnBase = ref state.GetSampleColumn(x); - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, kX) - bX; - Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); - target += kernel[kY, kX] * sample; - } - } - - Numerics.UnPremultiply(targetRow); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void ConvolveImpl( - in ConvolutionState state, - Buffer2D sourcePixels, - int row, - int column, - ref Vector4 targetVector) - where TPixel : unmanaged, IPixel - { - ReadOnlyKernel kernel = state.Kernel; - int kernelHeight = kernel.Rows; - int kernelWidth = kernel.Columns; - - ref int sampleOffsetRowBase = ref state.GetSampleRow(row); - for (int y = 0; y < kernelHeight; y++) - { - int offsetY = Unsafe.Add(ref sampleOffsetRowBase, y); - ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourcePixels.GetRowSpan(offsetY)); - ref int sampleOffsetColumnBase = ref state.GetSampleColumn(column); - - for (int x = 0; x < kernelWidth; x++) - { - int offsetX = Unsafe.Add(ref sampleOffsetColumnBase, x); - var sample = Unsafe.Add(ref sourceRowBase, offsetX).ToVector4(); - Numerics.Premultiply(ref sample); - targetVector += kernel[y, x] * sample; - } - } - } - } -} From 84cc0daab390d03286bc24505e822f3752afb792 Mon Sep 17 00:00:00 2001 From: Max Eskin Date: Mon, 7 Dec 2020 12:00:15 -0500 Subject: [PATCH 24/24] Add test. --- .../Processing/Filters/BrightnessTest.cs | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs b/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs index 75a9072c59..680a6afdce 100644 --- a/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs +++ b/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs @@ -1,6 +1,7 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Filters; using Xunit; @@ -26,5 +27,33 @@ namespace SixLabors.ImageSharp.Tests.Processing.Effects Assert.Equal(1.5F, processor.Amount); } + + [Fact] + public void Brightness_scaled_vector() + { + var rgbImage = new Image(Configuration.Default, 100, 100, new Rgb24(0, 0, 0)); + + rgbImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new Rgb24(0, 0, 0), rgbImage[0, 0]); + + rgbImage = new Image(Configuration.Default, 100, 100, new Rgb24(10, 10, 10)); + + rgbImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new Rgb24(20, 20, 20), rgbImage[0, 0]); + + var halfSingleImage = new Image(Configuration.Default, 100, 100, new HalfSingle(-1)); + + halfSingleImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new HalfSingle(-1), halfSingleImage[0, 0]); + + halfSingleImage = new Image(Configuration.Default, 100, 100, new HalfSingle(-0.5f)); + + halfSingleImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new HalfSingle(0), halfSingleImage[0, 0]); + } } -} \ No newline at end of file +}