From bc1162e05b32d15041015614cdc8bd7d1f966185 Mon Sep 17 00:00:00 2001 From: Ynse Hoornenborg Date: Sun, 4 Sep 2022 17:30:54 +0200 Subject: [PATCH] Bulk convert source rows to Vector4 --- .../MedianBlurProcessor{TPixel}.cs | 11 +++---- .../Convolution/MedianRowOperation{TPixel}.cs | 32 ++++++++++++------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/MedianBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/MedianBlurProcessor{TPixel}.cs index 8867c84e8f..a44a817251 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/MedianBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/MedianBlurProcessor{TPixel}.cs @@ -17,10 +17,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly MedianBlurProcessor definition; public MedianBlurProcessor(Configuration configuration, MedianBlurProcessor definition, Image source, Rectangle sourceRectangle) - : base(configuration, source, sourceRectangle) - { - this.definition = definition; - } + : base(configuration, source, sourceRectangle) => this.definition = definition; protected override void OnFrameApply(ImageFrame source) { @@ -31,13 +28,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.CopyTo(targetPixels); - Rectangle interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); + var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); // We use a rectangle with width set to 2 * kernelSize^2 + width, to allocate a buffer big enough // for kernel source and target bulk pixel conversion. - Rectangle operationBounds = new Rectangle(interest.X, interest.Y, (2 * (kernelSize * kernelSize)) + interest.Width, interest.Height); + var operationBounds = new Rectangle(interest.X, interest.Y, (2 * kernelSize * kernelSize) + interest.Width + (kernelSize * interest.Width), interest.Height); - using KernelSamplingMap map = new KernelSamplingMap(this.Configuration.MemoryAllocator); + using var map = new KernelSamplingMap(this.Configuration.MemoryAllocator); map.BuildSamplingOffsetMap(kernelSize, kernelSize, interest, this.definition.BorderWrapModeX, this.definition.BorderWrapModeY); var operation = new MedianRowOperation( diff --git a/src/ImageSharp/Processing/Processors/Convolution/MedianRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/MedianRowOperation{TPixel}.cs index 4aa2ef7ec4..8296e064d2 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/MedianRowOperation{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/MedianRowOperation{TPixel}.cs @@ -51,7 +51,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution int kernelCount = this.kernelSize * this.kernelSize; Span kernelBuffer = span.Slice(0, kernelCount); Span channelVectorBuffer = span.Slice(kernelCount, kernelCount); - Span targetBuffer = span.Slice(kernelCount << 1, boundsWidth); + Span sourceVectorBuffer = span.Slice(kernelCount << 1, this.kernelSize * boundsWidth); + Span targetBuffer = span.Slice((kernelCount << 1) + sourceVectorBuffer.Length, boundsWidth); // Stack 4 channels of floats in the space of Vector4's. Span channelBuffer = MemoryMarshal.Cast(channelVectorBuffer); @@ -59,13 +60,22 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span yChannel = channelBuffer.Slice(this.yChannelStart, kernelCount); Span zChannel = channelBuffer.Slice(this.zChannelStart, kernelCount); - DenseMatrix kernel = new DenseMatrix(this.kernelSize, this.kernelSize, kernelBuffer); + var kernel = new DenseMatrix(this.kernelSize, this.kernelSize, kernelBuffer); int row = y - this.bounds.Y; - MedianConvolutionState state = new MedianConvolutionState(in kernel, this.map); + var state = new MedianConvolutionState(in kernel, this.map); ref int sampleRowBase = ref state.GetSampleRow(row); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + // First convert the required source rows to Vector4. + for (int i = 0; i < this.kernelSize; i++) + { + int currentYIndex = Unsafe.Add(ref sampleRowBase, i); + Span sourceRow = this.sourcePixels.DangerousGetRowSpan(currentYIndex).Slice(boundsX, boundsWidth); + Span sourceVectorRow = sourceVectorBuffer.Slice(i * boundsWidth, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceVectorRow); + } + if (this.preserveAlpha) { for (int x = 0; x < boundsWidth; x++) @@ -76,13 +86,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int kY = 0; kY < state.Kernel.Rows; kY++) { int currentYIndex = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.DangerousGetRowSpan(currentYIndex).Slice(boundsX, boundsWidth); - ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourceRow); + Span sourceRow = sourceVectorBuffer.Slice(kY * boundsWidth); + ref Vector4 sourceRowBase = ref MemoryMarshal.GetReference(sourceRow); for (int kX = 0; kX < state.Kernel.Columns; kX++) { int currentXIndex = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - TPixel pixel = Unsafe.Add(ref sourceRowBase, currentXIndex); - state.Kernel.SetValue(index, pixel.ToVector4()); + Vector4 pixel = Unsafe.Add(ref sourceRowBase, currentXIndex); + state.Kernel.SetValue(index, pixel); index++; } } @@ -102,13 +112,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int kY = 0; kY < state.Kernel.Rows; kY++) { int currentYIndex = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.DangerousGetRowSpan(currentYIndex).Slice(boundsX, boundsWidth); - ref TPixel sourceRowBase = ref MemoryMarshal.GetReference(sourceRow); + Span sourceRow = sourceVectorBuffer.Slice(kY * boundsWidth); + ref Vector4 sourceRowBase = ref MemoryMarshal.GetReference(sourceRow); for (int kX = 0; kX < state.Kernel.Columns; kX++) { int currentXIndex = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - TPixel pixel = Unsafe.Add(ref sourceRowBase, currentXIndex); - state.Kernel.SetValue(index, pixel.ToVector4()); + Vector4 pixel = Unsafe.Add(ref sourceRowBase, currentXIndex); + state.Kernel.SetValue(index, pixel); index++; } }