mirror of https://github.com/SixLabors/ImageSharp
committed by
GitHub
27 changed files with 1685 additions and 571 deletions
@ -1,279 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using SixLabors.ImageSharp.Memory; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
/// <summary>
|
|||
/// Extension methods for <see cref="DenseMatrix{T}"/>.
|
|||
/// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement.
|
|||
/// </summary>
|
|||
internal static class DenseMatrixUtils |
|||
{ |
|||
/// <summary>
/// Applies two kernels to the neighborhood of one source pixel, merges both responses into a
/// single magnitude and stores it in the target row at <paramref name="column"/>.
/// The alpha (W) value already stored in the target is carried over, so only the color
/// channels receive the filter response.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrixY">The vertical dense matrix.</param>
/// <param name="matrixX">The horizontal dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve2D3<TPixel>(
    in DenseMatrix<float> matrixY,
    in DenseMatrix<float> matrixX,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // The slot of the target row that receives the result for this column.
    ref Vector4 destination = ref Unsafe.Add(ref targetRowRef, column);

    Convolve2DImpl(
        in matrixY,
        in matrixX,
        sourcePixels,
        row,
        column,
        minRow,
        maxRow,
        minColumn,
        maxColumn,
        out Vector4 magnitude);

    // Replace the convolved alpha with the one the target already holds
    // before handing the value to Numerics.UnPremultiply.
    magnitude.W = destination.W;
    Numerics.UnPremultiply(ref magnitude);

    destination = magnitude;
}
|||
|
|||
/// <summary>
/// Applies two kernels to the neighborhood of one source pixel, merges both responses into a
/// single magnitude and stores it in the target row at <paramref name="column"/>.
/// All four components, including alpha (W), come from the filter response.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrixY">The vertical dense matrix.</param>
/// <param name="matrixX">The horizontal dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve2D4<TPixel>(
    in DenseMatrix<float> matrixY,
    in DenseMatrix<float> matrixX,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    Convolve2DImpl(
        in matrixY,
        in matrixX,
        sourcePixels,
        row,
        column,
        minRow,
        maxRow,
        minColumn,
        maxColumn,
        out Vector4 magnitude);

    // The samples were premultiplied while accumulating; undo that before storing.
    Numerics.UnPremultiply(ref magnitude);

    Unsafe.Add(ref targetRowRef, column) = magnitude;
}
|||
|
|||
/// <summary>
/// Accumulates the responses of two kernels over the neighborhood of one source pixel and
/// returns their per-component magnitude, <c>sqrt(x * x + y * y)</c>.
/// Sample coordinates that fall outside the working area are clamped to it, and every
/// sample is premultiplied before it is weighted.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrixY">The vertical dense matrix. Its dimensions drive both kernel loops.</param>
/// <param name="matrixX">The horizontal dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; <paramref name="minColumn"/> is added to it to get the source column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
/// <param name="vector">Receives the combined result, still in premultiplied form.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve2DImpl<TPixel>(
    in DenseMatrix<float> matrixY,
    in DenseMatrix<float> matrixX,
    Buffer2D<TPixel> sourcePixels,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn,
    out Vector4 vector)
    where TPixel : unmanaged, IPixel<TPixel>
{
    int kernelRows = matrixY.Rows;
    int kernelColumns = matrixY.Columns;

    // Unclamped source coordinates sampled by the top-left kernel cell.
    int firstRow = row - (kernelRows >> 1);
    int firstColumn = column + minColumn - (kernelColumns >> 1);

    Vector4 sumX = default;
    Vector4 sumY = default;

    for (int ky = 0; ky < kernelRows; ky++)
    {
        int sampleRow = Numerics.Clamp(firstRow + ky, minRow, maxRow);
        Span<TPixel> pixels = sourcePixels.GetRowSpan(sampleRow);

        for (int kx = 0; kx < kernelColumns; kx++)
        {
            int sampleColumn = Numerics.Clamp(firstColumn + kx, minColumn, maxColumn);

            Vector4 color = pixels[sampleColumn].ToVector4();
            Numerics.Premultiply(ref color);

            sumX += matrixX[ky, kx] * color;
            sumY += matrixY[ky, kx] * color;
        }
    }

    vector = Vector4.SquareRoot((sumX * sumX) + (sumY * sumY));
}
|||
|
|||
/// <summary>
/// Applies a single kernel to the neighborhood of one source pixel and stores the weighted
/// sum in the target row at <paramref name="column"/>.
/// The alpha (W) value already stored in the target is carried over, so only the color
/// channels receive the filter response.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrix">The dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve3<TPixel>(
    in DenseMatrix<float> matrix,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // The slot of the target row that receives the result for this column.
    ref Vector4 destination = ref Unsafe.Add(ref targetRowRef, column);

    // The implementation adds onto whatever it is given, so start from zero.
    Vector4 sum = default;
    ConvolveImpl(
        in matrix,
        sourcePixels,
        row,
        column,
        minRow,
        maxRow,
        minColumn,
        maxColumn,
        ref sum);

    // Replace the convolved alpha with the one the target already holds
    // before handing the value to Numerics.UnPremultiply.
    sum.W = destination.W;
    Numerics.UnPremultiply(ref sum);

    destination = sum;
}
|||
|
|||
/// <summary>
/// Applies a single kernel to the neighborhood of one source pixel and stores the weighted
/// sum in the target row at <paramref name="column"/>.
/// All four components, including alpha (W), come from the filter response.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrix">The dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve4<TPixel>(
    in DenseMatrix<float> matrix,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // The implementation adds onto whatever it is given, so start from zero.
    Vector4 sum = default;
    ConvolveImpl(
        in matrix,
        sourcePixels,
        row,
        column,
        minRow,
        maxRow,
        minColumn,
        maxColumn,
        ref sum);

    // The samples were premultiplied while accumulating; undo that before storing.
    Numerics.UnPremultiply(ref sum);

    Unsafe.Add(ref targetRowRef, column) = sum;
}
|||
|
|||
/// <summary>
/// Adds the kernel-weighted sum of the neighborhood of one source pixel onto
/// <paramref name="vector"/>. Sample coordinates that fall outside the working area are
/// clamped to it, and every sample is premultiplied before it is weighted.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrix">The dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; <paramref name="minColumn"/> is added to it to get the source column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
/// <param name="vector">The accumulator the weighted samples are added to.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void ConvolveImpl<TPixel>(
    in DenseMatrix<float> matrix,
    Buffer2D<TPixel> sourcePixels,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn,
    ref Vector4 vector)
    where TPixel : unmanaged, IPixel<TPixel>
{
    int kernelRows = matrix.Rows;
    int kernelColumns = matrix.Columns;

    // Unclamped source coordinates sampled by the top-left kernel cell.
    int firstRow = row - (kernelRows >> 1);
    int firstColumn = column + minColumn - (kernelColumns >> 1);

    for (int ky = 0; ky < kernelRows; ky++)
    {
        int sampleRow = Numerics.Clamp(firstRow + ky, minRow, maxRow);
        Span<TPixel> pixels = sourcePixels.GetRowSpan(sampleRow);

        for (int kx = 0; kx < kernelColumns; kx++)
        {
            int sampleColumn = Numerics.Clamp(firstColumn + kx, minColumn, maxColumn);

            Vector4 color = pixels[sampleColumn].ToVector4();
            Numerics.Premultiply(ref color);

            vector += matrix[ky, kx] * color;
        }
    }
}
|||
} |
|||
} |
|||
@ -0,0 +1,206 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
/// Interleaves three separate 8-bit channel planes into <see cref="Rgb24"/> pixels.
/// A batched pass (AVX2 when available, scalar otherwise) consumes the front of the spans
/// and the element-wise remainder pass handles what is left.
/// </summary>
/// <param name="configuration">The configuration. It is not read by this method.</param>
/// <param name="redChannel">The red channel values.</param>
/// <param name="greenChannel">The green channel values; must be as long as <paramref name="redChannel"/>.</param>
/// <param name="blueChannel">The blue channel values; must be as long as <paramref name="redChannel"/>.</param>
/// <param name="destination">The destination pixels; must be at least 3 elements longer than the channels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(destination.Length > redChannel.Length + 2, nameof(destination), "'destination' must contain a padding of 3 elements!");

    // The batched pass takes the spans by reference and re-slices them to the unprocessed tail.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
    else
    {
        PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
#else
    PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
#endif

    PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
|||
|
|||
/// <summary>
/// Interleaves three separate 8-bit channel planes into <see cref="Rgba32"/> pixels.
/// A batched pass (AVX2 when available, scalar otherwise) consumes the front of the spans
/// and the element-wise remainder pass handles what is left.
/// </summary>
/// <param name="configuration">The configuration. It is not read by this method.</param>
/// <param name="redChannel">The red channel values.</param>
/// <param name="greenChannel">The green channel values; must be as long as <paramref name="redChannel"/>.</param>
/// <param name="blueChannel">The blue channel values; must be as long as <paramref name="redChannel"/>.</param>
/// <param name="destination">The destination pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");

    // NOTE(review): the check is strict ('>') while the message only asks for "not shorter";
    // confirm which of the two is intended.
    DebugGuard.IsTrue(destination.Length > redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!");

    // The batched pass takes the spans by reference and re-slices them to the unprocessed tail.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
    else
    {
        PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
#else
    PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
#endif

    PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
|||
|
|||
/// <summary>
/// Packs the channel planes into <see cref="Rgb24"/> pixels four values at a time, then
/// re-slices all four spans so they start at the first element that was not processed.
/// </summary>
/// <param name="redChannel">The red channel values; reduced to the unprocessed tail on return.</param>
/// <param name="greenChannel">The green channel values; reduced to the unprocessed tail on return.</param>
/// <param name="blueChannel">The blue channel values; reduced to the unprocessed tail on return.</param>
/// <param name="destination">The destination pixels; reduced to the unprocessed tail on return.</param>
private static void PackFromRgbPlanesScalarBatchedReduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgb24> destination)
{
    // View each plane as a sequence of 4-byte tuples.
    ref ByteTuple4 redBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
    ref ByteTuple4 greenBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
    ref ByteTuple4 blueBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
    ref Rgb24 pixelBase = ref MemoryMarshal.GetReference(destination);

    int batches = redChannel.Length / 4;
    for (int batch = 0; batch < batches; batch++)
    {
        ref ByteTuple4 red = ref Unsafe.Add(ref redBase, batch);
        ref ByteTuple4 green = ref Unsafe.Add(ref greenBase, batch);
        ref ByteTuple4 blue = ref Unsafe.Add(ref blueBase, batch);

        int first = batch * 4;
        ref Rgb24 p0 = ref Unsafe.Add(ref pixelBase, first);
        ref Rgb24 p1 = ref Unsafe.Add(ref pixelBase, first + 1);
        ref Rgb24 p2 = ref Unsafe.Add(ref pixelBase, first + 2);
        ref Rgb24 p3 = ref Unsafe.Add(ref pixelBase, first + 3);

        p0.R = red.V0;
        p0.G = green.V0;
        p0.B = blue.V0;

        p1.R = red.V1;
        p1.G = green.V1;
        p1.B = blue.V1;

        p2.R = red.V2;
        p2.G = green.V2;
        p2.B = blue.V2;

        p3.R = red.V3;
        p3.G = green.V3;
        p3.B = blue.V3;
    }

    // Hand the unprocessed tail (fewer than four values) back to the caller.
    int packed = batches * 4;
    redChannel = redChannel.Slice(packed);
    greenChannel = greenChannel.Slice(packed);
    blueChannel = blueChannel.Slice(packed);
    destination = destination.Slice(packed);
}
|||
|
|||
/// <summary>
/// Packs the channel planes into <see cref="Rgba32"/> pixels four values at a time, then
/// re-slices all four spans so they start at the first element that was not processed.
/// The whole destination is first filled with <c>(0, 0, 0, 255)</c>; the loop then only
/// writes the R, G and B components.
/// </summary>
/// <param name="redChannel">The red channel values; reduced to the unprocessed tail on return.</param>
/// <param name="greenChannel">The green channel values; reduced to the unprocessed tail on return.</param>
/// <param name="blueChannel">The blue channel values; reduced to the unprocessed tail on return.</param>
/// <param name="destination">The destination pixels; reduced to the unprocessed tail on return.</param>
private static void PackFromRgbPlanesScalarBatchedReduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgba32> destination)
{
    // View each plane as a sequence of 4-byte tuples.
    ref ByteTuple4 redBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
    ref ByteTuple4 greenBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
    ref ByteTuple4 blueBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
    ref Rgba32 pixelBase = ref MemoryMarshal.GetReference(destination);

    int batches = redChannel.Length / 4;

    // Alpha is set up front for every destination element.
    destination.Fill(new Rgba32(0, 0, 0, 255));

    for (int batch = 0; batch < batches; batch++)
    {
        ref ByteTuple4 red = ref Unsafe.Add(ref redBase, batch);
        ref ByteTuple4 green = ref Unsafe.Add(ref greenBase, batch);
        ref ByteTuple4 blue = ref Unsafe.Add(ref blueBase, batch);

        int first = batch * 4;
        ref Rgba32 p0 = ref Unsafe.Add(ref pixelBase, first);
        ref Rgba32 p1 = ref Unsafe.Add(ref pixelBase, first + 1);
        ref Rgba32 p2 = ref Unsafe.Add(ref pixelBase, first + 2);
        ref Rgba32 p3 = ref Unsafe.Add(ref pixelBase, first + 3);

        p0.R = red.V0;
        p0.G = green.V0;
        p0.B = blue.V0;

        p1.R = red.V1;
        p1.G = green.V1;
        p1.B = blue.V1;

        p2.R = red.V2;
        p2.G = green.V2;
        p2.B = blue.V2;

        p3.R = red.V3;
        p3.G = green.V3;
        p3.B = blue.V3;
    }

    // Hand the unprocessed tail (fewer than four values) back to the caller.
    int packed = batches * 4;
    redChannel = redChannel.Slice(packed);
    greenChannel = greenChannel.Slice(packed);
    blueChannel = blueChannel.Slice(packed);
    destination = destination.Slice(packed);
}
|||
|
|||
/// <summary>
/// Packs the channel values left over after the batched pass into <see cref="Rgb24"/>
/// pixels, one element at a time.
/// </summary>
/// <param name="redChannel">The remaining red channel values.</param>
/// <param name="greenChannel">The remaining green channel values; expected to be as long as <paramref name="redChannel"/>.</param>
/// <param name="blueChannel">The remaining blue channel values; expected to be as long as <paramref name="redChannel"/>.</param>
/// <param name="destination">The remaining destination pixels. Elements beyond the channel length are left untouched.</param>
private static void PackFromRgbPlanesRemainder(
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    // The destination is required to carry padding beyond the channel length, and every
    // access below bypasses bounds checks. Iterating up to destination.Length would read
    // past the end of the channel spans, so only visit elements present on both sides.
    int count = Math.Min(redChannel.Length, destination.Length);

    ref byte r = ref MemoryMarshal.GetReference(redChannel);
    ref byte g = ref MemoryMarshal.GetReference(greenChannel);
    ref byte b = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);

    for (int i = 0; i < count; i++)
    {
        ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
    }
}
|||
|
|||
/// <summary>
/// Packs the channel values left over after the batched pass into <see cref="Rgba32"/>
/// pixels, one element at a time, setting alpha to 255.
/// </summary>
/// <param name="redChannel">The remaining red channel values.</param>
/// <param name="greenChannel">The remaining green channel values; expected to be as long as <paramref name="redChannel"/>.</param>
/// <param name="blueChannel">The remaining blue channel values; expected to be as long as <paramref name="redChannel"/>.</param>
/// <param name="destination">The remaining destination pixels. Elements beyond the channel length are left untouched.</param>
private static void PackFromRgbPlanesRemainder(
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    // The destination is required to be longer than the channels, and every access below
    // bypasses bounds checks. Iterating up to destination.Length would read past the end
    // of the channel spans, so only visit elements present on both sides.
    int count = Math.Min(redChannel.Length, destination.Length);

    ref byte r = ref MemoryMarshal.GetReference(redChannel);
    ref byte g = ref MemoryMarshal.GetReference(greenChannel);
    ref byte b = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination);

    for (int i = 0; i < count; i++)
    {
        ref Rgba32 d = ref Unsafe.Add(ref rgba, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
        d.A = 255;
    }
}
|||
} |
|||
} |
|||
@ -0,0 +1,193 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.Memory; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Processing.Processors.Convolution |
|||
{ |
|||
/// <summary>
|
|||
/// A <see langword="struct"/> implementing the logic for 2D convolution.
|
|||
/// </summary>
|
|||
internal readonly struct Convolution2DRowOperation<TPixel> : IRowOperation<Vector4> |
|||
where TPixel : unmanaged, IPixel<TPixel> |
|||
{ |
|||
private readonly Rectangle bounds; |
|||
private readonly Buffer2D<TPixel> targetPixels; |
|||
private readonly Buffer2D<TPixel> sourcePixels; |
|||
private readonly KernelSamplingMap map; |
|||
private readonly DenseMatrix<float> kernelMatrixY; |
|||
private readonly DenseMatrix<float> kernelMatrixX; |
|||
private readonly Configuration configuration; |
|||
private readonly bool preserveAlpha; |
|||
|
|||
/// <summary>
/// Initializes a new instance of the <see cref="Convolution2DRowOperation{TPixel}"/> struct.
/// </summary>
/// <param name="bounds">The rectangle of the image the operation works on.</param>
/// <param name="targetPixels">The buffer the convolved rows are written to.</param>
/// <param name="sourcePixels">The buffer the samples are read from.</param>
/// <param name="map">The precalculated kernel sampling offsets.</param>
/// <param name="kernelMatrixY">The vertical kernel.</param>
/// <param name="kernelMatrixX">The horizontal kernel.</param>
/// <param name="configuration">The configuration passed to the pixel conversion calls.</param>
/// <param name="preserveAlpha">Whether the alpha of the source row is kept instead of being convolved.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Convolution2DRowOperation(
    Rectangle bounds,
    Buffer2D<TPixel> targetPixels,
    Buffer2D<TPixel> sourcePixels,
    KernelSamplingMap map,
    DenseMatrix<float> kernelMatrixY,
    DenseMatrix<float> kernelMatrixX,
    Configuration configuration,
    bool preserveAlpha)
{
    // Working area and pixel buffers.
    this.bounds = bounds;
    this.sourcePixels = sourcePixels;
    this.targetPixels = targetPixels;

    // Kernels and their sampling offsets.
    this.kernelMatrixY = kernelMatrixY;
    this.kernelMatrixX = kernelMatrixX;
    this.map = map;

    // Behavior switches.
    this.configuration = configuration;
    this.preserveAlpha = preserveAlpha;
}
|||
|
|||
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Invoke(int y, Span<Vector4> span)
{
    // Without alpha preservation all four components are convolved.
    if (!this.preserveAlpha)
    {
        this.Convolve4(y, span);
        return;
    }

    // Otherwise only the color channels are convolved.
    this.Convolve3(y, span);
}
|||
|
|||
/// <summary>
/// Convolves row <paramref name="y"/> with both kernels, combines the two responses into a
/// magnitude and writes the row to the target buffer, taking alpha from the source row.
/// </summary>
/// <param name="y">The row to process.</param>
/// <param name="span">Scratch space that is sliced into three runs of one bounds width each.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Convolve3(int y, Span<Vector4> span)
{
    int left = this.bounds.X;
    int width = this.bounds.Width;

    // Scratch layout: [source samples | first accumulator | second accumulator].
    Span<Vector4> sourceBuffer = span.Slice(0, width);
    Span<Vector4> targetYBuffer = span.Slice(width, width);
    Span<Vector4> targetXBuffer = span.Slice(width * 2, width);

    Convolution2DState state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map);
    ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

    // The accumulators are added to below, so they must start from zero for every row.
    targetYBuffer.Clear();
    targetXBuffer.Clear();

    ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
    ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer);
    ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer);

    ReadOnlyKernel kernelY = state.KernelY;
    ReadOnlyKernel kernelX = state.KernelX;

    for (int kY = 0; kY < kernelY.Rows; kY++)
    {
        // Load the precalculated sample row for this kernel row into the source buffer.
        int sampleY = Unsafe.Add(ref sampleRowBase, kY);
        Span<TPixel> sampledRow = this.sourcePixels.GetRowSpan(sampleY).Slice(left, width);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sampledRow, sourceBuffer);

        for (int x = 0; x < width; x++)
        {
            ref int sampleColumnBase = ref state.GetSampleColumn(x);
            ref Vector4 accumulatorY = ref Unsafe.Add(ref targetBaseY, x);
            ref Vector4 accumulatorX = ref Unsafe.Add(ref targetBaseX, x);

            for (int kX = 0; kX < kernelY.Columns; kX++)
            {
                // The sampling map stores image columns; rebase them onto the buffer.
                int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - left;
                Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);

                // The X kernel feeds the buffer named Y and vice versa. Both are only
                // ever combined as a sum of squares below, so the pairing is kept as is.
                accumulatorY += kernelX[kY, kX] * sample;
                accumulatorX += kernelY[kY, kX] * sample;
            }
        }
    }

    // Reload the current source row: its alpha values replace the convolved ones.
    Span<TPixel> currentRow = this.sourcePixels.GetRowSpan(y).Slice(left, width);
    PixelOperations<TPixel>.Instance.ToVector4(this.configuration, currentRow, sourceBuffer);

    for (int x = 0; x < width; x++)
    {
        ref Vector4 result = ref Unsafe.Add(ref targetBaseY, x);
        Vector4 fromYBuffer = result;
        Vector4 fromXBuffer = Unsafe.Add(ref targetBaseX, x);

        result = Vector4.SquareRoot((fromXBuffer * fromXBuffer) + (fromYBuffer * fromYBuffer));
        result.W = Unsafe.Add(ref sourceBase, x).W;
    }

    Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(left, width);
    PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow);
}
|||
|
|||
/// <summary>
/// Convolves row <paramref name="y"/> with both kernels on premultiplied samples, combines
/// the two responses into a magnitude for all four components and writes the row to the
/// target buffer.
/// </summary>
/// <param name="y">The row to process.</param>
/// <param name="span">Scratch space that is sliced into three runs of one bounds width each.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Convolve4(int y, Span<Vector4> span)
{
    int left = this.bounds.X;
    int width = this.bounds.Width;

    // Scratch layout: [source samples | first accumulator | second accumulator].
    Span<Vector4> sourceBuffer = span.Slice(0, width);
    Span<Vector4> targetYBuffer = span.Slice(width, width);
    Span<Vector4> targetXBuffer = span.Slice(width * 2, width);

    Convolution2DState state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map);
    ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

    // The accumulators are added to below, so they must start from zero for every row.
    targetYBuffer.Clear();
    targetXBuffer.Clear();

    ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
    ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer);
    ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer);

    ReadOnlyKernel kernelY = state.KernelY;
    ReadOnlyKernel kernelX = state.KernelX;

    for (int kY = 0; kY < kernelY.Rows; kY++)
    {
        // Load the precalculated sample row for this kernel row into the source buffer.
        int sampleY = Unsafe.Add(ref sampleRowBase, kY);
        Span<TPixel> sampledRow = this.sourcePixels.GetRowSpan(sampleY).Slice(left, width);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sampledRow, sourceBuffer);

        Numerics.Premultiply(sourceBuffer);

        for (int x = 0; x < width; x++)
        {
            ref int sampleColumnBase = ref state.GetSampleColumn(x);
            ref Vector4 accumulatorY = ref Unsafe.Add(ref targetBaseY, x);
            ref Vector4 accumulatorX = ref Unsafe.Add(ref targetBaseX, x);

            for (int kX = 0; kX < kernelY.Columns; kX++)
            {
                // The sampling map stores image columns; rebase them onto the buffer.
                int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - left;
                Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);

                // The X kernel feeds the buffer named Y and vice versa. Both are only
                // ever combined as a sum of squares below, so the pairing is kept as is.
                accumulatorY += kernelX[kY, kX] * sample;
                accumulatorX += kernelY[kY, kX] * sample;
            }
        }
    }

    // Merge both responses into one magnitude, stored in the first accumulator.
    for (int x = 0; x < width; x++)
    {
        ref Vector4 result = ref Unsafe.Add(ref targetBaseY, x);
        Vector4 fromYBuffer = result;
        Vector4 fromXBuffer = Unsafe.Add(ref targetBaseX, x);

        result = Vector4.SquareRoot((fromXBuffer * fromXBuffer) + (fromYBuffer * fromYBuffer));
    }

    Numerics.UnPremultiply(targetYBuffer);

    Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(left, width);
    PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow);
}
|||
} |
|||
} |
|||
@ -0,0 +1,54 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp.Processing.Processors.Convolution |
|||
{ |
|||
/// <summary>
/// A stack only struct that bundles the two kernels with the sampling offset maps so that
/// 2D convolution loops can reach them with less reference indirection.
/// </summary>
internal readonly ref struct Convolution2DState
{
    // Offsets from the sampling map; one group of kernelHeight / kernelWidth entries per row / column.
    private readonly Span<int> rowOffsetMap;
    private readonly Span<int> columnOffsetMap;

    // Group sizes used to index the offset maps. Both are taken from the Y kernel.
    private readonly int kernelHeight;
    private readonly int kernelWidth;

    /// <summary>
    /// Initializes a new instance of the <see cref="Convolution2DState"/> struct.
    /// </summary>
    /// <param name="kernelY">The vertical kernel.</param>
    /// <param name="kernelX">The horizontal kernel.</param>
    /// <param name="map">The sampling map that provides the row and column offset spans.</param>
    public Convolution2DState(
        in DenseMatrix<float> kernelY,
        in DenseMatrix<float> kernelX,
        KernelSamplingMap map)
    {
        this.KernelY = new ReadOnlyKernel(kernelY);
        this.KernelX = new ReadOnlyKernel(kernelX);

        // We check the kernels are the same size upstream,
        // so the dimensions of one kernel serve for both.
        this.kernelWidth = kernelY.Columns;
        this.kernelHeight = kernelY.Rows;

        this.rowOffsetMap = map.GetRowOffsetSpan();
        this.columnOffsetMap = map.GetColumnOffsetSpan();
    }

    /// <summary>
    /// Gets the vertical kernel.
    /// </summary>
    public readonly ReadOnlyKernel KernelY
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets the horizontal kernel.
    /// </summary>
    public readonly ReadOnlyKernel KernelX
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets a reference to the first row offset of the group belonging to <paramref name="row"/>.
    /// No bounds check is performed.
    /// </summary>
    /// <param name="row">The row index used to select the group.</param>
    /// <returns>A reference into the row offset map.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleRow(int row)
    {
        ref int mapBase = ref MemoryMarshal.GetReference(this.rowOffsetMap);
        return ref Unsafe.Add(ref mapBase, row * this.kernelHeight);
    }

    /// <summary>
    /// Gets a reference to the first column offset of the group belonging to <paramref name="column"/>.
    /// No bounds check is performed.
    /// </summary>
    /// <param name="column">The column index used to select the group.</param>
    /// <returns>A reference into the column offset map.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleColumn(int column)
    {
        ref int mapBase = ref MemoryMarshal.GetReference(this.columnOffsetMap);
        return ref Unsafe.Add(ref mapBase, column * this.kernelWidth);
    }
}
|||
} |
|||
@ -0,0 +1,163 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.Memory; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Processing.Processors.Convolution |
|||
{ |
|||
/// <summary>
|
|||
/// A <see langword="struct"/> implementing the logic for 1D convolution.
|
|||
/// </summary>
|
|||
internal readonly struct ConvolutionRowOperation<TPixel> : IRowOperation<Vector4> |
|||
where TPixel : unmanaged, IPixel<TPixel> |
|||
{ |
|||
private readonly Rectangle bounds; |
|||
private readonly Buffer2D<TPixel> targetPixels; |
|||
private readonly Buffer2D<TPixel> sourcePixels; |
|||
private readonly KernelSamplingMap map; |
|||
private readonly DenseMatrix<float> kernelMatrix; |
|||
private readonly Configuration configuration; |
|||
private readonly bool preserveAlpha; |
|||
|
|||
/// <summary>
/// Initializes a new instance of the <see cref="ConvolutionRowOperation{TPixel}"/> struct.
/// </summary>
/// <param name="bounds">The rectangle of the image the operation works on.</param>
/// <param name="targetPixels">The buffer the convolved rows are written to.</param>
/// <param name="sourcePixels">The buffer the samples are read from.</param>
/// <param name="map">The precalculated kernel sampling offsets.</param>
/// <param name="kernelMatrix">The kernel.</param>
/// <param name="configuration">The configuration passed to the pixel conversion calls.</param>
/// <param name="preserveAlpha">Whether the alpha of the source row is kept instead of being convolved.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ConvolutionRowOperation(
    Rectangle bounds,
    Buffer2D<TPixel> targetPixels,
    Buffer2D<TPixel> sourcePixels,
    KernelSamplingMap map,
    DenseMatrix<float> kernelMatrix,
    Configuration configuration,
    bool preserveAlpha)
{
    // Working area and pixel buffers.
    this.bounds = bounds;
    this.sourcePixels = sourcePixels;
    this.targetPixels = targetPixels;

    // Kernel and its sampling offsets.
    this.kernelMatrix = kernelMatrix;
    this.map = map;

    // Behavior switches.
    this.configuration = configuration;
    this.preserveAlpha = preserveAlpha;
}
|||
|
|||
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Invoke(int y, Span<Vector4> span)
{
    // Without alpha preservation all four components are convolved.
    if (!this.preserveAlpha)
    {
        this.Convolve4(y, span);
        return;
    }

    // Otherwise only the color channels are convolved.
    this.Convolve3(y, span);
}
|||
|
|||
/// <summary>
/// Convolves row <paramref name="y"/> with the kernel and writes the row to the target
/// buffer, taking alpha from the source row instead of the filter response.
/// </summary>
/// <param name="y">The row to process.</param>
/// <param name="span">Scratch space holding at least two runs of one bounds width each.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Convolve3(int y, Span<Vector4> span)
{
    int boundsX = this.bounds.X;
    int boundsWidth = this.bounds.Width;

    // Scratch layout: [source samples | accumulator]. The accumulator is sliced with an
    // explicit length so it always matches the target row, whatever the span's length.
    Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
    Span<Vector4> targetBuffer = span.Slice(boundsWidth, boundsWidth);

    var state = new ConvolutionState(in this.kernelMatrix, this.map);
    ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

    // The accumulator is added to below, so it must start from zero for every row.
    targetBuffer.Clear();

    ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
    ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);

    ReadOnlyKernel kernel = state.Kernel;
    Span<TPixel> sourceRow;

    for (int kY = 0; kY < kernel.Rows; kY++)
    {
        // Load the precalculated sample row for this kernel row into the source buffer.
        int sampleY = Unsafe.Add(ref sampleRowBase, kY);
        sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

        for (int x = 0; x < sourceBuffer.Length; x++)
        {
            ref int sampleColumnBase = ref state.GetSampleColumn(x);
            ref Vector4 target = ref Unsafe.Add(ref targetBase, x);

            for (int kX = 0; kX < kernel.Columns; kX++)
            {
                // The sampling map stores image columns; rebase them onto the buffer.
                int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
                Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
                target += kernel[kY, kX] * sample;
            }
        }
    }

    // Reload the current source row: its alpha values replace the convolved ones.
    sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
    PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

    for (int x = 0; x < sourceRow.Length; x++)
    {
        ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
        target.W = Unsafe.Add(ref sourceBase, x).W;
    }

    Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
    PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
}
|||
|
|||
/// <summary>
/// Convolves row <paramref name="y"/> with the kernel on premultiplied samples, so that all
/// four components come from the filter response, and writes the row to the target buffer.
/// </summary>
/// <param name="y">The row to process.</param>
/// <param name="span">Scratch space holding at least two runs of one bounds width each.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Convolve4(int y, Span<Vector4> span)
{
    int boundsX = this.bounds.X;
    int boundsWidth = this.bounds.Width;

    // Scratch layout: [source samples | accumulator]. The accumulator is sliced with an
    // explicit length so it always matches the target row, whatever the span's length.
    Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
    Span<Vector4> targetBuffer = span.Slice(boundsWidth, boundsWidth);

    var state = new ConvolutionState(in this.kernelMatrix, this.map);
    ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

    // The accumulator is added to below, so it must start from zero for every row.
    targetBuffer.Clear();

    ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
    ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);

    ReadOnlyKernel kernel = state.Kernel;

    for (int kY = 0; kY < kernel.Rows; kY++)
    {
        // Load the precalculated sample row for this kernel row into the source buffer.
        int sampleY = Unsafe.Add(ref sampleRowBase, kY);
        Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

        Numerics.Premultiply(sourceBuffer);

        for (int x = 0; x < sourceBuffer.Length; x++)
        {
            ref int sampleColumnBase = ref state.GetSampleColumn(x);
            ref Vector4 target = ref Unsafe.Add(ref targetBase, x);

            for (int kX = 0; kX < kernel.Columns; kX++)
            {
                // The sampling map stores image columns; rebase them onto the buffer.
                int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
                Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
                target += kernel[kY, kX] * sample;
            }
        }
    }

    Numerics.UnPremultiply(targetBuffer);

    Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
    PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
}
|||
} |
|||
} |
|||
@ -0,0 +1,45 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp.Processing.Processors.Convolution |
|||
{ |
|||
/// <summary>
/// A stack only struct that bundles the kernel together with the sampling offset spans
/// so that convolution loops can reach them with less reference indirection.
/// </summary>
internal readonly ref struct ConvolutionState
{
    private readonly Span<int> rows;
    private readonly Span<int> columns;
    private readonly int rowStride;
    private readonly int columnStride;

    /// <summary>
    /// Initializes a new instance of the <see cref="ConvolutionState"/> struct.
    /// </summary>
    /// <param name="kernel">The convolution kernel matrix.</param>
    /// <param name="map">The map supplying the row and column sampling offsets.</param>
    public ConvolutionState(in DenseMatrix<float> kernel, KernelSamplingMap map)
    {
        this.Kernel = new ReadOnlyKernel(kernel);
        this.rowStride = kernel.Rows;
        this.columnStride = kernel.Columns;
        this.rows = map.GetRowOffsetSpan();
        this.columns = map.GetColumnOffsetSpan();
    }

    /// <summary>
    /// Gets the kernel wrapped for read only access.
    /// </summary>
    public readonly ReadOnlyKernel Kernel
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets a reference to the first row sampling offset stored for the given row.
    /// </summary>
    /// <param name="row">The row index; it is multiplied by the kernel height to locate the entry.</param>
    /// <returns>A reference into the row offset span. No bounds check is performed.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleRow(int row)
    {
        ref int origin = ref MemoryMarshal.GetReference(this.rows);
        return ref Unsafe.Add(ref origin, row * this.rowStride);
    }

    /// <summary>
    /// Gets a reference to the first column sampling offset stored for the given column.
    /// </summary>
    /// <param name="column">The column index; it is multiplied by the kernel width to locate the entry.</param>
    /// <returns>A reference into the column offset span. No bounds check is performed.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleColumn(int column)
    {
        ref int origin = ref MemoryMarshal.GetReference(this.columns);
        return ref Unsafe.Add(ref origin, column * this.columnStride);
    }
}
|||
} |
|||
@ -0,0 +1,102 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Buffers; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Memory; |
|||
|
|||
namespace SixLabors.ImageSharp.Processing.Processors.Convolution |
|||
{ |
|||
/// <summary>
/// Provides a map of the convolution kernel sampling offsets.
/// </summary>
internal sealed class KernelSamplingMap : IDisposable
{
    private readonly MemoryAllocator allocator;
    private bool isDisposed;
    private IMemoryOwner<int> yOffsets;
    private IMemoryOwner<int> xOffsets;

    /// <summary>
    /// Initializes a new instance of the <see cref="KernelSamplingMap"/> class.
    /// </summary>
    /// <param name="allocator">The memory allocator.</param>
    public KernelSamplingMap(MemoryAllocator allocator) => this.allocator = allocator;

    /// <summary>
    /// Builds a map of the sampling offsets for the kernel clamped by the given bounds.
    /// Buffers left over from a previous build are released before new ones are allocated.
    /// </summary>
    /// <param name="kernel">The convolution kernel.</param>
    /// <param name="bounds">The source bounds.</param>
    public void BuildSamplingOffsetMap(DenseMatrix<float> kernel, Rectangle bounds)
    {
        int kernelHeight = kernel.Rows;
        int kernelWidth = kernel.Columns;

        // Rebuilding would otherwise overwrite the owners without ever disposing them.
        this.ReleaseBuffers();

        this.yOffsets = this.allocator.Allocate<int>(bounds.Height * kernelHeight);
        this.xOffsets = this.allocator.Allocate<int>(bounds.Width * kernelWidth);

        // Calculate the y and x sampling offsets clamped to the given rectangle.
        FillClampedOffsets(this.yOffsets.GetSpan(), bounds.Height, kernelHeight, bounds.Y, bounds.Bottom - 1);
        FillClampedOffsets(this.xOffsets.GetSpan(), bounds.Width, kernelWidth, bounds.X, bounds.Right - 1);
    }

    /// <summary>
    /// Gets the row sampling offsets. <see cref="BuildSamplingOffsetMap"/> must have been called first.
    /// </summary>
    /// <returns>The span of row offsets, one group of kernel-height entries per row.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<int> GetRowOffsetSpan() => this.yOffsets.GetSpan();

    /// <summary>
    /// Gets the column sampling offsets. <see cref="BuildSamplingOffsetMap"/> must have been called first.
    /// </summary>
    /// <returns>The span of column offsets, one group of kernel-width entries per column.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<int> GetColumnOffsetSpan() => this.xOffsets.GetSpan();

    /// <inheritdoc/>
    public void Dispose()
    {
        if (!this.isDisposed)
        {
            // The owners are still null when the map was never built.
            this.ReleaseBuffers();

            this.isDisposed = true;
        }
    }

    /// <summary>
    /// Writes, for each of the <paramref name="length"/> positions, the <paramref name="kernelSize"/>
    /// sampling offsets centered on that position and clamps them to [min, max].
    /// </summary>
    private static void FillClampedOffsets(Span<int> offsets, int length, int kernelSize, int min, int max)
    {
        int radius = kernelSize >> 1;

        // While this isn't a hotpath we still dip into unsafe to avoid the span bounds
        // checks as they can potentially be looping over large arrays.
        ref int offsetsBase = ref MemoryMarshal.GetReference(offsets);
        for (int i = 0; i < length; i++)
        {
            int groupStart = i * kernelSize;
            for (int k = 0; k < kernelSize; k++)
            {
                Unsafe.Add(ref offsetsBase, groupStart + k) = i + k + min - radius;
            }
        }

        // A kernel of size one has a zero radius, so its offsets are never outside the bounds.
        if (kernelSize > 1)
        {
            Numerics.Clamp(offsets, min, max);
        }
    }

    /// <summary>
    /// Disposes whichever offset buffers are currently held and clears the references.
    /// </summary>
    private void ReleaseBuffers()
    {
        this.yOffsets?.Dispose();
        this.yOffsets = null;

        this.xOffsets?.Dispose();
        this.xOffsets = null;
    }
}
|||
} |
|||
@ -0,0 +1,63 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp.Processing.Processors.Convolution |
|||
{ |
|||
/// <summary>
/// A stack only, readonly view over a kernel matrix. Coordinates are validated
/// only in debug builds; release builds index the values without bounds checks.
/// </summary>
internal readonly ref struct ReadOnlyKernel
{
    private readonly ReadOnlySpan<float> data;

    /// <summary>
    /// Initializes a new instance of the <see cref="ReadOnlyKernel"/> struct.
    /// </summary>
    /// <param name="matrix">The matrix whose dimensions and values are exposed.</param>
    public ReadOnlyKernel(DenseMatrix<float> matrix)
    {
        this.data = matrix.Span;
        this.Rows = matrix.Rows;
        this.Columns = matrix.Columns;
    }

    /// <summary>
    /// Gets the number of columns in the kernel.
    /// </summary>
    public int Columns
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets the number of rows in the kernel.
    /// </summary>
    public int Rows
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets the kernel value at the given position, read from row-major storage.
    /// </summary>
    /// <param name="row">The row index.</param>
    /// <param name="column">The column index.</param>
    public float this[int row, int column]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            this.CheckCoordinates(row, column);

            int index = (row * this.Columns) + column;
            ref float origin = ref MemoryMarshal.GetReference(this.data);
            return Unsafe.Add(ref origin, index);
        }
    }

    /// <summary>
    /// Throws when either coordinate lies outside the matrix. Compiled into debug builds only.
    /// </summary>
    [Conditional("DEBUG")]
    private void CheckCoordinates(int row, int column)
    {
        bool rowInRange = row >= 0 && row < this.Rows;
        if (!rowInRange)
        {
            throw new ArgumentOutOfRangeException(nameof(row), row, $"{row} is outwith the matrix bounds.");
        }

        bool columnInRange = column >= 0 && column < this.Columns;
        if (!columnInRange)
        {
            throw new ArgumentOutOfRangeException(nameof(column), column, $"{column} is outwith the matrix bounds.");
        }
    }
}
|||
} |
|||
@ -0,0 +1,286 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using BenchmarkDotNet.Attributes; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion |
|||
{ |
|||
/// <summary>
/// Benchmarks several ways of packing separate R, G and B planes into interleaved pixels.
/// Every benchmark reads each channel from its own plane buffer.
/// </summary>
public unsafe class PixelConversion_PackFromRgbPlanes
{
    private byte[] rBuf;
    private byte[] gBuf;
    private byte[] bBuf;
    private Rgb24[] rgbBuf;
    private Rgba32[] rgbaBuf;

    private float[] rFloat;
    private float[] gFloat;
    private float[] bFloat;

    private float[] rgbaFloat;

    [Params(1024)]
    public int Count { get; set; }

    /// <summary>
    /// Allocates the source planes and destination buffers for the configured <see cref="Count"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        this.rBuf = new byte[this.Count];
        this.gBuf = new byte[this.Count];
        this.bBuf = new byte[this.Count];
        this.rgbBuf = new Rgb24[this.Count + 3]; // padded
        this.rgbaBuf = new Rgba32[this.Count];

        this.rFloat = new float[this.Count];
        this.gFloat = new float[this.Count];
        this.bFloat = new float[this.Count];

        this.rgbaFloat = new float[this.Count * 4];
    }

    // [Benchmark]
    public void Rgb24_Scalar_PerElement_Pinned()
    {
        fixed (byte* r = &this.rBuf[0])
        fixed (byte* g = &this.gBuf[0])
        fixed (byte* b = &this.bBuf[0])
        fixed (Rgb24* rgb = &this.rgbBuf[0])
        {
            for (int i = 0; i < this.Count; i++)
            {
                Rgb24* d = rgb + i;
                d->R = r[i];
                d->G = g[i];
                d->B = b[i];
            }
        }
    }

    [Benchmark]
    public void Rgb24_Scalar_PerElement_Span()
    {
        Span<byte> r = this.rBuf;
        Span<byte> g = this.gBuf;
        Span<byte> b = this.bBuf;
        Span<Rgb24> rgb = this.rgbBuf;

        for (int i = 0; i < r.Length; i++)
        {
            ref Rgb24 d = ref rgb[i];
            d.R = r[i];
            d.G = g[i];
            d.B = b[i];
        }
    }

    [Benchmark]
    public void Rgb24_Scalar_PerElement_Unsafe()
    {
        ref byte r = ref this.rBuf[0];
        ref byte g = ref this.gBuf[0];
        ref byte b = ref this.bBuf[0];
        ref Rgb24 rgb = ref this.rgbBuf[0];

        for (int i = 0; i < this.Count; i++)
        {
            ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
            d.R = Unsafe.Add(ref r, i);
            d.G = Unsafe.Add(ref g, i);
            d.B = Unsafe.Add(ref b, i);
        }
    }

    [Benchmark]
    public void Rgb24_Scalar_PerElement_Batched8()
    {
        // Reinterpret each plane as a sequence of 8-byte groups.
        ref Byte8 r = ref Unsafe.As<byte, Byte8>(ref this.rBuf[0]);
        ref Byte8 g = ref Unsafe.As<byte, Byte8>(ref this.gBuf[0]);
        ref Byte8 b = ref Unsafe.As<byte, Byte8>(ref this.bBuf[0]);
        ref Rgb24 rgb = ref this.rgbBuf[0];

        int count = this.Count / 8;
        for (int i = 0; i < count; i++)
        {
            ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8);
            ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
            ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
            ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
            ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4);
            ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5);
            ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6);
            ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7);

            ref Byte8 rr = ref Unsafe.Add(ref r, i);
            ref Byte8 gg = ref Unsafe.Add(ref g, i);
            ref Byte8 bb = ref Unsafe.Add(ref b, i);

            d0.R = rr.V0;
            d0.G = gg.V0;
            d0.B = bb.V0;

            d1.R = rr.V1;
            d1.G = gg.V1;
            d1.B = bb.V1;

            d2.R = rr.V2;
            d2.G = gg.V2;
            d2.B = bb.V2;

            d3.R = rr.V3;
            d3.G = gg.V3;
            d3.B = bb.V3;

            d4.R = rr.V4;
            d4.G = gg.V4;
            d4.B = bb.V4;

            d5.R = rr.V5;
            d5.G = gg.V5;
            d5.B = bb.V5;

            d6.R = rr.V6;
            d6.G = gg.V6;
            d6.B = bb.V6;

            d7.R = rr.V7;
            d7.G = gg.V7;
            d7.B = bb.V7;
        }
    }

    [Benchmark]
    public void Rgb24_Scalar_PerElement_Batched4()
    {
        // Reinterpret each plane as a sequence of 4-byte groups.
        ref Byte4 r = ref Unsafe.As<byte, Byte4>(ref this.rBuf[0]);
        ref Byte4 g = ref Unsafe.As<byte, Byte4>(ref this.gBuf[0]);
        ref Byte4 b = ref Unsafe.As<byte, Byte4>(ref this.bBuf[0]);
        ref Rgb24 rgb = ref this.rgbBuf[0];

        int count = this.Count / 4;
        for (int i = 0; i < count; i++)
        {
            ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4);
            ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
            ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
            ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);

            ref Byte4 rr = ref Unsafe.Add(ref r, i);
            ref Byte4 gg = ref Unsafe.Add(ref g, i);
            ref Byte4 bb = ref Unsafe.Add(ref b, i);

            d0.R = rr.V0;
            d0.G = gg.V0;
            d0.B = bb.V0;

            d1.R = rr.V1;
            d1.G = gg.V1;
            d1.B = bb.V1;

            d2.R = rr.V2;
            d2.G = gg.V2;
            d2.B = bb.V2;

            d3.R = rr.V3;
            d3.G = gg.V3;
            d3.B = bb.V3;
        }
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    [Benchmark(Baseline = true)]
    public void Rgba32_Avx2_Float()
    {
        ref Vector256<float> rBase = ref Unsafe.As<float, Vector256<float>>(ref this.rFloat[0]);
        ref Vector256<float> gBase = ref Unsafe.As<float, Vector256<float>>(ref this.gFloat[0]);
        ref Vector256<float> bBase = ref Unsafe.As<float, Vector256<float>>(ref this.bFloat[0]);
        ref Vector256<float> resultBase = ref Unsafe.As<float, Vector256<float>>(ref this.rgbaFloat[0]);

        int count = this.Count / Vector256<float>.Count;

        ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
        Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

        // Constant vector used as the fourth (alpha) component.
        var va = Vector256.Create(1F);

        for (int i = 0; i < count; i++)
        {
            Vector256<float> r = Unsafe.Add(ref rBase, i);
            Vector256<float> g = Unsafe.Add(ref gBase, i);
            Vector256<float> b = Unsafe.Add(ref bBase, i);

            r = Avx2.PermuteVar8x32(r, vcontrol);
            g = Avx2.PermuteVar8x32(g, vcontrol);
            b = Avx2.PermuteVar8x32(b, vcontrol);

            Vector256<float> vte = Avx.UnpackLow(r, b);
            Vector256<float> vto = Avx.UnpackLow(g, va);

            // Each iteration consumes one vector per plane and produces four interleaved vectors.
            ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

            destination = Avx.UnpackLow(vte, vto);
            Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);

            vte = Avx.UnpackHigh(r, b);
            vto = Avx.UnpackHigh(g, va);

            Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
            Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
        }
    }

    [Benchmark]
    public void Rgb24_Avx2_Bytes()
    {
        ReadOnlySpan<byte> r = this.rBuf;
        ReadOnlySpan<byte> g = this.gBuf;
        ReadOnlySpan<byte> b = this.bBuf;
        Span<Rgb24> rgb = this.rgbBuf;
        SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
    }

    [Benchmark]
    public void Rgba32_Avx2_Bytes()
    {
        ReadOnlySpan<byte> r = this.rBuf;
        ReadOnlySpan<byte> g = this.gBuf;
        ReadOnlySpan<byte> b = this.bBuf;
        Span<Rgba32> rgb = this.rgbaBuf;
        SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
    }
#endif

#pragma warning disable SA1132
    private struct Byte8
    {
        public byte V0, V1, V2, V3, V4, V5, V6, V7;
    }

    private struct Byte4
    {
        public byte V0, V1, V2, V3;
    }
#pragma warning restore SA1132

    // Results @ Anton's PC, 2020 Dec 05
    // .NET Core 3.1.1
    // Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
    //
    // NOTE: these figures were recorded while the byte-plane benchmarks read the red plane
    // for all three channels; re-run to refresh them now that each channel reads its own plane.
    //
    // |                           Method | Count |       Mean |    Error |   StdDev | Ratio | RatioSD |
    // |--------------------------------- |------ |-----------:|---------:|---------:|------:|--------:|
    // |     Rgb24_Scalar_PerElement_Span |  1024 | 1,634.6 ns | 26.56 ns | 24.84 ns |  3.12 |    0.05 |
    // |   Rgb24_Scalar_PerElement_Unsafe |  1024 | 1,284.7 ns |  4.70 ns |  4.16 ns |  2.46 |    0.01 |
    // | Rgb24_Scalar_PerElement_Batched8 |  1024 | 1,182.3 ns |  5.12 ns |  4.27 ns |  2.26 |    0.01 |
    // | Rgb24_Scalar_PerElement_Batched4 |  1024 | 1,146.2 ns | 16.38 ns | 14.52 ns |  2.19 |    0.02 |
    // |                Rgba32_Avx2_Float |  1024 |   522.7 ns |  1.78 ns |  1.39 ns |  1.00 |    0.00 |
    // |                 Rgb24_Avx2_Bytes |  1024 |   243.3 ns |  1.56 ns |  1.30 ns |  0.47 |    0.00 |
    // |                Rgba32_Avx2_Bytes |  1024 |   146.0 ns |  2.48 ns |  2.32 ns |  0.28 |    0.01 |
}
|||
} |
|||
Loading…
Reference in new issue